test_flan_t5.py
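"""Evaluate a fine-tuned FLAN-T5 checkpoint on a subtask test set.

Runs one generation pass per prompt template, then majority-votes the
per-template predictions to produce the final label for each sample.
"""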
from utils import build_test_samples, save_json
from config import BaseConfig
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from tqdm import tqdm
from sklearn.metrics import classification_report
import argparse
import pandas as pd
from statistics import mode


def predict(X, model, tokenizer, max_source_length, max_target_length, device):
    # Batch variant: tokenize, greedy-decode, and return the generated strings.
    inputs = tokenizer(X, max_length=max_source_length, padding='max_length',
                       return_tensors="pt", truncation=True)
    inputs.to(device)
    with torch.no_grad():
        sequence_ids = model.generate(inputs.input_ids, num_beams=1,
                                      max_length=max_target_length)
    sequences = tokenizer.batch_decode(sequence_ids, skip_special_tokens=True)
    return sequences
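
# NOTE: predict() is not called in the __main__ block below, which inlines the
# same generation logic one sample at a time.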


def load_test(config, template):
    # Read the test CSV and wrap each text with the given prompt template.
    dataset_csv = pd.read_csv(config.subtask_train_test)
    source_text = build_test_samples(dataset_csv['text'].tolist(), template)
    target_text = dataset_csv['label'].tolist()
    return source_text, target_text


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--subtask", required=True)         # 1, 2, 3
    parser.add_argument("--model", required=True)           # large, xl
    parser.add_argument("--combined_model", required=True)  # "True" loads the multi-task checkpoint
    parser.add_argument("--final_model", required=True)     # "True" loads the "-final" checkpoint
    args = parser.parse_args()

    CONFIG = BaseConfig().get_args(subtask=int(args.subtask), model=args.model)
    print("ARGS:", vars(args))
    print("Evaluating Model:", CONFIG.model_output_path)
    print("Dataset Path:", CONFIG.subtask_train_test)

    # Select which fine-tuned checkpoint to evaluate.
    if args.combined_model == "True":
        print("Testing model for: COMBINED MODELS (MULTI-TASK FSL LEARNING)")
        TOKENIZER = T5Tokenizer.from_pretrained(CONFIG.model_output_path_combined)
        MODEL = T5ForConditionalGeneration.from_pretrained(CONFIG.model_output_path_combined)
    elif args.final_model == "True":
        print("Testing model for: FINAL FSL TRAINING FOR SUBMITTING")
        TOKENIZER = T5Tokenizer.from_pretrained(CONFIG.model_output_path + "-final")
        MODEL = T5ForConditionalGeneration.from_pretrained(CONFIG.model_output_path + "-final")
    else:
        print("Testing model for: SINGLE TASK FSL LEARNING")
        TOKENIZER = T5Tokenizer.from_pretrained(CONFIG.model_output_path)
        MODEL = T5ForConditionalGeneration.from_pretrained(CONFIG.model_output_path)

    MODEL.to(CONFIG.device)
    MODEL.eval()

    # Run one full pass over the test set for every prompt template.
    prediction_dict = {}
    for index, template in enumerate(CONFIG.templates_dict['train']):
        SOURCE, TARGET = load_test(CONFIG, template)
        predictions = []
        # Map frequent generation errors to the closest valid label.
        prediction_error_dict = {'no influence': 'no influencer',
                                 'racing': 'gaming',
                                 'sleeping': 'other',
                                 'lastgame': 'gaming'}
        for source, target in tqdm(zip(SOURCE, TARGET)):
            inputs = TOKENIZER(source,
                               max_length=CONFIG.max_source_length,
                               padding='max_length',
                               return_tensors="pt",
                               truncation=True)
            inputs.to(CONFIG.device)
            with torch.no_grad():
                sequence_ids = MODEL.generate(inputs.input_ids,
                                              num_beams=1,
                                              max_length=CONFIG.max_target_length)
            # Named `prediction` to avoid shadowing the predict() helper above.
            prediction = TOKENIZER.batch_decode(sequence_ids, skip_special_tokens=True)[0]
            prediction = prediction_error_dict.get(prediction, prediction)
            # Accept partial matches: a prediction contained in the gold label
            # is counted as that label.
            if prediction in target:
                prediction = target
            predictions.append(prediction)
        prediction_dict[index] = predictions
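
    # Ensemble step: for each test sample, gather the per-template predictions
    # and take the most frequent label (statistics.mode) as the final answer.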
    ensemble_predictions = []
    for index in range(len(TARGET)):
        ensemble_prediction = [template_predicts[index]
                               for template_predicts in prediction_dict.values()]
        ensemble_predictions.append(ensemble_prediction)
    final_predictions = [mode(predictions) for predictions in ensemble_predictions]

    # Score against the gold labels and persist the full report plus config.
    results = classification_report(TARGET, final_predictions, output_dict=True)
    print("F1-Score (Macro) is:", results['macro avg']['f1-score'])
    print("Storing results in:", CONFIG.result_multiple_file)
    report = {
        "results": results,
        "configs": vars(CONFIG)
    }
    save_json(report, CONFIG.result_multiple_file)
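
# Example invocation (illustrative values; --subtask accepts 1/2/3 and --model
# accepts large/xl per the argparse comments above; any string other than
# "True" falls through to the single-task branch):
#   python test_flan_t5.py --subtask 1 --model large \
#       --combined_model False --final_model False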