# evaluation.py
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score


def evaluate_model(model_path, test_data, *, batch_size=1):
    """Evaluate a sequence-classification model and return its accuracy.

    The model and its tokenizer are both loaded from ``model_path``. Each
    text is tokenized (with truncation enabled), run through the model, and
    the arg-max over the logits is taken as the predicted class index.

    Args:
        model_path: Path or identifier passed to ``from_pretrained`` for
            both the model and the tokenizer.
        test_data: An iterable of ``(text, label)`` tuples. Labels are
            compared against the predicted class indices, so they should be
            integer class ids.
        batch_size: Number of texts to run through the model per forward
            pass. The default of 1 evaluates one sample at a time. Values
            above 1 require a tokenizer that supports padding.

    Returns:
        The accuracy computed by ``sklearn.metrics.accuracy_score`` over all
        samples.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Load model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()

    # Materialize once so the data can be sliced into batches even when the
    # caller passes a generator.
    samples = list(test_data)

    all_preds = []
    all_labels = []

    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every forward pass.
    with torch.no_grad():
        for start in range(0, len(samples), batch_size):
            batch = samples[start:start + batch_size]
            texts = [text for text, _ in batch]

            inputs = tokenizer(
                texts,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            logits = model(**inputs).logits

            # One predicted class index per row of the batch.
            all_preds.extend(torch.argmax(logits, dim=-1).tolist())
            all_labels.extend(label for _, label in batch)

    # Calculate accuracy (or any other metric)
    return accuracy_score(all_labels, all_preds)