# evaluation.py

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score

def evaluate_model(model_path, test_data):
    """
    Evaluate a sequence-classification model on labelled text and return its accuracy.

    Each example is tokenized and run through the model individually; the
    predicted class is the index of the largest logit.

    Args:
    - model_path: Location passed to ``from_pretrained`` for both the model and
      the tokenizer (e.g., a directory produced by ``save_pretrained``).
    - test_data: An iterable of (text, label) pairs. Labels are compared
      against the predicted class indices.

    Returns:
    - score: Accuracy as computed by ``sklearn.metrics.accuracy_score``.
    """

    # Load model and tokenizer from the same location so they stay in sync.
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # eval() only switches layers such as dropout to inference behavior;
    # it does not disable gradient tracking.
    model.eval()
    all_preds = []
    all_labels = []

    # Gradients are never used during evaluation, so turn autograd off to
    # avoid building a computation graph for every forward pass.
    with torch.no_grad():
        for text, label in test_data:
            inputs = tokenizer(
                text, return_tensors="pt", padding=True, truncation=True
            )
            logits = model(**inputs).logits
            # One example per forward pass, so argmax yields a single value.
            prediction = torch.argmax(logits, dim=-1).item()

            all_preds.append(prediction)
            all_labels.append(label)

    # Fraction of examples whose predicted class equals the label.
    return accuracy_score(all_labels, all_preds)