danielle2003 committed on
Commit
6e15199
·
1 Parent(s): 3fbc9ee
Files changed (1) hide show
  1. scripts/evaluate.py +11 -7
scripts/evaluate.py CHANGED
@@ -1,24 +1,28 @@
1
- from transformers import pipeline, AutoModelForSequenceClassification
 
2
  from datasets import load_dataset
3
  from sklearn.metrics import accuracy_score, f1_score
4
 
5
- # Load dataset
6
- dataset = load_dataset("allocine")["test"]
 
 
 
7
 
8
  # Load model and tokenizer
9
  model_path = "./models"
10
- classifier = pipeline("text-classification", model=model_path, tokenizer=model_path)
11
 
12
- # Get actual model labels
13
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
14
- label_map = {v: k for k, v in model.config.label2id.items()} # Adjust dynamically
15
 
16
  # Get predictions
17
  predictions = [classifier(text["review"], truncation=True, max_length=512)[0]["label"] for text in dataset]
18
  labels = dataset["label"]
19
 
20
  # Convert labels
21
- predictions = [label_map[p] for p in predictions]
22
 
23
  # Compute metrics
24
  accuracy = accuracy_score(labels, predictions)
 
1
+ import torch
2
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score, f1_score
5
 
6
+ # Load dataset (limit to 10 samples for faster evaluation)
7
+ dataset = load_dataset("allocine")["test"].select(range(10))
8
+
9
+ # Use GPU if available
10
+ device = 0 if torch.cuda.is_available() else -1
11
 
12
  # Load model and tokenizer
13
  model_path = "./models"
14
+ classifier = pipeline("text-classification", model=model_path, tokenizer=model_path, device=device)
15
 
16
+ # Load model to get dynamic label mapping
17
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
18
+ label_map = dict(model.config.label2id) # Label name (as returned by the pipeline) -> integer class id
19
 
20
  # Get predictions
21
  predictions = [classifier(text["review"], truncation=True, max_length=512)[0]["label"] for text in dataset]
22
  labels = dataset["label"]
23
 
24
  # Convert labels
25
+ predictions = [label_map[p] for p in predictions] # Pipeline yields label names; look up their int ids (assumes ids match the dataset's label ids)
26
 
27
  # Compute metrics
28
  accuracy = accuracy_score(labels, predictions)