JanviMl committed
Commit 685f359 · verified · 1 Parent(s): b69b713

Update metrics.py

Files changed (1): metrics.py +101 -44
metrics.py CHANGED
@@ -1,59 +1,116 @@
  # metrics.py
- import torch
- from sentence_transformers import SentenceTransformer, util
- from transformers import pipeline
-
- # Load Sentence-BERT model for semantic similarity
- sentence_bert_model = SentenceTransformer('all-MiniLM-L6-v2')
-
- # Load a pre-trained emotion classifier
- emotion_classifier = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=None)
-
- def compute_semantic_similarity(original_comment, paraphrased_comment):
-     """
-     Compute the semantic similarity between the original and paraphrased comments using Sentence-BERT.
-     Returns a score between 0 and 1 (higher is better).
-     """
-     original_embedding = sentence_bert_model.encode(original_comment, convert_to_tensor=True)
-     paraphrased_embedding = sentence_bert_model.encode(paraphrased_comment, convert_to_tensor=True)
-     similarity_score = util.cos_sim(original_embedding, paraphrased_embedding)[0][0].item()
-     return round(similarity_score, 2)
-
- def compute_emotion_shift(original_comment, paraphrased_comment):
-     """
-     Compute the shift in emotional tone between the original and paraphrased comments.
-     Returns the dominant emotion labels for both comments and a flag indicating if the shift is positive.
-     """
-     # Classify emotions in the original comment
-     original_emotions = emotion_classifier(original_comment)
-     # Since pipeline returns a list of lists, take the first (and only) inner list
-     original_emotions = original_emotions[0] if isinstance(original_emotions, list) and original_emotions else []
-     original_dominant_emotion = max(original_emotions, key=lambda x: x['score'])['label'] if original_emotions else "unknown"
-
-     # Classify emotions in the paraphrased comment
-     paraphrased_emotions = emotion_classifier(paraphrased_comment)
-     paraphrased_emotions = paraphrased_emotions[0] if isinstance(paraphrased_emotions, list) and paraphrased_emotions else []
-     paraphrased_dominant_emotion = max(paraphrased_emotions, key=lambda x: x['score'])['label'] if paraphrased_emotions else "unknown"
-
-     # Define negative and positive emotions
-     negative_emotions = ['anger', 'sadness', 'fear']
-     positive_emotions = ['joy', 'love']
-
-     # Check if the shift is positive (e.g., from a negative emotion to a neutral/positive one)
-     is_positive_shift = (
-         original_dominant_emotion in negative_emotions and
-         (paraphrased_dominant_emotion in positive_emotions or paraphrased_dominant_emotion not in negative_emotions)
-     )
-
-     return original_dominant_emotion, paraphrased_dominant_emotion, is_positive_shift
-
- def compute_empathy_score(paraphrased_comment):
-     """
-     Compute a proxy empathy score based on politeness keywords.
-     Returns a score between 0 and 1 (higher indicates more empathy).
-     """
-     empathy_keywords = ['please', 'thank you', 'appreciate', 'understand', 'sorry', 'consider', 'kindly', 'help', 'support']
-     comment_lower = paraphrased_comment.lower()
-     keyword_count = sum(1 for keyword in empathy_keywords if keyword in comment_lower)
-     empathy_score = min(keyword_count / 3, 1.0)
-     return round(empathy_score, 2)
+ import nltk
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+ from rouge_score import rouge_scorer
+ from model_loader import metrics_models
+
+ # Download required NLTK data
+ nltk.download('punkt')
+
+ def compute_semantic_similarity(original, paraphrased):
+     """
+     Compute semantic similarity between the original and paraphrased comment using Sentence-BERT.
+     Returns a similarity score between 0 and 1.
+     """
+     try:
+         sentence_bert = metrics_models.load_sentence_bert()
+         # Normalize the embeddings so the dot product below is a cosine similarity
+         embeddings = sentence_bert.encode([original, paraphrased], normalize_embeddings=True)
+         similarity = float(embeddings[0] @ embeddings[1])
+         return round(similarity, 2)
+     except Exception as e:
+         print(f"Error computing semantic similarity: {str(e)}")
+         return None
+
+ def compute_emotion_shift(original, paraphrased):
+     """
+     Compute the emotion shift between the original and paraphrased comment.
+     Returns the original emotion, paraphrased emotion, and whether the shift is positive.
+     """
+     try:
+         emotion_classifier = metrics_models.load_emotion_classifier()
+         original_emotions = emotion_classifier(original)
+         paraphrased_emotions = emotion_classifier(paraphrased)
+
+         # Get the top emotion for each
+         original_emotion = max(original_emotions[0], key=lambda x: x['score'])['label']
+         paraphrased_emotion = max(paraphrased_emotions[0], key=lambda x: x['score'])['label']
+
+         # Define negative emotions; every other label counts as neutral or positive
+         negative_emotions = ['anger', 'sadness', 'fear']
+
+         # The shift is positive when a negative comment becomes neutral or positive
+         emotion_shift_positive = (
+             original_emotion in negative_emotions and
+             paraphrased_emotion not in negative_emotions
+         )
+
+         return original_emotion, paraphrased_emotion, emotion_shift_positive
+     except Exception as e:
+         print(f"Error computing emotion shift: {str(e)}")
+         return None, None, None
+
+ def compute_empathy_score(paraphrased):
+     """
+     Compute an empathy score for the paraphrased comment (placeholder).
+     Returns a score between 0 and 1.
+     """
+     try:
+         # Placeholder: compute empathy based on word presence (e.g., "sorry", "understand")
+         empathy_words = ["sorry", "understand", "care", "help", "support"]
+         words = paraphrased.lower().split()
+         empathy_count = sum(1 for word in words if word in empathy_words)
+         score = empathy_count / len(words) if words else 0
+         return round(score, 2)
+     except Exception as e:
+         print(f"Error computing empathy score: {str(e)}")
+         return None
+
+ def compute_bleu_score(original, paraphrased):
+     """
+     Compute the BLEU score between the original and paraphrased comment.
+     Returns a score between 0 and 1.
+     """
+     try:
+         reference = [nltk.word_tokenize(original.lower())]
+         candidate = nltk.word_tokenize(paraphrased.lower())
+         # Smooth so short comments with no higher-order n-gram overlap don't collapse to 0
+         score = sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25),
+                               smoothing_function=SmoothingFunction().method1)
+         return round(score, 2)
+     except Exception as e:
+         print(f"Error computing BLEU score: {str(e)}")
+         return None
+
+ def compute_rouge_score(original, paraphrased):
+     """
+     Compute ROUGE scores (ROUGE-1, ROUGE-2, ROUGE-L) between the original and paraphrased comment.
+     Returns a dictionary with ROUGE scores.
+     """
+     try:
+         scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
+         scores = scorer.score(original, paraphrased)
+         return {
+             'rouge1': round(scores['rouge1'].fmeasure, 2),
+             'rouge2': round(scores['rouge2'].fmeasure, 2),
+             'rougeL': round(scores['rougeL'].fmeasure, 2)
+         }
+     except Exception as e:
+         print(f"Error computing ROUGE scores: {str(e)}")
+         return None
+
+ def compute_entailment_score(original, paraphrased):
+     """
+     Compute the entailment score to check factual consistency using an NLI model.
+     Returns a score between 0 and 1.
+     """
+     try:
+         nli_classifier = metrics_models.load_nli_classifier()
+         # Assuming load_nli_classifier() returns a zero-shot classification
+         # pipeline: score the paraphrase itself as the hypothesis (bare
+         # template), so the result is the entailment probability for the
+         # (original, paraphrased) pair.
+         result = nli_classifier(
+             original,
+             candidate_labels=[paraphrased],
+             hypothesis_template="{}"
+         )
+         entailment_score = result['scores'][0]
+         return round(entailment_score, 2)
+     except Exception as e:
+         print(f"Error computing entailment score: {str(e)}")
+         return None
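
For reference, a minimal usage sketch of the updated module, assuming metrics.py is on the import path and that model_loader.metrics_models provides the three loaders referenced above; the example comments are invented:

# evaluate_example.py (hypothetical usage sketch)
from metrics import (
    compute_semantic_similarity,
    compute_emotion_shift,
    compute_empathy_score,
    compute_bleu_score,
    compute_rouge_score,
    compute_entailment_score,
)

original = "This code is a mess, fix it."
paraphrased = "I understand this code needs some cleanup; could you help fix it?"

print("similarity:", compute_semantic_similarity(original, paraphrased))
print("emotion shift:", compute_emotion_shift(original, paraphrased))
print("empathy:", compute_empathy_score(paraphrased))
print("BLEU:", compute_bleu_score(original, paraphrased))
print("ROUGE:", compute_rouge_score(original, paraphrased))
print("entailment:", compute_entailment_score(original, paraphrased))

The model_loader module itself is not part of this commit. One plausible shape for it, inferred from the calls above and from the models the previous metrics.py constructed at import time; the NLI checkpoint is an assumption, not something this commit pins down:

# model_loader.py (hypothetical sketch of the assumed interface)
from sentence_transformers import SentenceTransformer
from transformers import pipeline

class MetricsModels:
    """Lazily load and cache the models used by metrics.py."""

    def __init__(self):
        self._sentence_bert = None
        self._emotion_classifier = None
        self._nli_classifier = None

    def load_sentence_bert(self):
        if self._sentence_bert is None:
            # Same model the previous metrics.py loaded at import time
            self._sentence_bert = SentenceTransformer('all-MiniLM-L6-v2')
        return self._sentence_bert

    def load_emotion_classifier(self):
        if self._emotion_classifier is None:
            # Same emotion classifier the previous metrics.py used
            self._emotion_classifier = pipeline(
                "text-classification",
                model="bhadresh-savani/distilbert-base-uncased-emotion",
                top_k=None,
            )
        return self._emotion_classifier

    def load_nli_classifier(self):
        if self._nli_classifier is None:
            # Assumed: a zero-shot pipeline over an NLI model (checkpoint is a guess)
            self._nli_classifier = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
            )
        return self._nli_classifier

metrics_models = MetricsModels()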