JanviMl committed (verified)
Commit 7351455 · 1 Parent(s): 77f7351

Update metrics.py

Files changed (1): metrics.py (+48 -7)
metrics.py CHANGED
@@ -8,6 +8,50 @@ def softmax(logits):
     exp_logits = np.exp(logits - np.max(logits))
     return exp_logits / exp_logits.sum()
 
+def compute_semantic_similarity(original, paraphrase):
+    """
+    Compute semantic similarity between original and paraphrased text using Sentence-BERT.
+    Returns a similarity score between 0 and 1.
+    """
+    try:
+        if not isinstance(original, str) or not isinstance(paraphrase, str):
+            print(f"Invalid input for semantic similarity: original={original}, paraphrase={paraphrase}")
+            return 0.0
+        if "Error: Unable to generate paraphrase" in paraphrase:
+            print(f"Invalid paraphrase: {paraphrase}. Returning similarity 0.0.")
+            return 0.0
+
+        sentence_bert = metrics_models.sentence_bert
+        embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
+        similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+        print(f"Semantic similarity computed: {similarity}")
+        return similarity
+
+    except Exception as e:
+        print(f"Error computing semantic similarity: {str(e)}")
+        return 0.0
+
+def compute_empathy_score(toxicity):
+    """
+    Placeholder for empathy score computation.
+    For now, inversely proportional to toxicity.
+    """
+    return 1.0 - toxicity
+
+def compute_bias_score(toxicity):
+    """
+    Placeholder for bias score computation.
+    For now, equal to toxicity.
+    """
+    return toxicity
+
+def compute_hallucination_score(similarity):
+    """
+    Compute hallucination score based on semantic similarity.
+    High difference means potential hallucination.
+    """
+    return 1.0 - similarity
+
 def compute_reward_scores(original, paraphrase):
     """
     Compute reward scores for a paraphrased comment.
@@ -44,17 +88,14 @@ def compute_reward_scores(original, paraphrase):
     probs = softmax(logits)
 
     toxicity = probs[1]  # Assuming label 1 is toxic
-    empathy = 1.0 - toxicity  # Simplified empathy score
-    bias = probs[1]  # Placeholder for bias
+    empathy = compute_empathy_score(toxicity)
+    bias = compute_bias_score(toxicity)
     print(f"Classification took {time.time() - start_time:.2f} seconds")
 
     # Compute semantic similarity using Sentence-BERT
     print("Computing semantic similarity...")
-    sentence_bert = metrics_models.sentence_bert
-    embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
-    similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
-    hallucination = 1.0 - similarity  # High difference means potential hallucination
-    print(f"Semantic similarity computed: {similarity}")
+    similarity = compute_semantic_similarity(original, paraphrase)
+    hallucination = compute_hallucination_score(similarity)
 
     # Compute reward score (weighted combination)
     reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
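
As a quick sanity check of the new guard clauses in compute_semantic_similarity, a minimal, hypothetical usage sketch; it assumes metrics.py is importable as a module and that metrics_models.sentence_bert has already been loaded there, as the diff implies, and the example strings are made up:

# Hypothetical usage; assumes metrics_models.sentence_bert is loaded when metrics.py is imported.
from metrics import compute_semantic_similarity

print(compute_semantic_similarity("Great point!", None))  # non-string paraphrase -> 0.0
print(compute_semantic_similarity("Great point!", "Error: Unable to generate paraphrase"))  # sentinel text -> 0.0

# A real pair goes through Sentence-BERT; cosine similarity is typically close to 1.0 for faithful paraphrases.
print(compute_semantic_similarity("You make a great point.", "That is a really good point."))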
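
And a small arithmetic sketch of the weighted combination at the end of compute_reward_scores, using made-up scores (the 0.4/0.2 weights and the placeholder formulas for empathy, bias, and hallucination come from the diff; the sample toxicity and similarity values are purely illustrative):

# Illustrative numbers only; weights and placeholder formulas mirror the diff.
toxicity = 0.10                   # probs[1], the toxic-class probability
empathy = 1.0 - toxicity          # compute_empathy_score(toxicity)          -> 0.90
bias = toxicity                   # compute_bias_score(toxicity)             -> 0.10
similarity = 0.85                 # compute_semantic_similarity(...) (assumed value)
hallucination = 1.0 - similarity  # compute_hallucination_score(similarity)  -> 0.15

reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
print(round(reward, 2))           # 0.36 - 0.02 - 0.02 - 0.03 = 0.29

Because empathy and bias are currently placeholders derived from the same toxicity probability, the combination reduces algebraically to 0.2 - 0.8 * toxicity + 0.2 * similarity, so the reward is effectively driven by toxicity and semantic similarity until dedicated empathy and bias models are plugged in.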