Update metrics.py
metrics.py CHANGED (+48 −7)
@@ -8,6 +8,50 @@ def softmax(logits):
     exp_logits = np.exp(logits - np.max(logits))
     return exp_logits / exp_logits.sum()
 
+def compute_semantic_similarity(original, paraphrase):
+    """
+    Compute semantic similarity between original and paraphrased text using Sentence-BERT.
+    Returns a similarity score between 0 and 1.
+    """
+    try:
+        if not isinstance(original, str) or not isinstance(paraphrase, str):
+            print(f"Invalid input for semantic similarity: original={original}, paraphrase={paraphrase}")
+            return 0.0
+        if "Error: Unable to generate paraphrase" in paraphrase:
+            print(f"Invalid paraphrase: {paraphrase}. Returning similarity 0.0.")
+            return 0.0
+
+        sentence_bert = metrics_models.sentence_bert
+        embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
+        similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+        print(f"Semantic similarity computed: {similarity}")
+        return similarity
+
+    except Exception as e:
+        print(f"Error computing semantic similarity: {str(e)}")
+        return 0.0
+
+def compute_empathy_score(toxicity):
+    """
+    Placeholder for empathy score computation.
+    For now, inversely proportional to toxicity.
+    """
+    return 1.0 - toxicity
+
+def compute_bias_score(toxicity):
+    """
+    Placeholder for bias score computation.
+    For now, equal to toxicity.
+    """
+    return toxicity
+
+def compute_hallucination_score(similarity):
+    """
+    Compute hallucination score based on semantic similarity.
+    High difference means potential hallucination.
+    """
+    return 1.0 - similarity
+
 def compute_reward_scores(original, paraphrase):
     """
     Compute reward scores for a paraphrased comment.
@@ -44,17 +88,14 @@ def compute_reward_scores(original, paraphrase):
     probs = softmax(logits)
 
     toxicity = probs[1]  # Assuming label 1 is toxic
-    empathy =
-    bias =
+    empathy = compute_empathy_score(toxicity)
+    bias = compute_bias_score(toxicity)
     print(f"Classification took {time.time() - start_time:.2f} seconds")
 
     # Compute semantic similarity using Sentence-BERT
     print("Computing semantic similarity...")
-
-
-    similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
-    hallucination = 1.0 - similarity  # High difference means potential hallucination
-    print(f"Semantic similarity computed: {similarity}")
+    similarity = compute_semantic_similarity(original, paraphrase)
+    hallucination = compute_hallucination_score(similarity)
 
     # Compute reward score (weighted combination)
     reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
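
With the placeholder helpers this commit introduces, empathy and bias are both pure functions of toxicity, so the weighted reward reduces to a function of the toxicity probability and the semantic similarity alone. A quick sanity check of that arithmetic (mine, not part of the commit):

    # Substituting empathy = 1 - t, bias = t, hallucination = 1 - s:
    #   reward = 0.4*(1 - t) - 0.2*t - 0.2*t - 0.2*(1 - s)
    #          = 0.2 - 0.8*t + 0.2*s
    # so for t, s in [0, 1] the reward is bounded in [-0.6, 0.4].
    def reward_direct(t, s):
        empathy = 1.0 - t        # compute_empathy_score(t)
        bias = t                 # compute_bias_score(t)
        hallucination = 1.0 - s  # compute_hallucination_score(s)
        return 0.4 * empathy - 0.2 * t - 0.2 * bias - 0.2 * hallucination

    def reward_reduced(t, s):
        return 0.2 - 0.8 * t + 0.2 * s

    assert abs(reward_direct(0.3, 0.9) - reward_reduced(0.3, 0.9)) < 1e-9

Until the empathy and bias placeholders are backed by real models, only the combined -0.8 weight on toxicity and the +0.2 weight on similarity actually matter.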
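
compute_semantic_similarity reads a module-level metrics_models object that holds a pre-loaded Sentence-BERT encoder; the diff does not show where that object is defined. A minimal sketch of how it could be wired up using the sentence-transformers package; the class layout and the checkpoint name are assumptions, not taken from this repo:

    # Hypothetical wiring for the `metrics_models` global used in metrics.py.
    # The checkpoint below is an assumed choice, not confirmed by the diff.
    from sentence_transformers import SentenceTransformer

    class MetricsModels:
        def __init__(self):
            # Load once at import time so repeated metric calls
            # reuse the same encoder instead of reloading it.
            self.sentence_bert = SentenceTransformer("all-MiniLM-L6-v2")

    metrics_models = MetricsModels()

    # Example call, with compute_semantic_similarity from the diff in scope:
    score = compute_semantic_similarity(
        "This comment is hostile and dismissive.",
        "This comment comes across as unfriendly.",
    )
    print(f"similarity = {score:.3f}")

Note the conservative failure mode: every error path returns a similarity of 0.0, which compute_hallucination_score turns into the worst-case hallucination of 1.0, so an encoder failure drags the reward down instead of silently inflating it.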
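
For reference, the unchanged softmax at the top of the file uses the standard max-subtraction trick: subtracting the largest logit before exponentiating leaves the result mathematically identical but keeps np.exp from overflowing on large logits. A small illustration of the same definition:

    import numpy as np

    def softmax(logits):
        exp_logits = np.exp(logits - np.max(logits))  # shift prevents overflow
        return exp_logits / exp_logits.sum()

    # Without the shift, np.exp(1000.0) overflows to inf and yields nan;
    # with it, the distribution is computed cleanly.
    print(softmax(np.array([1000.0, 999.0])))  # ~[0.731, 0.269]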