# metrics.py
import time

import numpy as np
import torch

from model_loader import classifier_model, metrics_models


def softmax(logits):
    """Convert raw logits into a probability distribution (numerically stable)."""
    exp_logits = np.exp(logits - np.max(logits))
    return exp_logits / exp_logits.sum()


def compute_semantic_similarity(original, paraphrase):
    """
    Compute semantic similarity between the original and paraphrased text using Sentence-BERT.
    Returns the cosine similarity of the two embeddings (roughly 0 to 1 for typical inputs),
    or 0.0 on invalid input or error.
    """
    try:
        if not isinstance(original, str) or not isinstance(paraphrase, str):
            print(f"Invalid input for semantic similarity: original={original}, paraphrase={paraphrase}")
            return 0.0
        if "Error: Unable to generate paraphrase" in paraphrase:
            print(f"Invalid paraphrase: {paraphrase}. Returning similarity 0.0.")
            return 0.0
        sentence_bert = metrics_models.sentence_bert
        embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
        similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
        print(f"Semantic similarity computed: {similarity}")
        return similarity
    except Exception as e:
        print(f"Error computing semantic similarity: {str(e)}")
        return 0.0


def compute_empathy_score(toxicity):
    """
    Placeholder for empathy score computation.
    For now, simply the complement of toxicity (1.0 - toxicity).
    """
    return 1.0 - toxicity


def compute_bias_score(toxicity):
    """
    Placeholder for bias score computation.
    For now, equal to toxicity.
    """
    return toxicity


def compute_hallucination_score(similarity):
    """
    Compute hallucination score based on semantic similarity:
    low similarity to the original suggests potential hallucination.
    """
    return 1.0 - similarity


def compute_reward_scores(original, paraphrase):
    """
    Compute reward scores for a paraphrased comment.
    Returns a dictionary with empathy, toxicity, bias, hallucination, and reward scores.
    """
    try:
        start_time = time.time()
        print("Starting reward computation...")

        # Check whether the paraphrase is valid
        if not isinstance(paraphrase, str) or "Error: Unable to generate paraphrase" in paraphrase:
            print(f"Invalid paraphrase: {paraphrase}. Returning default scores.")
            return {
                "empathy": 0.0,
                "toxicity": 1.0,
                "bias": 1.0,
                "hallucination": 1.0,
                "reward": 0.0
            }

        # Classify the paraphrased comment
        print("Starting classification...")
        inputs = classifier_model.tokenizer(
            paraphrase,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        ).to(classifier_model.device)
        with torch.no_grad():
            outputs = classifier_model.model(**inputs)
        logits = outputs.logits.cpu().numpy()[0]
        probs = softmax(logits)
        toxicity = float(probs[1])  # Assuming label 1 is the toxic class
        empathy = compute_empathy_score(toxicity)
        bias = compute_bias_score(toxicity)
        print(f"Classification took {time.time() - start_time:.2f} seconds")

        # Compute semantic similarity using Sentence-BERT
        print("Computing semantic similarity...")
        similarity = compute_semantic_similarity(original, paraphrase)
        hallucination = compute_hallucination_score(similarity)

        # Compute the reward score as a weighted combination, clamped to [0, 1]
        reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
        reward = max(0.0, min(1.0, reward))

        print(f"Total processing time: {time.time() - start_time:.2f} seconds")
        return {
            "empathy": empathy,
            "toxicity": toxicity,
            "bias": bias,
            "hallucination": hallucination,
            "reward": reward
        }
    except Exception as e:
        print(f"Error in reward computation: {str(e)}")
        return {
            "empathy": 0.0,
            "toxicity": 1.0,
            "bias": 1.0,
            "hallucination": 1.0,
            "reward": 0.0
        }
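

# ---------------------------------------------------------------------------
# Minimal usage sketch (assumptions: the models wired up in model_loader are
# loadable in this environment, and the example texts below are illustrative
# only). Run the module directly to print the individual scores for one
# original/paraphrase pair.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    example_original = "This idea is useless and you clearly didn't think it through."
    example_paraphrase = "I think this idea needs more work; a few points weren't fully thought through."

    scores = compute_reward_scores(example_original, example_paraphrase)
    for metric, value in scores.items():
        print(f"{metric}: {value:.3f}")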