# metrics.py
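"""Evaluation metrics for comparing an original comment with its paraphrase.

Covers semantic similarity (Sentence-BERT), emotion shift, a placeholder
empathy score, BLEU, ROUGE, and an NLI-based entailment check. All models
are provided by model_loader.metrics_models.
"""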
import string

import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from model_loader import metrics_models

# Download required NLTK tokenizer data (newer NLTK releases also need 'punkt_tab')
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

def compute_semantic_similarity(original, paraphrased):
    """
    Compute semantic similarity between the original and paraphrased comment using Sentence-BERT.
    Returns a similarity score between 0 and 1.
    """
    try:
        sentence_bert = metrics_models.load_sentence_bert()
        # Normalize the embeddings so the dot product equals cosine similarity;
        # clamp at 0 because cosine similarity can dip slightly below zero.
        embeddings = sentence_bert.encode([original, paraphrased], normalize_embeddings=True)
        similarity = max(0.0, float(embeddings[0] @ embeddings[1]))
        return round(similarity, 2)
    except Exception as e:
        print(f"Error computing semantic similarity: {str(e)}")
        return None

def compute_emotion_shift(original, paraphrased):
    """
    Compute the emotion shift between the original and paraphrased comment.
    Returns the original emotion, paraphrased emotion, and whether the shift is positive.
    """
    try:
        emotion_classifier = metrics_models.load_emotion_classifier()
        original_emotions = emotion_classifier(original)
        paraphrased_emotions = emotion_classifier(paraphrased)

        # Get the top emotion for each
        original_emotion = max(original_emotions[0], key=lambda x: x['score'])['label']
        paraphrased_emotion = max(paraphrased_emotions[0], key=lambda x: x['score'])['label']

        # Emotions treated as negative; everything else (joy, love, surprise, ...)
        # counts as non-negative.
        negative_emotions = ['anger', 'sadness', 'fear']

        # A shift is positive when a negative emotion is replaced by a
        # non-negative one (this already covers negative -> positive).
        emotion_shift_positive = (
            original_emotion in negative_emotions and
            paraphrased_emotion not in negative_emotions
        )

        return original_emotion, paraphrased_emotion, emotion_shift_positive
    except Exception as e:
        print(f"Error computing emotion shift: {str(e)}")
        return None, None, None

def compute_empathy_score(paraphrased):
    """
    Compute an empathy score for the paraphrased comment (placeholder).
    Returns a score between 0 and 1.
    """
    try:
        # Placeholder: score empathy by the fraction of empathy-related words
        empathy_words = {"sorry", "understand", "care", "help", "support"}
        # Strip surrounding punctuation so tokens like "sorry," still match
        words = [w.strip(string.punctuation) for w in paraphrased.lower().split()]
        empathy_count = sum(1 for word in words if word in empathy_words)
        score = empathy_count / len(words) if words else 0
        return round(score, 2)
    except Exception as e:
        print(f"Error computing empathy score: {str(e)}")
        return None

def compute_bleu_score(original, paraphrased):
    """
    Compute the BLEU score between the original and paraphrased comment.
    Returns a score between 0 and 1.
    """
    try:
        reference = [nltk.word_tokenize(original.lower())]
        candidate = nltk.word_tokenize(paraphrased.lower())
        # Smoothing avoids zero scores when short sentences lack higher-order n-gram overlap
        score = sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25),
                              smoothing_function=SmoothingFunction().method1)
        return round(score, 2)
    except Exception as e:
        print(f"Error computing BLEU score: {str(e)}")
        return None

def compute_rouge_score(original, paraphrased):
    """
    Compute ROUGE scores (ROUGE-1, ROUGE-2, ROUGE-L) between the original and paraphrased comment.
    Returns a dictionary with ROUGE scores.
    """
    try:
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        scores = scorer.score(original, paraphrased)
        return {
            'rouge1': round(scores['rouge1'].fmeasure, 2),
            'rouge2': round(scores['rouge2'].fmeasure, 2),
            'rougeL': round(scores['rougeL'].fmeasure, 2)
        }
    except Exception as e:
        print(f"Error computing ROUGE scores: {str(e)}")
        return None

def compute_entailment_score(original, paraphrased):
    """
    Compute the entailment score to check factual consistency using an NLI model.
    Returns a score between 0 and 1.
    """
    try:
        # Assumes load_nli_classifier returns a transformers zero-shot-classification
        # pipeline backed by an NLI model. Passing the paraphrase as the sole
        # candidate label with a bare "{}" template scores how strongly the
        # original (premise) entails the paraphrase (hypothesis).
        nli_classifier = metrics_models.load_nli_classifier()
        result = nli_classifier(
            original,
            candidate_labels=[paraphrased],
            hypothesis_template="{}"
        )
        entailment_score = float(result['scores'][0])
        return round(entailment_score, 2)
    except Exception as e:
        print(f"Error computing entailment score: {str(e)}")
        return None
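
if __name__ == "__main__":
    # Minimal smoke test with hypothetical inputs; a sketch assuming
    # model_loader.metrics_models exposes the loaders used above and the
    # underlying model weights are available locally.
    original = "This code is terrible and you clearly didn't test it."
    paraphrased = "I understand the effort here, but the code needs testing before merge."

    print("semantic similarity:", compute_semantic_similarity(original, paraphrased))
    print("emotion shift:", compute_emotion_shift(original, paraphrased))
    print("empathy score:", compute_empathy_score(paraphrased))
    print("BLEU:", compute_bleu_score(original, paraphrased))
    print("ROUGE:", compute_rouge_score(original, paraphrased))
    print("entailment:", compute_entailment_score(original, paraphrased))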