toxic-comment-classifier_rlhf

Sleeping

App Files Files Community

JanviMl committed on Mar 25

Commit

106e766

verified ·

1 Parent(s): 0553ede

Update classifier.py

Browse files

Files changed (1) hide show

classifier.py +18 -8

classifier.py CHANGED Viewed

@@ -1,8 +1,8 @@
 # classifier.py
 import torch
-from model_loader import classifier_model, classifier_tokenizer
 from paraphraser import paraphrase_comment
-from metrics import compute_semantic_similarity, compute_emotion_shift, compute_empathy_score
 def classify_toxic_comment(comment):
     """
@@ -11,14 +11,18 @@ def classify_toxic_comment(comment):
     Returns the prediction label, confidence, color, toxicity score, bias score, paraphrased comment (if applicable), and its metrics.
     """
     if not comment.strip():
-        return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None, None, None, None
     # Tokenize the input comment
-    inputs = classifier_tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Run inference
     with torch.no_grad():
-        outputs = classifier_model(**inputs)
         logits = outputs.logits
     # Get the predicted class (0 = non-toxic, 1 = toxic)
@@ -47,15 +51,18 @@ def classify_toxic_comment(comment):
     paraphrased_emotion = None
     emotion_shift_positive = None
     empathy_score = None
     if label == "Toxic":
         # Paraphrase the comment
         paraphrased_comment = paraphrase_comment(comment)
         # Re-evaluate the paraphrased comment
-        paraphrased_inputs = classifier_tokenizer(paraphrased_comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
         with torch.no_grad():
-            paraphrased_outputs = classifier_model(**paraphrased_inputs)
             paraphrased_logits = paraphrased_outputs.logits
         paraphrased_predicted_class = torch.argmax(paraphrased_logits, dim=1).item()
@@ -71,11 +78,14 @@ def classify_toxic_comment(comment):
         semantic_similarity = compute_semantic_similarity(comment, paraphrased_comment)
         original_emotion, paraphrased_emotion, emotion_shift_positive = compute_emotion_shift(comment, paraphrased_comment)
         empathy_score = compute_empathy_score(paraphrased_comment)
     return (
         f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score,
         paraphrased_comment, f"Prediction: {paraphrased_label}" if paraphrased_comment else None,
         paraphrased_confidence, paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
         semantic_similarity, f"Original: {original_emotion}, Paraphrased: {paraphrased_emotion}, Positive Shift: {emotion_shift_positive}" if original_emotion else None,
-        empathy_score
     )

 # classifier.py
 import torch
+from model.classifier import classifier_model
 from paraphraser import paraphrase_comment
+from metrics import compute_semantic_similarity, compute_emotion_shift, compute_empathy_score, compute_bleu_score, compute_rouge_score, compute_entailment_score
 def classify_toxic_comment(comment):
     """
     Returns the prediction label, confidence, color, toxicity score, bias score, paraphrased comment (if applicable), and its metrics.
     """
     if not comment.strip():
+        return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
+    # Access the model and tokenizer
+    model = classifier_model.model
+    tokenizer = classifier_model.tokenizer
     # Tokenize the input comment
+    inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Run inference
     with torch.no_grad():
+        outputs = model(**inputs)
         logits = outputs.logits
     # Get the predicted class (0 = non-toxic, 1 = toxic)
     paraphrased_emotion = None
     emotion_shift_positive = None
     empathy_score = None
+    bleu_score = None
+    rouge_scores = None
+    entailment_score = None
     if label == "Toxic":
         # Paraphrase the comment
         paraphrased_comment = paraphrase_comment(comment)
         # Re-evaluate the paraphrased comment
+        paraphrased_inputs = tokenizer(paraphrased_comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
         with torch.no_grad():
+            paraphrased_outputs = model(**paraphrased_inputs)
             paraphrased_logits = paraphrased_outputs.logits
         paraphrased_predicted_class = torch.argmax(paraphrased_logits, dim=1).item()
         semantic_similarity = compute_semantic_similarity(comment, paraphrased_comment)
         original_emotion, paraphrased_emotion, emotion_shift_positive = compute_emotion_shift(comment, paraphrased_comment)
         empathy_score = compute_empathy_score(paraphrased_comment)
+        bleu_score = compute_bleu_score(comment, paraphrased_comment)
+        rouge_scores = compute_rouge_score(comment, paraphrased_comment)
+        entailment_score = compute_entailment_score(comment, paraphrased_comment)
     return (
         f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score,
         paraphrased_comment, f"Prediction: {paraphrased_label}" if paraphrased_comment else None,
         paraphrased_confidence, paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
         semantic_similarity, f"Original: {original_emotion}, Paraphrased: {paraphrased_emotion}, Positive Shift: {emotion_shift_positive}" if original_emotion else None,
+        empathy_score, bleu_score, rouge_scores, entailment_score
     )