JanviMl committed (verified)
Commit 59e622f · 1 Parent(s): af98023

Update classifier.py

Files changed (1):
  1. classifier.py (+43 -12)
classifier.py CHANGED
@@ -1,21 +1,23 @@
 # classifier.py
 import torch
-from model_loader import model, tokenizer
+from model_loader import classifier_model, classifier_tokenizer
+from paraphraser import paraphrase_comment
 
 def classify_toxic_comment(comment):
     """
     Classify a comment as toxic or non-toxic using the fine-tuned XLM-RoBERTa model.
-    Returns the prediction label, confidence, color, toxicity score, and bias score for UI display.
+    If toxic, paraphrase the comment and re-evaluate.
+    Returns the prediction label, confidence, color, toxicity score, bias score, paraphrased comment (if applicable), and its metrics.
     """
     if not comment.strip():
-        return "Error: Please enter a comment.", None, None, None, None
+        return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None
 
     # Tokenize the input comment
-    inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
+    inputs = classifier_tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
 
     # Run inference
     with torch.no_grad():
-        outputs = model(**inputs)
+        outputs = classifier_model(**inputs)
         logits = outputs.logits
 
     # Get the predicted class (0 = non-toxic, 1 = toxic)
@@ -24,14 +26,43 @@ def classify_toxic_comment(comment):
     confidence = torch.softmax(logits, dim=1)[0][predicted_class].item()
     label_color = "red" if label == "Toxic" else "green"
 
-    # Simulate Toxicity Score (in a real scenario, use a model like Detoxify)
-    # For now, we'll approximate it based on the confidence of the toxic class
-    toxicity_score = torch.softmax(logits, dim=1)[0][1].item()  # Probability of toxic class
+    # Compute Toxicity Score (approximated as the probability of the toxic class)
+    toxicity_score = torch.softmax(logits, dim=1)[0][1].item()
     toxicity_score = round(toxicity_score, 2)
 
-    # Simulate Bias Score (in a real scenario, use a bias detection model like WEAT)
-    # For now, we'll use a placeholder value (since the example comment is non-toxic)
-    bias_score = 0.01 if label == "Non-Toxic" else 0.15  # Placeholder logic
+    # Simulate Bias Score (placeholder)
+    bias_score = 0.01 if label == "Non-Toxic" else 0.15
     bias_score = round(bias_score, 2)
 
-    return f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score
+    # If the comment is toxic, paraphrase it
+    paraphrased_comment = None
+    paraphrased_prediction = None
+    paraphrased_confidence = None
+    paraphrased_color = None
+    paraphrased_toxicity_score = None
+    paraphrased_bias_score = None
+
+    if label == "Toxic":
+        # Paraphrase the comment
+        paraphrased_comment = paraphrase_comment(comment)
+
+        # Re-evaluate the paraphrased comment
+        paraphrased_inputs = classifier_tokenizer(paraphrased_comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
+        with torch.no_grad():
+            paraphrased_outputs = classifier_model(**paraphrased_inputs)
+            paraphrased_logits = paraphrased_outputs.logits
+
+        paraphrased_predicted_class = torch.argmax(paraphrased_logits, dim=1).item()
+        paraphrased_label = "Toxic" if paraphrased_predicted_class == 1 else "Non-Toxic"
+        paraphrased_confidence = torch.softmax(paraphrased_logits, dim=1)[0][paraphrased_predicted_class].item()
+        paraphrased_color = "red" if paraphrased_label == "Toxic" else "green"
+        paraphrased_toxicity_score = torch.softmax(paraphrased_logits, dim=1)[0][1].item()
+        paraphrased_toxicity_score = round(paraphrased_toxicity_score, 2)
+        paraphrased_bias_score = 0.01 if paraphrased_label == "Non-Toxic" else 0.15  # Placeholder
+        paraphrased_bias_score = round(paraphrased_bias_score, 2)
+
+    return (
+        f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score,
+        paraphrased_comment, f"Prediction: {paraphrased_label}" if paraphrased_comment else None,
+        paraphrased_confidence, paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score
+    )
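
For reference, a minimal usage sketch of the updated function (not part of this commit). It assumes the repository's model_loader and paraphraser modules are importable so classifier.py loads, and it simply unpacks the 11-value tuple that a UI layer would display; the caller script name and the example comment string are illustrative only.

# usage_sketch.py — hypothetical caller, not part of the commit
from classifier import classify_toxic_comment

result = classify_toxic_comment("Your argument makes no sense and you know it.")
(prediction, confidence, color, toxicity, bias,
 paraphrased, paraphrased_prediction, paraphrased_confidence,
 paraphrased_color, paraphrased_toxicity, paraphrased_bias) = result

print(prediction, confidence, color, toxicity, bias)
if paraphrased is not None:
    # Populated only when the original comment was classified as Toxic
    print("Paraphrase:", paraphrased)
    print(paraphrased_prediction, paraphrased_confidence, paraphrased_toxicity, paraphrased_bias)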