Update paraphraser.py
Browse files- paraphraser.py +11 -10
paraphraser.py
CHANGED
@@ -18,25 +18,26 @@ def paraphrase_comment(comment):
|
|
18 |
"You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning. "
|
19 |
"Follow these guidelines:\n"
|
20 |
"- Remove explicit hate speech, personal attacks, or offensive language.\n"
|
21 |
-
"- Keep the response neutral and
|
22 |
-
"- Ensure the rewritten comment retains the original intent but in a constructive tone.\n\n"
|
23 |
"Examples:\n"
|
24 |
"Toxic: \"You're so dumb! You never understand anything!\"\n"
|
25 |
-
"Neutral: \"I think there
|
26 |
"Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
|
27 |
-
"Neutral: \"I
|
|
|
|
|
28 |
f"Now, rewrite this comment: \"{comment}\""
|
29 |
)
|
30 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
31 |
|
32 |
-
# Generate the paraphrased comment
|
33 |
outputs = model.generate(
|
34 |
**inputs,
|
35 |
-
max_length=
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
do_sample=True
|
40 |
)
|
41 |
|
42 |
paraphrased_comment = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
18 |
"You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning. "
|
19 |
"Follow these guidelines:\n"
|
20 |
"- Remove explicit hate speech, personal attacks, or offensive language.\n"
|
21 |
+
"- Keep the response neutral and conversational, suitable for a casual online platform.\n"
|
22 |
+
"- Ensure the rewritten comment retains the original intent but in a constructive tone, addressing the specific context of the comment (e.g., disagreement, frustration).\n\n"
|
23 |
"Examples:\n"
|
24 |
"Toxic: \"You're so dumb! You never understand anything!\"\n"
|
25 |
+
"Neutral: \"I think there might be a misunderstanding here. Can we go over this again to clear things up?\"\n"
|
26 |
"Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
|
27 |
+
"Neutral: \"I’m not sure this idea works for me. Could we look at some other options instead?\"\n"
|
28 |
+
"Toxic: \"You are an idiot and should leave this platform.\"\n"
|
29 |
+
"Neutral: \"It seems like you might not be enjoying this platform. Maybe we can talk about what’s not working for you?\"\n\n"
|
30 |
f"Now, rewrite this comment: \"{comment}\""
|
31 |
)
|
32 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)
|
33 |
|
34 |
+
# Generate the paraphrased comment with optimized parameters
|
35 |
outputs = model.generate(
|
36 |
**inputs,
|
37 |
+
max_length=50, # Reduced max_length for short comments
|
38 |
+
num_beams=4, # Use beam search (4 beams) for more consistent, deterministic output; costs more compute than greedy decoding
|
39 |
+
early_stopping=True, # Stop beam search as soon as num_beams finished candidates exist
|
40 |
+
do_sample=False # Disable sampling to use beam search
|
|
|
41 |
)
|
42 |
|
43 |
paraphrased_comment = tokenizer.decode(outputs[0], skip_special_tokens=True)
|