# NOTE(review): removed "Spaces: / Sleeping / Sleeping" — Hugging Face Spaces
# status-page text accidentally pasted into this source file; it is not Python.
# paraphraser.py
import torch

from model_loader import paraphrase_model, paraphrase_tokenizer


def paraphrase_comment(comment):
    """
    Rewrite a toxic comment into a neutral, constructive one.

    Builds a few-shot moderation prompt around ``comment``, runs it through
    the Granite 3.2-2B-Instruct model loaded in ``model_loader``, and returns
    only the newly generated text (the prompt echo is stripped).

    Args:
        comment: The raw (possibly toxic) comment string to paraphrase.

    Returns:
        The model's paraphrased comment as a stripped string.
    """
    # Few-shot prompt: system instruction, guidelines, two examples, then the task.
    prompt = (
        "You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning.\n"
        "Guidelines:\n"
        "- Remove explicit hate speech, personal attacks, or offensive language.\n"
        "- Keep the response neutral and professional.\n"
        "- Ensure the rewritten comment retains the original intent but in a constructive tone.\n"
        "Examples:\n"
        "Toxic: \"You're so dumb! You never understand anything!\"\n"
        "Neutral: \"I think there's some misunderstanding. Let's clarify things.\"\n"
        "Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
        "Neutral: \"I don't think this idea works well. Maybe we can explore other options.\"\n"
        "Now, rewrite this comment: \"{comment}\""
    )
    # str.format keeps the template above readable; note it would misbehave if
    # `comment` itself contained `{`/`}` — acceptable for plain-text comments.
    prompt = prompt.format(comment=comment)

    # Tokenize, truncating very long prompts to the model's input budget.
    inputs = paraphrase_tokenizer(
        prompt, return_tensors="pt", truncation=True, padding=True, max_length=512
    )

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        outputs = paraphrase_model.generate(
            **inputs,
            # max_new_tokens bounds the *generated* text only. The previous
            # max_length=512 counted the prompt tokens too, so a long input
            # comment could leave the model almost no room to answer.
            max_new_tokens=512,
            num_return_sequences=1,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
        )

    # Causal LMs echo the prompt tokens at the start of the output sequence.
    # Slicing by the prompt's token length is more robust than the old
    # str.replace(prompt, "") approach: a decode of the echoed tokens does not
    # always reproduce the prompt string byte-for-byte, which left prompt
    # fragments in the result.
    prompt_len = inputs["input_ids"].shape[-1]
    paraphrased_comment = paraphrase_tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    )
    return paraphrased_comment.strip()