Spaces:

JanviMl
/

toxic-comment-classifier

Running

File size: 2,714 Bytes

b69b713
 
2c12a96
b69b713
2c12a96
b69b713
 
2c12a96
b69b713
 
 
 
 
 
 
 
 
 
 
 
d0ff307
 
 
b69b713
 
d0ff307
b69b713
d0ff307
 
 
 
 
b69b713
 
 
 
2c8a781
b69b713
 
7fa4f70
2c8a781
 
 
b69b713
 
 
 
 
5a980db
 
 
b69b713

# paraphraser.py
from model_loader import paraphraser_model

def paraphrase_comment(comment):
    """
    Paraphrase a toxic comment using the Granite 3.2-2B-Instruct model.

    Builds a few-shot moderation prompt around ``comment``, runs beam-search
    generation on the shared ``paraphraser_model`` and returns only the text
    the model produced after the prompt.

    Args:
        comment: The comment text to rewrite.

    Returns:
        The rewritten comment as a string, or ``None`` when ``comment`` is
        empty or whitespace-only. If anything fails while prompting or
        generating, the exception is not raised; instead a string starting
        with ``"Error paraphrasing comment: "`` is returned.
    """
    # Nothing to paraphrase: treat blank/whitespace-only input like empty input
    # instead of spending a model call on it.
    if not comment or (isinstance(comment, str) and not comment.strip()):
        return None

    try:
        model = paraphraser_model.model
        tokenizer = paraphraser_model.tokenizer

        # Create a detailed prompt with guidelines and examples
        prompt = (
            "You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning. "
            "Follow these guidelines:\n"
            "- Remove explicit hate speech, personal attacks, or offensive language.\n"
            "- Keep the response neutral and professional.\n"
            "- Ensure the rewritten comment retains the original intent but in a constructive tone.\n"
            "- Match the length and brevity of the original toxic comment whenever possible. Keep the response short and to the point.\n\n"
            "Examples:\n"
            "Toxic: \"You're so dumb! You never understand anything!\"\n"
            "Neutral: \"You might be misunderstanding this.\"\n"
            "Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
            "Neutral: \"I don’t think this idea works well.\"\n"
            "Toxic: \"You’re useless.\"\n"
            "Neutral: \"This isn’t helping much.\"\n"
            "Toxic: \"Shut up.\"\n"
            "Neutral: \"Let’s take a break from this.\"\n\n"
            f"Now, rewrite this comment: \"{comment}\""
        )
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)

        # Keep the input tensors on the same device as the model weights;
        # otherwise generation fails when the model lives on an accelerator.
        device = getattr(model, "device", None)
        if device is not None:
            inputs = inputs.to(device)

        # Generate the paraphrased comment deterministically
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,  # Number of new tokens to generate (excludes input length)
            num_beams=4,  # Beam search for more consistent output
            early_stopping=True,  # Stop once enough finished beams are found
            do_sample=False  # Disable sampling so beam search is deterministic
        )

        generated_ids = outputs[0]
        # Decoder-only models echo the prompt tokens at the start of the output.
        # Drop them by position rather than by string replacement: the decoded
        # prompt is not guaranteed to match the original text byte-for-byte
        # (truncation to max_length, tokenizer clean-up), which would leave the
        # whole prompt in the returned comment.
        model_config = getattr(model, "config", None)
        if not getattr(model_config, "is_encoder_decoder", False):
            prompt_token_count = inputs["input_ids"].shape[-1]
            generated_ids = generated_ids[prompt_token_count:]

        paraphrased_comment = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
        # Remove unwanted prefixes like "Neutral: " that mirror the few-shot examples
        if paraphrased_comment.startswith("Neutral: "):
            paraphrased_comment = paraphrased_comment[len("Neutral: "):].strip()
        return paraphrased_comment

    except Exception as e:
        # Callers receive the failure as text rather than an exception.
        return f"Error paraphrasing comment: {str(e)}"