File size: 2,605 Bytes
b69b713 b3dae95 2c12a96 b69b713 2c12a96 b3dae95 b69b713 2c12a96 b69b713 b3dae95 1612ccf b3dae95 b69b713 b3dae95 b69b713 b3dae95 b69b713 b3dae95 78c5c64 1612ccf 78c5c64 b69b713 1612ccf b69b713 78c5c64 1612ccf b3dae95 b69b713 b3dae95 b69b713 b3dae95 b69b713 1612ccf 4d94977 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# paraphraser.py
from model_loader import paraphraser_model
import time
def _build_paraphrase_prompt(comment):
    """Return the instruction prompt that asks the model to paraphrase *comment*."""
    return (
        f"You are a content moderator tasked with paraphrasing a comment to make it non-toxic, empathetic, and professional while retaining the original intent. "
        f"The original comment is: \"{comment}\". "
        f"Guidelines: "
        f"- Remove any hate speech, offensive language, or toxic elements. "
        f"- Use a neutral or positive tone. "
        f"- Ensure the paraphrased comment is concise and clear. "
        f"- Maintain the core message or intent of the original comment. "
        f"Provide the paraphrased comment only, without additional explanation."
    )


def paraphrase_comment(comment):
    """
    Paraphrase a comment so it is non-toxic, empathetic, and professional.

    The comment is embedded in an instruction prompt, tokenized (truncated to
    256 tokens), and passed to ``paraphraser_model.model.generate`` with beam
    search (2 beams, up to 50 new tokens). Only the newly generated text is
    returned; the prompt itself is stripped from the output.

    Args:
        comment: The original comment text.

    Returns:
        The paraphrased comment. On failure a string starting with
        ``"Error: Unable to generate paraphrase"`` is returned instead of
        raising: this happens for empty/whitespace-only/non-string input,
        for an empty generation, and for any exception raised while
        tokenizing, generating, or decoding.
    """
    # Nothing meaningful can be paraphrased from empty or non-text input, so
    # fail fast instead of spending a model call on it.
    if not isinstance(comment, str) or not comment.strip():
        print("Skipping paraphrasing: comment is empty or not a string")
        return "Error: Unable to generate paraphrase."

    try:
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()
        print(f"Starting paraphrasing for comment: {comment[:50]}...")

        # Access the model and tokenizer
        model = paraphraser_model.model
        tokenizer = paraphraser_model.tokenizer

        prompt = _build_paraphrase_prompt(comment)

        # Tokenize the prompt with attention mask.
        # NOTE(review): with truncation at 256 tokens a very long comment can
        # push the trailing guidelines out of the prompt -- confirm that this
        # limit is intended.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=256,
            return_attention_mask=True,
        ).to(model.device)

        # Generate the paraphrased comment using max_new_tokens
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=50,  # Generate up to 50 new tokens
            num_beams=2,
            no_repeat_ngram_size=2,
            early_stopping=True,
        )

        # Decoder-only models echo the prompt tokens at the start of the
        # output sequence. Drop them by position rather than by matching the
        # decoded prompt text, which breaks whenever the prompt was truncated
        # or does not survive the tokenize/decode round trip unchanged.
        generated_ids = outputs[0]
        model_config = getattr(model, "config", None)
        if not getattr(model_config, "is_encoder_decoder", False):
            prompt_length = inputs["input_ids"].shape[-1]
            generated_ids = generated_ids[prompt_length:]

        paraphrased_comment = tokenizer.decode(
            generated_ids, skip_special_tokens=True
        ).strip()

        print(
            f"Paraphrasing completed in {time.perf_counter() - start_time:.2f} seconds"
        )
        return paraphrased_comment if paraphrased_comment else "Error: Unable to generate paraphrase."
    except Exception as e:
        # Boundary handler: callers receive an error string rather than an
        # exception, so report the failure and encode it in the return value.
        print(f"Error during paraphrasing: {str(e)}")
        return f"Error: Unable to generate paraphrase: {str(e)}"