# app.py
import gradio as gr
from classifier import classify_toxic_comment
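# classify_toxic_comment is expected to return a 14-tuple
# (inferred from the unpacking in handle_classification below):
# (prediction, confidence, color, toxicity_score, bias_score,
#  paraphrased_comment, paraphrased_prediction, paraphrased_confidence, paraphrased_color,
#  paraphrased_toxicity_score, paraphrased_bias_score,
#  semantic_similarity, emotion_shift, empathy_score)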
# Clear function for resetting the UI (one value per component wired to clear_btn's outputs)
def clear_inputs():
    return "", 0, "", [], "", "", "", "", 0, "", "", "", "", "", ""
# Custom CSS for styling
custom_css = """
.gr-button-primary {
    background-color: #4CAF50 !important;
    color: white !important;
}
.gr-button-secondary {
    background-color: #f44336 !important;
    color: white !important;
}
.gr-textbox textarea {
    border: 2px solid #2196F3 !important;
    border-radius: 8px !important;
}
.gr-slider {
    background-color: #e0e0e0 !important;
    border-radius: 10px !important;
}
"""
# Build the Gradio Blocks UI
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    gr.Markdown(
        """
        # Toxic Comment Classifier
        Enter a comment below to check if it's toxic or non-toxic. This app uses a fine-tuned XLM-RoBERTa model to classify comments as part of a four-stage pipeline for automated toxic comment moderation.
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            comment_input = gr.Textbox(
                label="Your Comment",
                placeholder="Type your comment here...",
                lines=3,
                max_lines=5
            )
        with gr.Column(scale=1):
            submit_btn = gr.Button("Classify Comment", variant="primary")
            clear_btn = gr.Button("Clear", variant="secondary")
    gr.Examples(
        examples=[
            "I love this community, it's so supportive!",
            "You are an idiot and should leave this platform.",
            "This app is amazing, great work!"
        ],
        inputs=comment_input,
        label="Try these examples:"
    )
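    # Classification results for the original comment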
    with gr.Row():
        with gr.Column(scale=2):
            prediction_output = gr.Textbox(label="Prediction", placeholder="Prediction will appear here...")
            toxicity_output = gr.Textbox(label="Toxicity Score", placeholder="Toxicity score will appear here...")
            bias_output = gr.Textbox(label="Bias Score", placeholder="Bias score will appear here...")
        with gr.Column(scale=1):
            confidence_output = gr.Slider(
                label="Confidence",
                minimum=0,
                maximum=1,
                value=0,
                interactive=False
            )
    with gr.Row():
        label_display = gr.HTML()
        threshold_display = gr.HTML()
    with gr.Accordion("Paraphrased Output (if Toxic)", open=False):
        paraphrased_comment_output = gr.Textbox(label="Paraphrased Comment", placeholder="Paraphrased comment will appear here if the input is toxic...")
        paraphrased_prediction_output = gr.Textbox(label="Paraphrased Prediction", placeholder="Prediction will appear here...")
        paraphrased_toxicity_output = gr.Textbox(label="Paraphrased Toxicity Score", placeholder="Toxicity score will appear here...")
        paraphrased_bias_output = gr.Textbox(label="Paraphrased Bias Score", placeholder="Bias score will appear here...")
        paraphrased_confidence_output = gr.Slider(
            label="Paraphrased Confidence",
            minimum=0,
            maximum=1,
            value=0,
            interactive=False
        )
        paraphrased_label_display = gr.HTML()
        semantic_similarity_output = gr.Textbox(label="Semantic Similarity", placeholder="Semantic similarity score will appear here...")
        emotion_shift_output = gr.Textbox(label="Emotion Shift", placeholder="Emotion shift will appear here...")
        empathy_score_output = gr.Textbox(label="Empathy Score", placeholder="Empathy score will appear here...")
    with gr.Accordion("Prediction History", open=False):
        history_output = gr.JSON(label="Previous Predictions")
    with gr.Accordion("Provide Feedback", open=False):
        feedback_input = gr.Radio(
            choices=["Yes, the prediction was correct", "No, the prediction was incorrect"],
            label="Was this prediction correct?"
        )
        feedback_comment = gr.Textbox(label="Additional Comments (optional)", placeholder="Let us know your thoughts...")
        feedback_submit = gr.Button("Submit Feedback")
        feedback_output = gr.Textbox(label="Feedback Status")
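    # Run the full pipeline on a comment, append the result to the prediction
    # history, and format every value for display.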
    def handle_classification(comment, history):
        if history is None:
            history = []
        (
            prediction, confidence, color, toxicity_score, bias_score,
            paraphrased_comment, paraphrased_prediction, paraphrased_confidence,
            paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
            semantic_similarity, emotion_shift, empathy_score
        ) = classify_toxic_comment(comment)
        history.append({
            "comment": comment,
            "prediction": prediction,
            "confidence": confidence,
            "toxicity_score": toxicity_score,
            "bias_score": bias_score,
            "paraphrased_comment": paraphrased_comment,
            "paraphrased_prediction": paraphrased_prediction,
            "paraphrased_confidence": paraphrased_confidence,
            "paraphrased_toxicity_score": paraphrased_toxicity_score,
            "paraphrased_bias_score": paraphrased_bias_score,
            "semantic_similarity": semantic_similarity,
            "emotion_shift": emotion_shift,
            "empathy_score": empathy_score
        })
        threshold_message = "High Confidence" if confidence >= 0.7 else "Low Confidence"
        threshold_color = "green" if confidence >= 0.7 else "orange"
        # Render the confidence-threshold badge as colored HTML for threshold_display
        threshold_html = f"<span style='color: {threshold_color}; font-weight: bold;'>{threshold_message}</span>"
        toxicity_display = f"{toxicity_score} (Scale: 0 to 1, lower is less toxic)" if toxicity_score is not None else "N/A"
        bias_display = f"{bias_score} (Scale: 0 to 1, lower indicates less bias)" if bias_score is not None else "N/A"
        paraphrased_comment_display = paraphrased_comment if paraphrased_comment else "N/A (Comment was non-toxic)"
        paraphrased_prediction_display = paraphrased_prediction if paraphrased_prediction else "N/A"
        paraphrased_confidence_display = paraphrased_confidence if paraphrased_confidence else 0
        paraphrased_toxicity_display = f"{paraphrased_toxicity_score} (Scale: 0 to 1, lower is less toxic)" if paraphrased_toxicity_score is not None else "N/A"
        paraphrased_bias_display = f"{paraphrased_bias_score} (Scale: 0 to 1, lower indicates less bias)" if paraphrased_bias_score is not None else "N/A"
        # Show the paraphrased prediction as colored HTML, using the color returned by the classifier
        paraphrased_label_html = (
            f"<span style='color: {paraphrased_color}; font-weight: bold;'>{paraphrased_prediction}</span>"
            if paraphrased_prediction else ""
        )
        semantic_similarity_display = f"{semantic_similarity} (Scale: 0 to 1, higher is better)" if semantic_similarity is not None else "N/A"
        emotion_shift_display = emotion_shift if emotion_shift else "N/A"
        empathy_score_display = f"{empathy_score} (Scale: 0 to 1, higher indicates more empathy)" if empathy_score is not None else "N/A"
        return (
            prediction, confidence, color, history, threshold_html,
            toxicity_display, bias_display,
            paraphrased_comment_display, paraphrased_prediction_display, paraphrased_confidence_display,
            paraphrased_toxicity_display, paraphrased_bias_display, paraphrased_label_html,
            semantic_similarity_display, emotion_shift_display, empathy_score_display
        )
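    # Feedback is only echoed back to the user; it is not persisted anywhere.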
    def handle_feedback(feedback, comment):
        return f"Thank you for your feedback: {feedback}\nAdditional comment: {comment}"
    submit_btn.click(
        fn=lambda: (
            "Classifying...", 0, "", "",
            "Calculating...", "Calculating...",
            "Paraphrasing...", "Calculating...", 0, "Calculating...", "Calculating...", "",
            "Calculating...", "Calculating...", "Calculating..."
        ),  # Show a loading state; history is left untouched so previous predictions accumulate
        inputs=[],
        outputs=[
            prediction_output, confidence_output, label_display, threshold_display,
            toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, emotion_shift_output, empathy_score_output
        ]
    ).then(
        fn=handle_classification,
        inputs=[comment_input, history_output],
        outputs=[
            prediction_output, confidence_output, label_display, history_output, threshold_display,
            toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, emotion_shift_output, empathy_score_output
        ]
    ).then(
        # label_display temporarily holds the color string returned by handle_classification;
        # replace it with the final colored prediction label
        fn=lambda prediction, confidence, color: f"<span style='color: {color}; font-weight: bold;'>{prediction}</span>",
        inputs=[prediction_output, confidence_output, label_display],
        outputs=label_display
    )
    feedback_submit.click(
        fn=handle_feedback,
        inputs=[feedback_input, feedback_comment],
        outputs=feedback_output
    )
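    # Reset all inputs and outputs to their initial state.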
    clear_btn.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[
            comment_input, confidence_output, label_display, history_output, toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, emotion_shift_output, empathy_score_output
        ]
    )
    gr.Markdown(
        """
        ---
        **About**: This app is part of a four-stage pipeline for automated toxic comment moderation with emotional intelligence via RLHF. Built with ❤️ using Hugging Face and Gradio.
        """
    )

demo.launch()