|
|
|
import gradio as gr |
|
import csv |
|
import os |
|
from datetime import datetime |
|
from classifier import classify_toxic_comment |
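# classify_toxic_comment(comment) comes from the local classifier module.
# Based on how its result is unpacked in handle_classification below, it is
# expected to return a 13-tuple:
#   (prediction, confidence, color, toxicity_score, bias_score,
#    paraphrased_comment, paraphrased_prediction, paraphrased_confidence,
#    paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
#    semantic_similarity, empathy_score)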
|
|
|
def clear_inputs(): |
|
""" |
|
Reset all UI input and output fields to their default values. |
|
Returns a tuple of empty or default values for all UI components. |
|
""" |
|
return "", 0, "", [], "", "", "", "", 0, "", "", "", "", "" |
|
|
|
custom_css = """ |
|
/* General Styling */ |
|
body { |
|
font-family: 'Roboto', sans-serif; |
|
background-color: #F5F7FA; |
|
color: #333333; |
|
} |
|
|
|
/* Header Styling */ |
|
h1 { |
|
color: #FFFFFF !important; |
|
background-color: #1E88E5; |
|
padding: 20px; |
|
border-radius: 10px; |
|
text-align: center; |
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); |
|
margin-bottom: 20px; |
|
} |
|
|
|
/* Section Headers */ |
|
h3 { |
|
color: #1E88E5; |
|
font-weight: 600; |
|
margin-bottom: 15px; |
|
border-bottom: 2px solid #1E88E5; |
|
padding-bottom: 5px; |
|
} |
|
|
|
/* Input Textbox */ |
|
.gr-textbox textarea { |
|
border: 2px solid #1E88E5 !important; |
|
border-radius: 10px !important; |
|
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); |
|
transition: border-color 0.3s, box-shadow 0.3s; |
|
} |
|
.gr-textbox textarea:focus { |
|
border-color: #1565C0 !important; |
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important; |
|
} |
|
|
|
/* Buttons */ |
|
.gr-button-primary { |
|
background-color: #1E88E5 !important; |
|
color: white !important; |
|
border-radius: 10px !important; |
|
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); |
|
transition: background-color 0.3s, transform 0.1s; |
|
font-weight: 500; |
|
} |
|
.gr-button-primary:hover { |
|
background-color: #1565C0 !important; |
|
transform: translateY(-2px); |
|
} |
|
.gr-button-secondary { |
|
background-color: #D32F2F !important; |
|
color: white !important; |
|
border-radius: 10px !important; |
|
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); |
|
transition: background-color 0.3s, transform 0.1s; |
|
font-weight: 500; |
|
} |
|
.gr-button-secondary:hover { |
|
background-color: #B71C1C !important; |
|
transform: translateY(-2px); |
|
} |
|
|
|
/* Sliders */ |
|
.gr-slider { |
|
background-color: #E0E0E0 !important; |
|
border-radius: 10px !important; |
|
box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1); |
|
} |
|
|
|
/* Output Boxes */ |
|
.gr-textbox { |
|
border: 1px solid #E0E0E0 !important; |
|
border-radius: 10px !important; |
|
background-color: #FFFFFF !important; |
|
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); |
|
padding: 10px; |
|
margin-bottom: 10px; |
|
} |
|
|
|
/* Accordion */ |
|
.gr-accordion { |
|
border: 1px solid #E0E0E0 !important; |
|
border-radius: 10px !important; |
|
background-color: #FFFFFF !important; |
|
margin-bottom: 15px; |
|
} |
|
|
|
/* Custom Classes for Visual Indicators */ |
|
.toxic-indicator::before { |
|
content: "⚠️ "; |
|
color: #D32F2F; |
|
font-size: 20px; |
|
} |
|
.nontoxic-indicator::before { |
|
content: "✅ "; |
|
color: #388E3C; |
|
font-size: 20px; |
|
} |
|
|
|
/* Loading State Animation */ |
|
@keyframes pulse { |
|
0% { opacity: 1; } |
|
50% { opacity: 0.5; } |
|
100% { opacity: 1; } |
|
} |
|
.loading { |
|
animation: pulse 1.5s infinite; |
|
} |
|
""" |
|
|
|
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo: |
|
gr.Markdown( |
|
""" |
|
# Toxic Comment Classifier |
|
    Enter a comment below to check whether it is toxic or non-toxic. The app classifies the comment with a fine-tuned XLM-RoBERTa model, paraphrases toxic comments, and scores the result for toxicity, bias, semantic similarity, and empathy.
|
""" |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=4, min_width=600): |
|
comment_input = gr.Textbox( |
|
label="Your Comment", |
|
placeholder="Type your comment here...", |
|
lines=3, |
|
max_lines=5 |
|
) |
|
with gr.Column(scale=1, min_width=200): |
|
submit_btn = gr.Button("Classify Comment", variant="primary") |
|
clear_btn = gr.Button("Clear", variant="secondary") |
|
|
|
gr.Examples( |
|
examples=[ |
|
"I love this community, it's so supportive!", |
|
"You are an idiot and should leave this platform.", |
|
"This app is amazing, great work!" |
|
], |
|
inputs=comment_input, |
|
label="Try these examples:" |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1, min_width=400): |
|
gr.Markdown("### Original Comment Analysis") |
|
prediction_output = gr.Textbox(label="Prediction", placeholder="Prediction will appear here...") |
|
label_display = gr.HTML() |
|
confidence_output = gr.Slider( |
|
label="Confidence", |
|
minimum=0, |
|
maximum=1, |
|
value=0, |
|
interactive=False |
|
) |
|
toxicity_output = gr.Textbox(label="Toxicity Score", placeholder="Toxicity score will appear here...") |
|
bias_output = gr.Textbox(label="Bias Score", placeholder="Bias score will appear here...") |
|
threshold_display = gr.HTML() |
|
|
|
with gr.Column(scale=1, min_width=400): |
|
with gr.Accordion("Paraphrased Output (if Toxic)", open=False): |
|
paraphrased_comment_output = gr.Textbox(label="Paraphrased Comment", placeholder="Paraphrased comment will appear here if the input is toxic...") |
|
paraphrased_prediction_output = gr.Textbox(label="Paraphrased Prediction", placeholder="Prediction will appear here...") |
|
paraphrased_label_display = gr.HTML() |
|
paraphrased_confidence_output = gr.Slider( |
|
label="Paraphrased Confidence", |
|
minimum=0, |
|
maximum=1, |
|
value=0, |
|
interactive=False |
|
) |
|
paraphrased_toxicity_output = gr.Textbox(label="Paraphrased Toxicity Score", placeholder="Toxicity score will appear here...") |
|
paraphrased_bias_output = gr.Textbox(label="Paraphrased Bias Score", placeholder="Bias score will appear here...") |
|
semantic_similarity_output = gr.Textbox(label="Semantic Similarity", placeholder="Semantic similarity score will appear here...") |
|
empathy_score_output = gr.Textbox(label="Empathy Score", placeholder="Empathy score will appear here...") |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
with gr.Accordion("Prediction History", open=False): |
|
history_output = gr.JSON(label="Previous Predictions") |
|
|
|
with gr.Column(scale=1): |
|
with gr.Accordion("Provide Feedback", open=False): |
|
feedback_input = gr.Radio( |
|
choices=["Yes, the prediction was correct", "No, the prediction was incorrect"], |
|
label="Was this prediction correct?" |
|
) |
|
feedback_comment = gr.Textbox(label="Additional Comments (optional)", placeholder="Let us know your thoughts...") |
|
feedback_submit = gr.Button("Submit Feedback") |
|
feedback_output = gr.Textbox(label="Feedback Status") |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
refine_btn = gr.Button("Run Iterative Refinement (Stage 4)", variant="primary") |
|
refine_status = gr.Textbox(label="Refinement Status", placeholder="Status will appear here...") |
|
|
|
def handle_classification(comment, history): |
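        """
        Classify the comment, append the result to the running prediction
        history, and format all scores and labels for display in the UI.
        """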
|
if history is None: |
|
history = [] |
|
( |
|
prediction, confidence, color, toxicity_score, bias_score, |
|
paraphrased_comment, paraphrased_prediction, paraphrased_confidence, |
|
paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score, |
|
semantic_similarity, empathy_score |
|
) = classify_toxic_comment(comment) |
|
|
|
history.append({ |
|
"comment": comment, |
|
"prediction": prediction, |
|
"confidence": confidence, |
|
"toxicity_score": toxicity_score, |
|
"bias_score": bias_score, |
|
"paraphrased_comment": paraphrased_comment, |
|
"paraphrased_prediction": paraphrased_prediction, |
|
"paraphrased_confidence": paraphrased_confidence, |
|
"paraphrased_toxicity_score": paraphrased_toxicity_score, |
|
"paraphrased_bias_score": paraphrased_bias_score, |
|
"semantic_similarity": semantic_similarity, |
|
"empathy_score": empathy_score |
|
}) |
|
|
|
        threshold_message = "High Confidence" if confidence >= 0.7 else "Low Confidence"
        threshold_color = "green" if confidence >= 0.7 else "orange"
        threshold_html = f"<span style='color: {threshold_color}; font-size: 16px;'>{threshold_message}</span>"
|
toxicity_display = f"{toxicity_score} (Scale: 0 to 1, lower is less toxic)" if toxicity_score is not None else "N/A" |
|
bias_display = f"{bias_score} (Scale: 0 to 1, lower indicates less bias)" if bias_score is not None else "N/A" |
|
|
|
paraphrased_comment_display = paraphrased_comment if paraphrased_comment else "N/A (Comment was non-toxic)" |
|
paraphrased_prediction_display = paraphrased_prediction if paraphrased_prediction else "N/A" |
|
paraphrased_confidence_display = paraphrased_confidence if paraphrased_confidence else 0 |
|
paraphrased_toxicity_display = f"{paraphrased_toxicity_score} (Scale: 0 to 1, lower is less toxic)" if paraphrased_toxicity_score is not None else "N/A" |
|
paraphrased_bias_display = f"{paraphrased_bias_score} (Scale: 0 to 1, lower indicates less bias)" if paraphrased_bias_score is not None else "N/A" |
|
paraphrased_label_html = ( |
|
f"<span class='{'toxic-indicator' if 'Toxic' in paraphrased_prediction else 'nontoxic-indicator'}' " |
|
f"style='color: {paraphrased_color}; font-size: 20px; font-weight: bold;'>{paraphrased_prediction}</span>" |
|
if paraphrased_prediction else "" |
|
) |
|
semantic_similarity_display = f"{semantic_similarity} (Scale: 0 to 1, higher is better)" if semantic_similarity is not None else "N/A" |
|
empathy_score_display = f"{empathy_score} (Scale: 0 to 1, higher indicates more empathy)" if empathy_score is not None else "N/A" |
|
|
|
prediction_class = "toxic-indicator" if "Toxic" in prediction else "nontoxic-indicator" |
|
prediction_html = f"<span class='{prediction_class}' style='color: {color}; font-size: 20px; font-weight: bold;'>{prediction}</span>" |
|
|
|
        return (
            prediction, confidence, prediction_html, history, threshold_html,
            toxicity_display, bias_display,
            paraphrased_comment_display, paraphrased_prediction_display, paraphrased_confidence_display,
            paraphrased_toxicity_display, paraphrased_bias_display, paraphrased_label_html,
            semantic_similarity_display, empathy_score_display
        )
|
|
|
def handle_feedback(feedback, additional_comment, comment, prediction, confidence): |
|
""" |
|
Handle user feedback and store it in a CSV file. |
|
""" |
|
if not feedback: |
|
return "Please select a feedback option before submitting." |
|
|
|
|
|
csv_file_path = "/home/user/app/feedback.csv" |
|
|
|
|
|
file_exists = os.path.isfile(csv_file_path) |
|
with open(csv_file_path, mode='a', newline='', encoding='utf-8') as csv_file: |
|
fieldnames = ['timestamp', 'comment', 'prediction', 'confidence', 'feedback', 'additional_comment'] |
|
writer = csv.DictWriter(csv_file, fieldnames=fieldnames) |
|
|
|
if not file_exists: |
|
writer.writeheader() |
|
|
|
|
|
writer.writerow({ |
|
'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), |
|
'comment': comment, |
|
'prediction': prediction, |
|
'confidence': confidence, |
|
'feedback': feedback, |
|
'additional_comment': additional_comment if additional_comment else "N/A" |
|
}) |
|
|
|
return f"Thank you for your feedback: {feedback}\nAdditional comment: {additional_comment if additional_comment else 'None'}\nFeedback has been saved." |
|
|
|
def run_refinement(): |
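        """
        Run the Stage 4 iterative paraphrase refinement (refine_paraphrases.main)
        and return a status message for the UI.
        """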
|
try: |
|
from refine_paraphrases import main |
|
main() |
|
return "Refinement complete. Results saved to iterated_paraphrases.csv and pushed to JanviMl/toxi_iterated_paraphrases." |
|
except Exception as e: |
|
return f"Error running refinement: {str(e)}" |
|
|
|
submit_btn.click( |
|
        # Step 1 of the click chain: show loading placeholders while the
        # classifier runs. history_output is intentionally not reset here so
        # the prediction history accumulates across submissions.
        fn=lambda: (
            "Classifying... <span class='loading'>⏳</span>", 0, "", "",
            "Calculating... <span class='loading'>⏳</span>", "Calculating... <span class='loading'>⏳</span>",
            "Paraphrasing... <span class='loading'>⏳</span>", "Calculating... <span class='loading'>⏳</span>", 0,
            "Calculating... <span class='loading'>⏳</span>", "Calculating... <span class='loading'>⏳</span>", "",
            "Calculating... <span class='loading'>⏳</span>", "Calculating... <span class='loading'>⏳</span>"
        ),
        inputs=[],
        outputs=[
            prediction_output, confidence_output, label_display, threshold_display,
            toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, empathy_score_output
        ]
|
).then( |
|
fn=handle_classification, |
|
inputs=[comment_input, history_output], |
|
        outputs=[
            prediction_output, confidence_output, label_display, history_output, threshold_display,
            toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, empathy_score_output
        ]
|
    )
|
|
|
feedback_submit.click( |
|
fn=handle_feedback, |
|
inputs=[feedback_input, feedback_comment, comment_input, prediction_output, confidence_output], |
|
outputs=feedback_output |
|
) |
|
|
|
clear_btn.click( |
|
fn=clear_inputs, |
|
inputs=[], |
|
        outputs=[
            comment_input, prediction_output, confidence_output, label_display, history_output, threshold_display,
            toxicity_output, bias_output,
            paraphrased_comment_output, paraphrased_prediction_output, paraphrased_confidence_output,
            paraphrased_toxicity_output, paraphrased_bias_output, paraphrased_label_display,
            semantic_similarity_output, empathy_score_output
        ]
|
) |
|
|
|
refine_btn.click( |
|
fn=run_refinement, |
|
inputs=[], |
|
outputs=[refine_status] |
|
) |
|
|
|
gr.Markdown( |
|
""" |
|
--- |
|
**About**: This app is part of a four-stage pipeline for automated toxic comment moderation with emotional intelligence via RLHF. Built with ❤️ using Hugging Face and Gradio. |
|
""" |
|
) |
|
|
|
demo.launch() |