Spaces:

yixuantt
/

User-Study

Sleeping

App Files Files

User-Study / app.py

yixuantt

Upload app.py

b2ab1d4 verified about 2 months ago

raw

history blame

13 kB

	import gradio as gr
	import json
	from datetime import datetime
	import os
	import logging

	def _setup_logger():
	    """Configure the root logger for console output and return it.

	    The root logger is set to INFO and its handler list is replaced with a
	    single ``StreamHandler`` that formats records as
	    ``[<time> <level>] <message>``.

	    Returns:
	        The configured root ``logging.Logger``.
	    """
	    root = logging.getLogger()

	    stream = logging.StreamHandler()
	    stream.setFormatter(
	        logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
	    )

	    root.setLevel(logging.INFO)
	    # Assigning the list (instead of addHandler) discards any handlers that
	    # were already attached to the root logger.
	    root.handlers = [stream]
	    return root

	# Root logger used by the functions in this module.
	logger = _setup_logger()

	# Directory that receives one "<prolific_id>_latest.json" file per participant;
	# created at import time if it does not exist yet.
	DATA_DIR = "annotations_data2"
	os.makedirs(DATA_DIR, exist_ok=True)

	# Comparison items, loaded once at import time. The UI handlers index this
	# object by position and read the keys "id", "prompt", "responseA" and
	# "responseB" from each entry.
	# The encoding is given explicitly so that non-ASCII text in the prompts or
	# responses is decoded the same way regardless of the host's locale.
	with open("test_pairs2.json", "r", encoding="utf-8") as f:
	    response_pairs = json.load(f)

	# Stylesheet passed to gr.Blocks(css=...) in create_interface.
	# It loads the Roboto web font and styles panels, buttons, text areas and the
	# progress label. In the visible source only the "progress" and
	# "instruction-panel" classes are attached via elem_classes; the
	# "selected-response", "criteria-list", "criteria-item" and "highlight" rules
	# are not referenced by any component shown here.
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');

	body {
	font-family: 'Roboto', sans-serif !important;
	line-height: 1.6;
	}

	.panel {
	border: 1px solid #e5e7eb !important;
	border-radius: 12px !important;
	padding: 20px !important;
	}

	button {
	font-weight: 500 !important;
	transition: all 0.2s ease !important;
	font-family: 'Roboto', sans-serif !important;
	}

	button:hover {
	transform: translateY(-1px);
	}

	.progress {
	color: #4f46e5;
	font-weight: 500;
	}

	textarea {
	border-radius: 8px !important;
	padding: 12px !important;
	font-family: 'Roboto', sans-serif !important;
	}

	.selected-response {
	border: 2px solid #4f46e5 !important;
	background-color: #f5f3ff;
	}

	.instruction-panel {
	background: #f8f9fa !important;
	border: 1px solid #e0e0e0 !important;
	border-radius: 12px !important;
	padding: 25px !important;
	margin-bottom: 25px !important;
	}

	.criteria-list {
	margin-left: 20px !important;
	list-style-type: none !important;
	}

	.criteria-item {
	padding: 8px 0 !important;
	}

	.highlight {
	color: #4f46e5;
	font-weight: 500;
	}
	"""

	class State:
	    """Mutable progress record for the annotation session.

	    Attributes:
	        current_idx: Position of the question currently shown.
	        prolific_id: ID entered by the participant; empty until submitted.
	        annotations: Saved answers, stored by question position.
	        start_time: Moment this object was created.
	    """

	    def __init__(self):
	        self.start_time = datetime.now()
	        self.annotations = []
	        self.prolific_id = ""
	        self.current_idx = 0


	# Single module-level instance that every handler in this file reads and
	# mutates.
	# NOTE(review): because there is only one instance per process, concurrent
	# participants would appear to share and overwrite each other's progress;
	# confirm whether per-session state (e.g. gr.State) is needed.
	state = State()

	def save_annotations():
	    """Write the global ``state`` to ``<DATA_DIR>/<prolific_id>_latest.json``.

	    The file is overwritten on every call and contains the Prolific ID, the
	    number of seconds elapsed since ``state.start_time``, the current question
	    index and the list of annotations.

	    Returns:
	        The path that was written, or ``None`` when nothing was saved (no
	        Prolific ID has been set, or the ID contains a path separator).
	    """
	    if not state.prolific_id:
	        return None

	    filename = f"{state.prolific_id}_latest.json"
	    # The ID is free text typed by the participant. Refuse anything that
	    # contains a path separator so the write cannot land outside DATA_DIR.
	    if os.path.basename(filename) != filename:
	        logger.error(f"Refusing to save annotations for unsafe Prolific ID: {state.prolific_id!r}")
	        return None

	    filepath = os.path.join(DATA_DIR, filename)
	    data = {
	        "prolific_id": state.prolific_id,
	        "duration": (datetime.now() - state.start_time).total_seconds(),
	        "current_idx": state.current_idx,
	        "annotations": state.annotations,
	    }
	    with open(filepath, "w", encoding="utf-8") as f:
	        json.dump(data, f, indent=2)
	    logger.info(f"Saved annotations to {filepath}")
	    return filepath

	def load_latest_data(prolific_id):
	    """Load previously saved progress for ``prolific_id``, if any.

	    Args:
	        prolific_id: Participant ID used to build the file name
	            ``<prolific_id>_latest.json`` inside ``DATA_DIR``.

	    Returns:
	        The parsed JSON object with ``"current_idx"`` clamped to the range
	        ``[0, len(response_pairs) - 1]``, or ``None`` when no file exists, the
	        ID contains a path separator, or the file cannot be read or parsed
	        (the error is logged).
	    """
	    filename = f"{prolific_id}_latest.json"
	    # The ID is free text typed by the participant. Refuse anything that
	    # contains a path separator so the read cannot reach outside DATA_DIR.
	    if os.path.basename(filename) != filename:
	        logger.error(f"Refusing to load data for unsafe Prolific ID: {prolific_id!r}")
	        return None

	    filepath = os.path.join(DATA_DIR, filename)
	    if not os.path.exists(filepath):
	        return None

	    try:
	        # The context manager closes the file even if parsing fails.
	        with open(filepath, encoding="utf-8") as f:
	            data = json.load(f)
	        # NOTE(review): the upper bound is the last valid index, so a stored
	        # index equal to len(response_pairs) (written after the final "Next")
	        # is pulled back to the last question. Confirm this is intended.
	        data["current_idx"] = min(max(data["current_idx"], 0), len(response_pairs) - 1)
	        return data
	    except Exception as e:
	        # Best effort: a damaged file is treated like a missing one.
	        logger.error(f"Error loading {filepath}: {e}")
	    return None

	# Full briefing rendered with gr.Markdown on the ID-entry screen.
	# NOTE(review): criterion 2 reads 'Does the answer creates ...' (should
	# presumably be "create"); step 5 calls the confidence rating optional while
	# the closing note says it is required. The text is left untouched because
	# it is shown to participants verbatim.
	INSTRUCTION = """
	### Welcome! 🎉

	In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.

	### 📋 Task Overview:
	- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
	- Select the better response for each question based on the criteria below
	- Your progress will be tracked

	### 🏅 Evaluation Criteria:
	1. Perceived Usefulness
	→ Does the answer address the question effectively and provide relevant information?
	2. Social Presence
	→ Does the answer creates "the feeling of being there with a 'real' person"?


	### 🚀 Getting Started:
	1. Input your Prolific ID to begin
	2. Read the question carefully
	3. Compare both responses side-by-side
	4. Select the better response using the radio buttons
	5. Provide optional feedback and confidence rating
	6. Click "Next" to continue or "Previous" to review

	Note: You must select a response and confidence level before proceeding to the next question.

	We do not expect any risks beyond what you’d experience in daily life from joining this study. You’ll just read questions and answers, pick your favorite, and rate your confidence—nothing stressful or harmful. It’s as safe as reading a webpage or filling out a short survey.

	Thank you for contributing to our research! Your input is valuable.
	"""

	# Short reminder rendered with gr.Markdown above every question.
	# NOTE(review): this text names Helpfulness, Clarity and Emotion as the
	# criteria, whereas INSTRUCTION names Perceived Usefulness and Social
	# Presence. Confirm which set participants are meant to apply.
	MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that’s better based on: Helpfulness (answers well, gives useful info), Clarity (clear, logical, on topic), and Emotion (understands feelings, fits the situation).

	Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!
	"""

	def create_interface():
	    """Build the Gradio Blocks app for the pairwise response study.

	    The page has three sections of which one is visible at a time: the
	    Prolific ID form, the annotation view (question, two responses, choice,
	    confidence, optional feedback, Previous/Next), and the completion notice.
	    The event handlers defined here read and mutate the module-level
	    ``state`` object and persist it through ``save_annotations``.

	    NOTE(review): ``state`` is a single object per process, so handlers
	    serving different browser sessions would operate on the same progress
	    record; confirm whether that is acceptable for the deployment.

	    Returns:
	        The constructed ``gr.Blocks`` instance (not launched).
	    """
	    with gr.Blocks(gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:
	        # User ID Section
	        with gr.Column(visible=True, elem_id="id_section") as id_section:
	            with gr.Column(elem_classes="instruction-panel"):
	                gr.Markdown(INSTRUCTION)
	            gr.Markdown("---")
	            gr.Markdown("## Prolific ID Verification")
	            prolific_id = gr.Textbox(label="Enter your Prolific ID")
	            id_submit_btn = gr.Button("Submit", variant="primary")

	        # Main Interface
	        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
	            progress_md = gr.Markdown("Progress: 0% (0/0)", elem_classes="progress")
	            gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
	            gr.Markdown(MINI_INSTRUCTION)
	            gr.Markdown("---")
	            gr.Markdown("### Current Question")
	            prompt_box = gr.Markdown(elem_classes="prompt-highlight")
	            with gr.Row():
	                with gr.Column(variant="panel"):
	                    gr.Markdown("### Response A")
	                    response_a = gr.Markdown(height='200px')
	                with gr.Column(variant="panel"):
	                    gr.Markdown("### Response B")
	                    response_b = gr.Markdown(height='200px')
	            selection_radio = gr.Radio(
	                choices=[("Response A", "A"), ("Response B", "B")],
	                label="Select the better response",
	            )
	            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=3)
	            confidence = gr.Radio(
	                choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
	                label="Confidence Rating",
	            )
	            with gr.Row():
	                prev_btn = gr.Button("Previous", variant="secondary")
	                next_btn = gr.Button("Next", variant="primary")

	        # Completion Section
	        with gr.Column(visible=False, elem_id="completion") as completion_section:
	            gr.Markdown("# Thank You!")
	            gr.Markdown("### Completion code: `CA7IOI65`")
	            gr.Markdown("Your annotations have been saved.")
	            # NOTE(review): the link path contains "/researcher/"; confirm this
	            # is the URL participants are supposed to open.
	            gr.HTML("""
	            <p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
	            """)

	        def handle_id_submit(prolific_id_val):
	            """Register the participant and show the right section.

	            Restores saved progress for the ID when a file exists, otherwise
	            starts from the first question. Raises ``gr.Error`` when the
	            textbox is blank.
	            """
	            if not prolific_id_val.strip():
	                raise gr.Error("Please enter a valid Prolific ID")
	            state.prolific_id = prolific_id_val.strip()
	            # Restart the clock for this participant; otherwise the saved
	            # "duration" would count from when the process created `state`.
	            state.start_time = datetime.now()
	            data = load_latest_data(state.prolific_id)

	            if data:
	                state.annotations = data.get("annotations", [])
	                state.current_idx = data.get("current_idx", 0)
	                if state.current_idx >= len(response_pairs):
	                    # Everything was already answered: go straight to the
	                    # completion notice.
	                    save_annotations()
	                    return {
	                        id_section: gr.update(visible=False),
	                        main_interface: gr.update(visible=False),
	                        completion_section: gr.update(visible=True),
	                    }
	            else:
	                state.annotations = []
	                state.current_idx = 0

	            return {
	                id_section: gr.update(visible=False),
	                main_interface: gr.update(visible=True),
	                completion_section: gr.update(visible=False),
	                **update_interface(state.current_idx),
	            }

	        def update_interface(idx):
	            """Return component values for the question at position ``idx``.

	            An index past the end is clamped to the last question. The form
	            fields are filled from the stored annotation for that position,
	            or cleared when there is none.
	            """
	            total = len(response_pairs)
	            idx = max(0, min(idx, total - 1))
	            # With no items loaded there is nothing to show; fall back to
	            # empty text instead of raising on the lookup or the division.
	            current_data = response_pairs[idx] if idx < total else {}
	            fraction = idx / total if total else 0
	            progress = f"Progress: {fraction:.0%} ({idx}/{total})"
	            annotation = state.annotations[idx] if idx < len(state.annotations) else None
	            return {
	                prompt_box: current_data.get("prompt", ""),
	                response_a: current_data.get("responseA", ""),
	                response_b: current_data.get("responseB", ""),
	                progress_md: progress,
	                feedback: annotation["feedback"] if annotation else "",
	                confidence: annotation["confidence"] if annotation else None,
	                selection_radio: annotation["selected"] if annotation else None,
	            }

	        def handle_navigation(direction, selection, confidence_val, feedback_val):
	            """Store the current answer and move to the next/previous question.

	            ``direction`` is ``"next"`` or anything else for "previous".
	            Moving forward requires both a selection and a confidence value;
	            moving back stores the answer only when both are present.
	            """
	            error_msg = None
	            if direction == "next":
	                if not selection:
	                    error_msg = "Please select a response before proceeding."
	                if not confidence_val:
	                    error_msg = "Please select a confidence level before proceeding."

	            if error_msg:
	                gr.Warning(error_msg)
	                return {
	                    main_interface: gr.update(visible=True),
	                    completion_section: gr.update(visible=False),
	                    **update_interface(state.current_idx),
	                    # Echo back what the participant has entered so far;
	                    # re-rendering from saved state alone would wipe it.
	                    selection_radio: selection,
	                    confidence: confidence_val,
	                    feedback: feedback_val,
	                }

	            # Save current annotation
	            if selection and confidence_val:
	                annotation = {
	                    "id": response_pairs[state.current_idx]["id"],  # Save unique ID
	                    "prompt": response_pairs[state.current_idx]["prompt"],
	                    "selected": selection,
	                    "confidence": confidence_val,
	                    "feedback": feedback_val,
	                    "timestamp": datetime.now().isoformat(),
	                }
	                if state.current_idx < len(state.annotations):
	                    state.annotations[state.current_idx] = annotation
	                else:
	                    state.annotations.append(annotation)

	            # Navigation logic
	            try:
	                new_idx = state.current_idx + 1 if direction == "next" else max(0, state.current_idx - 1)
	                state.current_idx = new_idx
	                save_annotations()

	                if new_idx >= len(response_pairs):
	                    # Past the last question: show the completion notice.
	                    return {
	                        main_interface: gr.update(visible=False),
	                        completion_section: gr.update(visible=True),
	                        **update_interface(new_idx),
	                    }

	                return {
	                    main_interface: gr.update(visible=True),
	                    completion_section: gr.update(visible=False),
	                    **update_interface(new_idx),
	                }

	            except Exception as e:
	                # Keep the annotation view usable if saving or rendering fails.
	                logger.error(f"Navigation error: {e}")
	                return {
	                    main_interface: gr.update(visible=True),
	                    completion_section: gr.update(visible=False),
	                    **update_interface(state.current_idx),
	                }

	        # Event bindings
	        id_submit_btn.click(
	            handle_id_submit,
	            inputs=prolific_id,
	            outputs=[id_section, main_interface, completion_section, prompt_box,
	                     response_a, response_b, progress_md, feedback, confidence, selection_radio],
	        )

	        prev_btn.click(
	            handle_navigation,
	            inputs=[gr.State("prev"), selection_radio, confidence, feedback],
	            outputs=[main_interface, completion_section, prompt_box, response_a,
	                     response_b, progress_md, feedback, confidence, selection_radio],
	        )

	        next_btn.click(
	            handle_navigation,
	            inputs=[gr.State("next"), selection_radio, confidence, feedback],
	            outputs=[main_interface, completion_section, prompt_box, response_a,
	                     response_b, progress_md, feedback, confidence, selection_radio],
	        )

	    return demo

	# Script entry point: build the Blocks app and start the Gradio server when
	# this file is executed directly (not when it is imported).
	if __name__ == "__main__":
	    app = create_interface()
	    app.launch()