"""Gradio app for pairwise human evaluation of AI chatbot responses.

A participant enters a Prolific ID, then steps through the prompt/response
pairs loaded from ``test_pairs2.json``, choosing the better response and a
confidence rating for each. Progress is written to
``annotations_data2/<id>_latest.json`` after every navigation step so that a
participant can resume later.
"""

import json
import logging
import os
import re
from datetime import datetime

import gradio as gr


def _setup_logger():
    """Configure the root logger with a single console handler and return it."""
    log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(log_format)
    # Replace (not extend) the handler list so messages are not duplicated.
    root_logger.handlers = [console_handler]
    return root_logger


logger = _setup_logger()

DATA_DIR = "annotations_data2"
os.makedirs(DATA_DIR, exist_ok=True)

with open("test_pairs2.json", "r", encoding="utf-8") as f:
    response_pairs = json.load(f)

custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
body { font-family: 'Roboto', sans-serif !important; line-height: 1.6; }
.panel { border: 1px solid #e5e7eb !important; border-radius: 12px !important; padding: 20px !important; }
button { font-weight: 500 !important; transition: all 0.2s ease !important; font-family: 'Roboto', sans-serif !important; }
button:hover { transform: translateY(-1px); }
.progress { color: #4f46e5; font-weight: 500; }
textarea { border-radius: 8px !important; padding: 12px !important; font-family: 'Roboto', sans-serif !important; }
.selected-response { border: 2px solid #4f46e5 !important; background-color: #f5f3ff; }
.instruction-panel { background: #f8f9fa !important; border: 1px solid #e0e0e0 !important; border-radius: 12px !important; padding: 25px !important; margin-bottom: 25px !important; }
.criteria-list { margin-left: 20px !important; list-style-type: none !important; }
.criteria-item { padding: 8px 0 !important; }
.highlight { color: #4f46e5; font-weight: 500; }
"""

# Characters allowed verbatim in the per-participant file name. Anything else
# (notably path separators) is replaced so an ID can never escape DATA_DIR.
_UNSAFE_ID_CHARS = re.compile(r"[^A-Za-z0-9_.-]")


class State:
    """Mutable record of one participant's annotation session."""

    def __init__(self):
        # Index into ``response_pairs`` of the question currently shown;
        # equal to ``len(response_pairs)`` once the task is finished.
        self.current_idx = 0
        # Prolific ID entered by the participant ("" until submitted).
        self.prolific_id = ""
        # One annotation dict per answered question, in question order.
        self.annotations = []
        # Moment this session started; used to compute the saved duration.
        self.start_time = datetime.now()
        # Seconds already recorded by earlier sessions of the same ID.
        self.prior_duration = 0.0


# Kept as the default target of ``save_annotations()`` for backward
# compatibility. The UI itself uses a per-session copy held in ``gr.State``
# so that concurrent participants do not overwrite each other's data.
state = State()


def _annotation_path(prolific_id):
    """Return the save-file path for ``prolific_id`` inside ``DATA_DIR``.

    The ID is user input, so characters outside ``[A-Za-z0-9_.-]`` are
    replaced with ``_``; ordinary alphanumeric IDs map to the same file name
    as before.
    """
    safe_id = _UNSAFE_ID_CHARS.sub("_", prolific_id)
    return os.path.join(DATA_DIR, f"{safe_id}_latest.json")


def save_annotations(session=None):
    """Write a session's progress to its ``<id>_latest.json`` file.

    Args:
        session: The ``State`` to persist. Defaults to the module-level
            ``state`` so existing zero-argument callers keep working.

    Returns:
        The path written, or ``None`` when no Prolific ID has been set.

    Raises:
        OSError: If the file cannot be written.
    """
    session = state if session is None else session
    if not session.prolific_id:
        return None

    filepath = _annotation_path(session.prolific_id)
    elapsed = (datetime.now() - session.start_time).total_seconds()
    data = {
        "prolific_id": session.prolific_id,
        # Time from earlier sessions plus time spent in this one.
        "duration": getattr(session, "prior_duration", 0.0) + elapsed,
        "current_idx": session.current_idx,
        "annotations": session.annotations,
    }
    with open(filepath, "w", encoding="utf-8") as out:
        json.dump(data, out, indent=2)
    logger.info("Saved annotations to %s", filepath)
    return filepath


def load_latest_data(prolific_id):
    """Load previously saved progress for ``prolific_id``.

    Args:
        prolific_id: The participant's Prolific ID.

    Returns:
        The saved dict with ``current_idx`` clamped to
        ``[0, len(response_pairs)]`` (the upper bound means "finished"), or
        ``None`` when no readable save file exists.
    """
    filepath = _annotation_path(prolific_id)
    if not os.path.exists(filepath):
        return None

    try:
        with open(filepath, "r", encoding="utf-8") as saved:
            data = json.load(saved)
    except (OSError, ValueError) as exc:  # JSONDecodeError is a ValueError
        logger.error("Error loading %s: %s", filepath, exc)
        return None

    if not isinstance(data, dict):
        logger.error("Error loading %s: %s", filepath, "unexpected JSON structure")
        return None

    try:
        saved_idx = int(data.get("current_idx", 0))
    except (TypeError, ValueError):
        saved_idx = 0
    # Allow len(response_pairs) itself so a finished participant is
    # recognised as finished instead of being sent back to the last question.
    data["current_idx"] = min(max(saved_idx, 0), len(response_pairs))
    return data


INSTRUCTION = """
### Welcome! 🎉
In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.

### 📋 Task Overview:
- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
- Select the better response for each question based on the criteria below
- Your progress will be tracked

### 🏅 Evaluation Criteria:
1. **Perceived Usefulness** → Does the answer address the question effectively and provide relevant information?
2. **Social Presence** → Does the answer creates "the feeling of being there with a 'real' person"?

### 🚀 Getting Started:
1. Input your Prolific ID to begin
2. Read the question carefully
3. Compare both responses side-by-side
4. Select the better response using the radio buttons
5. Provide optional feedback and confidence rating
6. Click "Next" to continue or "Previous" to review

**Note:** You must select a response and confidence level before proceeding to the next question.

*Thank you for contributing to our research! Your input is valuable.*
"""

MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one.
Read the question, then look at Response A and Response B.
Choose the one that’s better based on: Helpfulness (answers well, gives useful info), Clarity (clear, logical, on topic), and Emotion (understands feelings, fits the situation).

*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!*
"""

# NOTE(review): this reads like the text of a link whose markup is missing;
# confirm whether an <a href=...> back to the study platform was intended.
COMPLETION_HTML = """

Click here to complete the task.

"""


def create_interface():
    """Build the annotation UI and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` app, ready for ``launch()``.
    """
    with gr.Blocks(
        theme=gr.themes.Ocean(),
        title="AI Response Evaluation",
        css=custom_css,
    ) as demo:
        # One State copy per browser session (gr.State deep-copies its
        # initial value), so participants never share progress.
        session_state = gr.State(State())

        # User ID Section
        with gr.Column(visible=True, elem_id="id_section") as id_section:
            with gr.Column(elem_classes="instruction-panel"):
                gr.Markdown(INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("## Prolific ID Verification")
            prolific_id = gr.Textbox(label="Enter your Prolific ID")
            id_submit_btn = gr.Button("Submit", variant="primary")

        # Main Interface
        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
            progress_md = gr.Markdown("**Progress:** 0% (0/0)", elem_classes="progress")
            gr.HTML('')
            gr.Markdown(MINI_INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("### Current Question")
            prompt_box = gr.Markdown(elem_classes="prompt-highlight")

            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response A")
                    response_a = gr.Markdown(height='200px')
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response B")
                    response_b = gr.Markdown(height='200px')

            selection_radio = gr.Radio(
                choices=[("Response A", "A"), ("Response B", "B")],
                label="Select the better response",
            )
            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=3)
            confidence = gr.Radio(
                choices=[
                    ("1 - Not confident", 1),
                    ("2", 2),
                    ("3", 3),
                    ("4", 4),
                    ("5 - Very confident", 5),
                ],
                label="Confidence Rating",
            )

            with gr.Row():
                prev_btn = gr.Button("Previous", variant="secondary")
                next_btn = gr.Button("Next", variant="primary")

        # Completion Section
        with gr.Column(visible=False, elem_id="completion") as completion_section:
            gr.Markdown("# Thank You!")
            gr.Markdown("### Completion code: `CA7IOI65`")
            completion_md = gr.Markdown("Your annotations have been saved.")
            gr.HTML(COMPLETION_HTML)

        def update_interface(idx, session):
            """Return component updates that display question ``idx``."""
            total = len(response_pairs)
            shown = min(idx, total - 1)  # -1 when there are no questions
            in_range = 0 <= shown < total
            current_data = response_pairs[shown] if in_range else {}
            fraction = shown / total if in_range else 0
            progress = f"**Progress:** {fraction:.0%} ({max(shown, 0)}/{total})"
            has_saved = in_range and shown < len(session.annotations)
            annotation = session.annotations[shown] if has_saved else None
            return {
                prompt_box: current_data.get("prompt", ""),
                response_a: current_data.get("responseA", ""),
                response_b: current_data.get("responseB", ""),
                progress_md: progress,
                feedback: annotation["feedback"] if annotation else "",
                confidence: annotation["confidence"] if annotation else None,
                selection_radio: annotation["selected"] if annotation else None,
            }

        def handle_id_submit(prolific_id_val, session):
            """Start or resume the session for the submitted Prolific ID."""
            cleaned_id = (prolific_id_val or "").strip()
            if not cleaned_id:
                raise gr.Error("Please enter a valid Prolific ID")

            session.prolific_id = cleaned_id
            session.start_time = datetime.now()
            session.annotations = []
            session.current_idx = 0
            session.prior_duration = 0.0

            data = load_latest_data(cleaned_id)
            if data:
                saved_annotations = data.get("annotations", [])
                if isinstance(saved_annotations, list):
                    session.annotations = saved_annotations
                # Never resume past the first unanswered question, otherwise
                # new answers would be stored at the wrong list position.
                session.current_idx = min(
                    data.get("current_idx", 0), len(session.annotations)
                )
                try:
                    session.prior_duration = float(data.get("duration", 0.0) or 0.0)
                except (TypeError, ValueError):
                    session.prior_duration = 0.0

                if session.current_idx >= len(response_pairs):
                    # Already finished: nothing changed, so do not re-save.
                    return {
                        session_state: session,
                        id_section: gr.update(visible=False),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=True),
                    }

            return {
                session_state: session,
                id_section: gr.update(visible=False),
                main_interface: gr.update(visible=True),
                completion_section: gr.update(visible=False),
                **update_interface(session.current_idx, session),
            }

        def handle_navigation(direction, selection, confidence_val, feedback_val, session):
            """Store the current answer and move to the next/previous question."""
            stay_on_form = {
                main_interface: gr.update(visible=True),
                completion_section: gr.update(visible=False),
            }

            if direction == "next":
                error_msg = None
                if not confidence_val:
                    error_msg = "Please select a confidence level before proceeding."
                elif not selection:
                    error_msg = "Please select a response before proceeding."
                if error_msg:
                    gr.Warning(error_msg)
                    # Leave the form fields untouched so a partial answer
                    # (selection, typed feedback) is not wiped.
                    return stay_on_form

            idx = session.current_idx
            total = len(response_pairs)

            # Save current annotation
            if selection and confidence_val and 0 <= idx < total:
                annotation = {
                    "id": response_pairs[idx]["id"],  # Save unique ID
                    "prompt": response_pairs[idx]["prompt"],
                    "selected": selection,
                    "confidence": confidence_val,
                    "feedback": feedback_val,
                    "timestamp": datetime.now().isoformat(),
                }
                if idx < len(session.annotations):
                    session.annotations[idx] = annotation
                else:
                    session.annotations.append(annotation)

            # Navigation logic
            new_idx = idx + 1 if direction == "next" else max(0, idx - 1)
            session.current_idx = new_idx
            try:
                save_annotations(session)
            except OSError:
                # Stay on the same question so the index never runs ahead of
                # what is on disk.
                session.current_idx = idx
                logger.exception("Navigation error: could not save annotations")
                gr.Warning("Your response could not be saved. Please try again.")
                return {session_state: session, **stay_on_form}

            if new_idx >= total:
                return {
                    session_state: session,
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=True),
                }
            return {
                session_state: session,
                **stay_on_form,
                **update_interface(new_idx, session),
            }

        # Event bindings
        form_outputs = [
            prompt_box,
            response_a,
            response_b,
            progress_md,
            feedback,
            confidence,
            selection_radio,
        ]
        nav_outputs = [session_state, main_interface, completion_section, *form_outputs]

        id_submit_btn.click(
            handle_id_submit,
            inputs=[prolific_id, session_state],
            outputs=[session_state, id_section, main_interface, completion_section, *form_outputs],
        )
        prev_btn.click(
            handle_navigation,
            inputs=[gr.State("prev"), selection_radio, confidence, feedback, session_state],
            outputs=nav_outputs,
        )
        next_btn.click(
            handle_navigation,
            inputs=[gr.State("next"), selection_radio, confidence, feedback, session_state],
            outputs=nav_outputs,
        )

    return demo


if __name__ == "__main__":
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7861, share=True)