Spaces:

yixuantt
/

User-Study

Sleeping

App Files Files

yixuantt committed on Apr 7

Commit

a044e1e

verified ·

1 Parent(s): 4088fe7

Upload 2 files

Browse files

Files changed (2) hide show

app.py +336 -0
test_pairs2.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,336 @@

+import gradio as gr
+import json
+from datetime import datetime
+import os
+import logging
+def _setup_logger():
+    log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(log_format)
+    logger.handlers = [console_handler]
+    return logger
+logger = _setup_logger()
+DATA_DIR = "annotations_data2"
+os.makedirs(DATA_DIR, exist_ok=True)
+with open("test_pairs2.json", "r") as f:
+    response_pairs = json.load(f)
+# Stylesheet handed to gr.Blocks(css=...) in create_interface(): pulls in the
+# Roboto web font and styles panels, buttons, the progress label, text areas
+# and the instruction panel.
+custom_css = """
+@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
+body {
+    font-family: 'Roboto', sans-serif !important;
+    line-height: 1.6;
+}
+.panel {
+    border: 1px solid #e5e7eb !important;
+    border-radius: 12px !important;
+    padding: 20px !important;
+}
+button {
+    font-weight: 500 !important;
+    transition: all 0.2s ease !important;
+    font-family: 'Roboto', sans-serif !important;
+}
+button:hover {
+    transform: translateY(-1px);
+}
+.progress {
+    color: #4f46e5;
+    font-weight: 500;
+}
+textarea {
+    border-radius: 8px !important;
+    padding: 12px !important;
+    font-family: 'Roboto', sans-serif !important;
+}
+.selected-response {
+    border: 2px solid #4f46e5 !important;
+    background-color: #f5f3ff;
+}
+.instruction-panel {
+    background: #f8f9fa !important;
+    border: 1px solid #e0e0e0 !important;
+    border-radius: 12px !important;
+    padding: 25px !important;
+    margin-bottom: 25px !important;
+}
+.criteria-list {
+    margin-left: 20px !important;
+    list-style-type: none !important;
+}
+.criteria-item {
+    padding: 8px 0 !important;
+}
+.highlight {
+    color: #4f46e5;
+    font-weight: 500;
+}
+"""
+class State:
+    def __init__(self):
+        self.current_idx = 0
+        self.prolific_id = ""
+        self.annotations = []
+        self.start_time = datetime.now()
+state = State()
+def save_annotations():
+    if not state.prolific_id:
+        return
+    filename = f"{state.prolific_id}_latest.json"
+    filepath = os.path.join(DATA_DIR, filename)
+    data = {
+        "prolific_id": state.prolific_id,
+        "duration": (datetime.now() - state.start_time).total_seconds(),
+        "current_idx": state.current_idx,
+        "annotations": state.annotations
+    }
+    with open(filepath, "w") as f:
+        json.dump(data, f, indent=2)
+    logger.info(f"Saved annotations to {filepath}")
+    return filepath
+def load_latest_data(prolific_id):
+    filename = f"{prolific_id}_latest.json"
+    filepath = os.path.join(DATA_DIR, filename)
+    if os.path.exists(filepath):
+        try:
+            data = json.load(open(filepath))
+            data["current_idx"] = min(max(data["current_idx"], 0), len(response_pairs)-1)
+            return data
+        except Exception as e:
+            logger.error(f"Error loading {filepath}: {e}")
+    return None
+INSTRUCTION = """
+### Welcome! 🎉
+In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.
+### 📋 Task Overview:
+- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
+- Select the better response for each question based on the criteria below
+- Your progress will be tracked
+### 🏅 Evaluation Criteria:
+1. **Perceived Usefulness**
+   → Does the answer address the question effectively and provide relevant information?
+2. **Social Presence**
+   → Does the answer creates "the feeling of being there with a 'real' person"?
+### 🚀 Getting Started:
+1. Input your Prolific ID to begin
+2. Read the question carefully
+3. Compare both responses side-by-side
+4. Select the better response using the radio buttons
+5. Provide optional feedback and confidence rating
+6. Click "Next" to continue or "Previous" to review
+**Note:** You must select a response and confidence level before proceeding to the next question.
+*Thank you for contributing to our research! Your input is valuable.*
+"""
+# Condensed reminder (Markdown) rendered above every question in the main view.
+# NOTE(review): the criteria named here (Helpfulness, Clarity, Emotion) differ
+# from the two listed in INSTRUCTION (Perceived Usefulness, Social Presence) —
+# confirm which set participants are meant to apply.
+MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that’s better based on: Helpfulness (answers well, gives useful info), Clarity (clear, logical, on topic), and Emotion (understands feelings, fits the situation).
+*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!*
+"""
+def create_interface():
+    with gr.Blocks(gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:
+        # User ID Section
+        with gr.Column(visible=True, elem_id="id_section") as id_section:
+            with gr.Column(elem_classes="instruction-panel"):
+                gr.Markdown(INSTRUCTION)
+            gr.Markdown("---")
+            gr.Markdown("## Prolific ID Verification")
+            prolific_id = gr.Textbox(label="Enter your Prolific ID")
+            id_submit_btn = gr.Button("Submit", variant="primary")
+        # Main Interface
+        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
+            progress_md = gr.Markdown("**Progress:** 0% (0/0)", elem_classes="progress")
+            gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
+            gr.Markdown(MINI_INSTRUCTION)
+            gr.Markdown("---")
+            gr.Markdown("### Current Question")
+            prompt_box = gr.Markdown(elem_classes="prompt-highlight")
+            with gr.Row():
+                with gr.Column(variant="panel"):
+                    gr.Markdown("### Response A")
+                    response_a = gr.Markdown(height='200px')
+                with gr.Column(variant="panel"):
+                    gr.Markdown("### Response B")
+                    response_b = gr.Markdown(height='200px')
+            selection_radio = gr.Radio(
+                choices=[("Response A", "A"), ("Response B", "B")],
+                label="Select the better response",
+            )
+            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=3)
+            confidence = gr.Radio(
+                choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
+                label="Confidence Rating",
+            )
+            with gr.Row():
+                prev_btn = gr.Button("Previous", variant="secondary")
+                next_btn = gr.Button("Next", variant="primary")
+        # Completion Section
+        with gr.Column(visible=False, elem_id="completion") as completion_section:
+            gr.Markdown("# Thank You!")
+            gr.Markdown("### Completion code: `CA7IOI65`")
+            completion_md = gr.Markdown("Your annotations have been saved.")
+            gr.HTML("""
+                <p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
+            """)
+        def handle_id_submit(prolific_id_val):
+            if not prolific_id_val.strip():
+                raise gr.Error("Please enter a valid Prolific ID")
+            state.prolific_id = prolific_id_val.strip()
+            data = load_latest_data(state.prolific_id)
+            if data:
+                state.annotations = data.get("annotations", [])
+                state.current_idx = data.get("current_idx", 0)
+                if state.current_idx >= len(response_pairs):
+                    save_annotations()
+                    return {
+                        id_section: gr.update(visible=False),
+                        main_interface: gr.update(visible=False),
+                        completion_section: gr.update(visible=True)
+                    }
+            else:
+                state.annotations = []
+                state.current_idx = 0
+            return {
+                id_section: gr.update(visible=False),
+                main_interface: gr.update(visible=True),
+                completion_section: gr.update(visible=False),
+                **update_interface(state.current_idx)
+            }
+        def update_interface(idx):
+            if idx >= len(response_pairs):
+                idx = len(response_pairs) - 1
+            current_data = response_pairs[idx] if idx < len(response_pairs) else {}
+            progress = f"**Progress:** {idx/len(response_pairs):.0%} ({idx}/{len(response_pairs)})"
+            annotation = state.annotations[idx] if idx < len(state.annotations) else None
+            return {
+                prompt_box: current_data.get("prompt", ""),
+                response_a: current_data.get("responseA", ""),
+                response_b: current_data.get("responseB", ""),
+                progress_md: progress,
+                feedback: annotation["feedback"] if annotation else "",
+                confidence: annotation["confidence"] if annotation else None,
+                selection_radio: annotation["selected"] if annotation else None
+            }
+        def handle_navigation(direction, selection, confidence_val, feedback_val):
+            error_msg = None
+            if direction == "next":
+                if not selection:
+                    error_msg = "Please select a response before proceeding."
+                if not confidence_val:
+                    error_msg = "Please select a confidence level before proceeding."
+            if error_msg:
+                gr.Warning(error_msg)
+                return {
+                    main_interface: gr.update(visible=True),
+                    completion_section: gr.update(visible=False),
+                    **update_interface(state.current_idx)
+                }
+            # Save current annotation
+            if selection and confidence_val:
+                annotation = {
+                    "id": response_pairs[state.current_idx]["id"],  # Save unique ID
+                    "prompt": response_pairs[state.current_idx]["prompt"],
+                    "selected": selection,
+                    "confidence": confidence_val,
+                    "feedback": feedback_val,
+                    "timestamp": datetime.now().isoformat()
+                }
+                if state.current_idx < len(state.annotations):
+                    state.annotations[state.current_idx] = annotation
+                else:
+                    state.annotations.append(annotation)
+            # Navigation logic
+            try:
+                new_idx = state.current_idx + 1 if direction == "next" else max(0, state.current_idx - 1)
+                state.current_idx = new_idx
+                save_annotations()
+                if new_idx >= len(response_pairs):
+                    return {
+                        main_interface: gr.update(visible=False),
+                        completion_section: gr.update(visible=True),
+                        **update_interface(new_idx)
+                    }
+                return {
+                    main_interface: gr.update(visible=True),
+                    completion_section: gr.update(visible=False),
+                    **update_interface(new_idx)
+                }
+            except Exception as e:
+                logger.error(f"Navigation error: {e}")
+                return {
+                    main_interface: gr.update(visible=True),
+                    completion_section: gr.update(visible=False),
+                    **update_interface(state.current_idx)
+                }
+        # Event bindings
+        id_submit_btn.click(
+            handle_id_submit,
+            inputs=prolific_id,
+            outputs=[id_section, main_interface, completion_section, prompt_box,
+                    response_a, response_b, progress_md, feedback, confidence, selection_radio]
+        )
+        prev_btn.click(
+            handle_navigation,
+            inputs=[gr.State("prev"), selection_radio, confidence, feedback],
+            outputs=[main_interface, completion_section, prompt_box, response_a,
+                    response_b, progress_md, feedback, confidence, selection_radio]
+        )
+        next_btn.click(
+            handle_navigation,
+            inputs=[gr.State("next"), selection_radio, confidence, feedback],
+            outputs=[main_interface, completion_section, prompt_box, response_a,
+                    response_b, progress_md, feedback, confidence, selection_radio]
+        )
+    return demo
+if __name__ == "__main__":
+    app = create_interface()
+    app.launch(server_name="0.0.0.0", server_port=7861, share=True)

test_pairs2.json ADDED Viewed

The diff for this file is too large to render. See raw diff