Spaces:

yixuantt
/

User-Study

Sleeping

File size: 25,850 Bytes

import gradio as gr
import json
from datetime import datetime
import os
import logging
import random

# Root-logger configuration
def _setup_logger():
    """Configure the root logger for the app and return it.

    The root logger's level is set to INFO and whatever handlers it already
    had are replaced by a single ``logging.StreamHandler`` that formats
    records as ``[<time> <level>] <message>``.

    Returns:
        The configured root logger.
    """
    stream = logging.StreamHandler()
    stream.setFormatter(logging.Formatter("[%(asctime)s %(levelname)s] %(message)s"))

    root = logging.getLogger()
    root.setLevel(logging.INFO)
    # Assigning the list (rather than addHandler) drops any pre-existing handlers.
    root.handlers = [stream]
    return root

logger = _setup_logger()

# Directory that receives one "<prolific_id>_latest.json" progress file per participant.
DATA_DIR = "annotations_data2"
os.makedirs(DATA_DIR, exist_ok=True)

# Response pairs shown to participants. The handlers below read the "id",
# "prompt", "responseA" and "responseB" fields of each entry.
# The encoding is given explicitly so the load does not depend on the
# platform's default locale encoding.
with open("test_pairs2.json", "r", encoding="utf-8") as f:
    response_pairs = json.load(f)

# Build the annotator -> questions schedule so every question gets a fixed number of labels
def generate_assignments(num_questions=120, num_annotators=30, labels_per_question=7, questions_per_annotator=28):
    """Assign questions to annotators with a greedy, capacity-balanced pass.

    Questions are handled in index order. For each one, the annotators that
    still have spare capacity are ranked by remaining capacity (largest
    first, ties broken by lower annotator index) and the first
    ``labels_per_question`` of them receive the question.

    Args:
        num_questions: Number of questions to distribute (indices 0..n-1).
        num_annotators: Number of annotator slots.
        labels_per_question: How many annotators must label each question.
        questions_per_annotator: Maximum questions a single annotator receives.

    Returns:
        A tuple ``(assignments, question_assignments)`` where ``assignments``
        maps ``"annotator_<k>"`` (1-based) to a list of question indices and
        ``question_assignments`` maps each question index to the list of
        0-based annotator indices chosen for it.

    Raises:
        ValueError: If fewer than ``labels_per_question`` annotators have
            capacity left when a question is processed.
    """
    remaining = [questions_per_annotator] * num_annotators
    assignments = {f"annotator_{n}": [] for n in range(1, num_annotators + 1)}
    question_assignments = {}

    for question in range(num_questions):
        open_slots = [idx for idx, left in enumerate(remaining) if left > 0]
        if len(open_slots) < labels_per_question:
            raise ValueError(f"Not enough annotators with capacity for question {question}")

        # sorted() is stable, so annotators with equal capacity stay in index order.
        ranked = sorted(open_slots, key=lambda idx: -remaining[idx])
        chosen = ranked[:labels_per_question]

        question_assignments[question] = list(chosen)
        for idx in chosen:
            assignments[f"annotator_{idx + 1}"].append(question)
            remaining[idx] -= 1

    return assignments, question_assignments

# Stylesheet passed to gr.Blocks(css=...) in create_interface: loads the Roboto
# font and styles panels, buttons, text areas, the progress label and the
# instruction panel.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
body { font-family: 'Roboto', sans-serif !important; line-height: 1.6; }
.panel { border: 1px solid #e5e7eb !important; border-radius: 12px !important; padding: 20px !important; }
button { font-weight: 500 !important; transition: all 0.2s ease !important; font-family: 'Roboto', sans-serif !important; }
button:hover { transform: translateY(-1px); }
.progress { color: #4f46e5; font-weight: 500; }
textarea { border-radius: 8px !important; padding: 12px !important; font-family: 'Roboto', sans-serif !important; }
.selected-response { border: 2px solid #4f46e5 !important; background-color: #f5f3ff; }
.instruction-panel { background: #f8f9fa !important; border: 1px solid #e0e0e0 !important; border-radius: 12px !important; padding: 25px !important; margin-bottom: 25px !important; }
.criteria-list { margin-left: 20px !important; list-style-type: none !important; }
.criteria-item { padding: 8px 0 !important; }
.highlight { color: #4f46e5; font-weight: 500; }
"""

# Mutable study state: participant identity, progress, answers and questionnaire results
class State:
    """Container for one participant's progress through the study.

    NOTE(review): the single module-level ``state`` instance created below is
    read and mutated by every handler in this file, so it is shared by all
    browser sessions served by this process. Concurrent participants would
    overwrite each other's progress; consider per-session storage
    (e.g. ``gr.State``) if more than one participant can be active at a time.
    """

    def __init__(self):
        self.current_idx = 0           # Position within selected_indices (0-based)
        self.prolific_id = ""          # Empty until the participant submits an ID
        # Initialised here so save_annotations can always read it; the real
        # value ("annotator_<k>") is set when the ID is submitted.
        self.assignment_key = ""
        self.selected_indices = []     # Question indices assigned to this participant
        self.annotations = []          # One annotation dict (or None) per assigned question
        self.form_responses = {}       # Questionnaire answers keyed by "<form>_<question>"
        self.forms_completed = False   # True once the questionnaire has been submitted
        self.start_time = datetime.now()  # Used to compute "duration" when saving

state = State()
# Registry file mapping each Prolific ID to the assignment key it was given.
ASSIGNED_FILE = "assigned.json"

def load_assigned():
    """Read the assignment registry from ASSIGNED_FILE.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist yet.
    """
    if not os.path.exists(ASSIGNED_FILE):
        return {}
    with open(ASSIGNED_FILE, "r") as registry:
        return json.load(registry)

def save_assigned(assigned):
    """Overwrite ASSIGNED_FILE with ``assigned`` serialised as indented JSON.

    Args:
        assigned: The registry to persist (as returned by ``load_assigned``).
    """
    with open(ASSIGNED_FILE, "w") as registry:
        json.dump(assigned, registry, indent=2)

def get_next_available_assignment(assigned, total_assignments=30):
    """Return the lowest-numbered assignment key nobody holds yet.

    Args:
        assigned: Mapping whose values are the assignment keys already taken.
        total_assignments: Number of keys to consider, ``annotator_1`` up to
            ``annotator_<total_assignments>``.

    Returns:
        The first free ``"annotator_<k>"`` key, or None when all are taken.
    """
    taken = assigned.values()
    candidates = (f"annotator_{n}" for n in range(1, total_assignments + 1))
    return next((key for key in candidates if key not in taken), None)

# Persist the shared state as "<prolific_id>_latest.json" inside DATA_DIR
def save_annotations():
    """Write the current participant's progress to disk.

    The file is overwritten on every call and contains the participant ID,
    assignment key, assigned question indices, seconds elapsed since
    ``state.start_time``, the current position, all annotations, the
    questionnaire answers and the questionnaire-completed flag.

    Returns:
        The path that was written, or None when no Prolific ID has been set
        (nothing is written in that case).
    """
    if not state.prolific_id:
        return None

    filepath = os.path.join(DATA_DIR, f"{state.prolific_id}_latest.json")
    snapshot = {
        "prolific_id": state.prolific_id,
        "assignment_key": state.assignment_key,
        "selected_indices": state.selected_indices,
        "duration": (datetime.now() - state.start_time).total_seconds(),
        "current_idx": state.current_idx,
        "annotations": state.annotations,
        "form_responses": state.form_responses,
        "forms_completed": state.forms_completed
    }
    with open(filepath, "w") as out:
        json.dump(snapshot, out, indent=2)

    logger.info(f"Saved annotations to {filepath}")
    return filepath

# Restore a participant's saved progress into the shared state
def load_latest_data(prolific_id):
    """Load ``<prolific_id>_latest.json`` from DATA_DIR into ``state``.

    Args:
        prolific_id: The participant ID whose save file should be read.

    Returns:
        The parsed save-file content, or None when the file does not exist
        or cannot be read/parsed. ``state`` is left untouched in the None
        case.
    """
    filepath = os.path.join(DATA_DIR, f"{prolific_id}_latest.json")
    if not os.path.exists(filepath):
        return None

    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(filepath, "r") as f:
            data = json.load(f)
        # Extract everything first; state is only updated once the whole
        # file has been validated, so a malformed file cannot leave it
        # half-overwritten.
        selected_indices = data.get("selected_indices", [])
        annotations = data.get("annotations", [])
        form_responses = data.get("form_responses", {})
        forms_completed = data.get("forms_completed", False)
        # 28 (one past the last question) is what handle_navigation saves
        # once every question is answered; keeping it lets the caller
        # recognise a finished participant instead of reopening the last
        # question.
        current_idx = min(max(data.get("current_idx", 0), 0), 28)
    except Exception as e:
        # Best effort: an unreadable save file is logged and treated as absent.
        logger.error(f"Error loading {filepath}: {e}")
        return None

    state.selected_indices = selected_indices
    state.annotations = annotations
    state.form_responses = form_responses
    state.forms_completed = forms_completed
    state.current_idx = current_idx
    return data

# Full task instructions rendered on the Prolific-ID page.
# The wording states that the questionnaire comes before the comparison
# questions, matching the order in which create_interface shows the sections.
INSTRUCTION = """
### Welcome! 🎉

In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.

### 📋 Task Overview:
- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
- Select the better response for each question based on the criteria below
- Your progress will be tracked
- Before the comparison questions, you'll answer a few short questionnaire forms

### 🏅 Evaluation Criteria:
1. **Perceived Usefulness**  
   → Does the answer address the question effectively and provide relevant information?
2. **Social Presence**  
   → Does the answer create "the feeling of being there with a 'real' person"?


### 🚀 Getting Started:
1. Input your Prolific ID to begin
2. Read the question carefully
3. Compare both responses side-by-side
4. Select the better response using the radio buttons
5. Provide optional feedback and confidence rating
6. Click "Next" to continue or "Previous" to review

**Note:** You need to select a response and confidence level before proceeding to the next question.

*Thank you for contributing to our research! Your input is valuable.*  
"""

# Condensed reminder of the task, rendered above every comparison question.
MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that’s better based on: Perceived Usefulness (answers well, gives useful info), and Social Presence (understands feelings, fits the situation).

*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!*
"""

# Questionnaire rendered in the forms section, grouped by instrument.
# Each entry is {"question": <text>, "options": [<choices>]}. Answers are stored
# under the key "<form name>_<question text>", so neither may change casually.
# The answer scales are defined once so every item on a scale offers exactly
# the same option strings (no stray whitespace in the recorded values).
_FREQUENCY_SCALE = ("Never", "Rarely", "Sometimes", "Often", "Very Often")
_AGREEMENT_SCALE = ("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")


def _scale_items(options, questions):
    """Pair each question text with its own copy of the answer options."""
    return [{"question": text, "options": list(options)} for text in questions]


forms_questions = {
    "Neuro-QoL Cognition Function": _scale_items(_FREQUENCY_SCALE, [
        "In the past 7 days, I had to read something several times to understand it.",
        "In the past 7 days, I had to work really hard to pay attention or I would make a mistake.",
        "In the past 7 days, I had trouble concentrating.",
        "In the past 7 days, I had trouble remembering things.",
    ]),
    "Wong and Law Emotional Intelligence Scale (WLEIS)": _scale_items(_AGREEMENT_SCALE, [
        # SEA
        "I have a good sense of why I have certain feelings most of the time.",
        "I have good understanding of my own emotions.",
        "I really understand what I feel.",
        "I always know whether I am happy or not.",
        # OEA
        "I always know my friends’ emotions from their behavior.",
        "I am a good observer of others’ emotions.",
        "I am sensitive to the feelings and emotions of others.",
        "I have good understanding of the emotions of people around me.",
        # UOE
        "I always set goals for myself and then try my best to achieve them.",
        "I always tell myself I am a competent person.",
        "I am a self-motivated person.",
        "I would always encourage myself to try my best.",
        # ROE
        "I am able to control my temper and handle difficulties rationally.",
        "I can always calm down quickly when I am very angry.",
        "I have good control of my own emotions.",
        "I can always stay calm in stressful situations.",
    ]),
    "Algorithmic Aversion": _scale_items(_AGREEMENT_SCALE, [
        # Trust in LLM
        "I trust the answers provided by AI chatbots (e.g., ChatGPT) to be accurate.",
        "I feel confident relying on an AI chatbot for important tasks.",
        "I worry that AI chatbots might give me incorrect information.",
        # Preference for Human vs. LLM
        "I prefer asking a human expert over an AI chatbot for advice.",
        "I would rather use a human-written article than one generated by an AI chatbot.",
        "I find human interaction more valuable than interacting with an AI chatbot.",
        # Willingness to Use LLM
        "I would avoid using an AI chatbot if I had other options.",
        "I am willing to use an AI chatbot for daily tasks (e.g., writing, research).",
        "I would recommend an AI chatbot to others.",
    ]),
    "Demographics": [
        {"question": "What is your highest level of education?", "options": [
            "Less than high school",
            "High school diploma",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
            "Doctoral degree"
        ]},
    ]
}

def create_interface():
    """Build the Gradio Blocks app for the study and wire up its event handlers.

    The UI consists of four sections, only one of which is visible at a time:
    Prolific-ID entry, the questionnaire forms, the pairwise-comparison
    questions, and the completion page. A new participant goes
    ID -> forms -> questions -> completion. The handlers read and mutate the
    module-level ``state`` object and persist progress via
    ``save_annotations``.

    Returns:
        The assembled ``gr.Blocks`` instance (not launched).
    """
    # Number of comparison questions each participant answers.
    total_questions = 28

    with gr.Blocks(gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:
        # Prolific-ID section
        with gr.Column(visible=True, elem_id="id_section") as id_section:
            with gr.Column(elem_classes="instruction-panel"):
                gr.Markdown(INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("## Prolific ID Verification")
            prolific_id = gr.Textbox(label="Enter your Prolific ID")
            id_submit_btn = gr.Button("Submit", variant="primary")
            id_message = gr.Markdown("", visible=False)

        # Comparison-question section
        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
            progress_md = gr.Markdown(f"**Progress:** 0% (0/{total_questions})", elem_classes="progress")
            gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
            gr.Markdown(MINI_INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("### Current Question From a User")
            prompt_box = gr.Markdown(elem_classes="prompt-highlight")
            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response A")
                    response_a = gr.Markdown(height='200px')
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response B")
                    response_b = gr.Markdown(height='200px')
            selection_radio = gr.Radio(
                choices=[("Response A", "A"), ("Response B", "B")],
                label="Select the better response",
            )
            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=1)
            confidence = gr.Radio(
                choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
                label="Confidence Rating",
            )
            with gr.Row():
                prev_btn = gr.Button("Previous", variant="secondary")
                next_btn = gr.Button("Next", variant="primary")

        # Questionnaire section (shown before the comparison questions)
        with gr.Column(visible=False, elem_id="forms_section") as forms_section:
            gr.Markdown("## Pre-Test Questions")
            gr.Markdown("Please answer the following questions to complete the study.")
            # One radio per question, in forms_questions iteration order;
            # handle_forms_submit relies on that same order.
            form_radios = []
            for form_name, questions in forms_questions.items():
                for q in questions:
                    radio = gr.Radio(choices=q["options"], label=q["question"])
                    form_radios.append(radio)
            with gr.Row():
                back_to_questions_btn = gr.Button("Back to Questions", variant="secondary")
                submit_forms_btn = gr.Button("Submit Forms", variant="primary")

        # Completion section
        with gr.Column(visible=False, elem_id="completion") as completion_section:
            gr.Markdown("# Thank You!")
            gr.Markdown("### Completion code: `CA7IOI65`")
            completion_md = gr.Markdown("Your annotations and form responses have been saved.")
            # NOTE(review): Prolific's participant completion link is normally
            # https://app.prolific.com/submissions/complete?cc=<code>; confirm
            # that the "/researcher/" path below is intended.
            gr.HTML("""
                <p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
            """)

        def show_section(active, message=""):
            """Return updates that reveal only ``active`` and set the ID-page message."""
            updates = {
                section: gr.update(visible=section is active)
                for section in (id_section, forms_section, main_interface, completion_section)
            }
            updates[id_message] = gr.update(value=message, visible=bool(message))
            return updates

        def update_interface(current_idx):
            """Return component updates that render question ``current_idx`` (clamped to the last one)."""
            current_idx = min(current_idx, total_questions - 1)
            actual_idx = state.selected_indices[current_idx]
            current_data = response_pairs[actual_idx]
            progress = f"**Progress:** {current_idx/total_questions:.0%} ({current_idx}/{total_questions})"
            annotation = state.annotations[current_idx] if current_idx < len(state.annotations) else None
            return {
                prompt_box: current_data.get("prompt", ""),
                response_a: current_data.get("responseA", ""),
                response_b: current_data.get("responseB", ""),
                progress_md: progress,
                feedback: annotation["feedback"] if annotation else "",
                confidence: annotation["confidence"] if annotation else None,
                selection_radio: annotation["selected"] if annotation else None
            }

        def handle_id_submit(prolific_id_val):
            """Register or resume a participant and reveal the section they should see next.

            Raises:
                gr.Error: If the ID is blank or contains a path separator.
            """
            prolific_id = prolific_id_val.strip()
            # The ID becomes part of a file name when progress is saved and
            # loaded, so anything containing a path separator is rejected.
            if not prolific_id or os.path.basename(prolific_id) != prolific_id:
                raise gr.Error("Please enter a valid Prolific ID")

            assigned = load_assigned()
            if prolific_id in assigned:
                assignment_key = assigned[prolific_id]
            else:
                next_key = get_next_available_assignment(assigned)
                if next_key is None:
                    return show_section(id_section, "The study is full. Thank you for your interest.")
                assigned[prolific_id] = next_key
                save_assigned(assigned)
                assignment_key = next_key

            state.prolific_id = prolific_id
            state.assignment_key = assignment_key
            state.selected_indices = assignments[assignment_key]
            data = load_latest_data(prolific_id)
            if not data:
                # No usable save file: start from scratch with the questionnaire.
                state.annotations = [None] * total_questions
                state.current_idx = 0
                state.forms_completed = False
                state.form_responses = {}
                return show_section(forms_section)

            if not state.forms_completed:
                return show_section(forms_section)
            if state.current_idx < total_questions:
                return {**show_section(main_interface), **update_interface(state.current_idx)}
            return show_section(completion_section)

        def handle_navigation(direction, selection, confidence_val, feedback_val):
            """Store the current answer, move to the previous/next question, and save.

            Moving "next" requires both a selection and a confidence rating;
            moving past the last question reveals the completion page.
            """
            if direction == "next":
                error_msg = None
                if not selection:
                    error_msg = "Please select a response before proceeding."
                elif not confidence_val:
                    error_msg = "Please select a confidence level before proceeding."
                if error_msg:
                    gr.Warning(error_msg)
                    # Only visibility is returned so the participant's partly
                    # filled-in answer stays on screen instead of being reset.
                    return {
                        main_interface: gr.update(visible=True),
                        completion_section: gr.update(visible=False),
                    }

            if selection and confidence_val:
                actual_idx = state.selected_indices[state.current_idx]
                annotation = {
                    "id": response_pairs[actual_idx]["id"],
                    "prompt": response_pairs[actual_idx]["prompt"],
                    "selected": selection,
                    "confidence": confidence_val,
                    "feedback": feedback_val,
                    "timestamp": datetime.now().isoformat()
                }
                state.annotations[state.current_idx] = annotation

            if direction == "next":
                new_idx = min(state.current_idx + 1, total_questions)
            else:
                new_idx = max(0, state.current_idx - 1)
            state.current_idx = new_idx
            save_annotations()

            finished = new_idx >= total_questions
            return {
                main_interface: gr.update(visible=not finished),
                completion_section: gr.update(visible=finished),
                **update_interface(new_idx)
            }

        def handle_back_to_questions():
            """Leave the forms section and open the last comparison question.

            NOTE(review): the forms are shown before the questions, so this
            jumps a participant to the final question without submitting the
            questionnaire; confirm the button is still wanted.
            """
            state.current_idx = total_questions - 1
            save_annotations()
            return {
                main_interface: gr.update(visible=True),
                forms_section: gr.update(visible=False),
                completion_section: gr.update(visible=False),
                **update_interface(total_questions - 1)
            }

        def handle_forms_submit(*form_inputs):
            """Validate and store the questionnaire answers, then open the first question."""
            if any(input_val is None for input_val in form_inputs):
                gr.Warning("Please answer all questions before proceeding.")
                return {
                    forms_section: gr.update(visible=True),
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=False)
                }
            # form_inputs arrive in the same order the radios were created.
            state.form_responses = {}
            idx = 0
            for form_name, questions in forms_questions.items():
                for q in questions:
                    key = f"{form_name}_{q['question']}"
                    state.form_responses[key] = form_inputs[idx]
                    idx += 1
            state.forms_completed = True
            # Reset the position before saving so the file matches what is shown.
            state.current_idx = 0
            save_annotations()
            return {
                forms_section: gr.update(visible=False),
                main_interface: gr.update(visible=True),
                completion_section: gr.update(visible=False),
                **update_interface(0)
            }

        # Components that together render one comparison question.
        question_outputs = [prompt_box, response_a, response_b, progress_md,
                            feedback, confidence, selection_radio]

        # Event bindings
        id_submit_btn.click(
            handle_id_submit,
            inputs=prolific_id,
            outputs=[id_section, forms_section, main_interface, completion_section, id_message] + question_outputs
        )

        prev_btn.click(
            handle_navigation,
            inputs=[gr.State("prev"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section] + question_outputs
        )

        next_btn.click(
            handle_navigation,
            inputs=[gr.State("next"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section] + question_outputs
        )

        back_to_questions_btn.click(
            handle_back_to_questions,
            inputs=[],
            outputs=[main_interface, forms_section, completion_section] + question_outputs
        )

        submit_forms_btn.click(
            handle_forms_submit,
            inputs=form_radios,
            outputs=[forms_section, main_interface, completion_section] + question_outputs
        )

    return demo

if __name__ == "__main__":
    # Reuse the persisted schedule when there is one so participants keep the
    # same question sets across restarts; otherwise create and store it.
    # `assignments` is read as a module global by the ID-submit handler.
    if os.path.exists("assignments.json"):
        with open("assignments.json", "r") as f:
            assignments = json.load(f)
        print("Assignments loaded.")
    else:
        assignments, _ = generate_assignments()
        print("Assignments generated.")
        with open("assignments.json", "w") as f:
            json.dump(assignments, f, indent=2)

    app = create_interface()
    app.launch()