Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -3,16 +3,16 @@ import json
|
|
3 |
from datetime import datetime
|
4 |
import os
|
5 |
import logging
|
|
|
6 |
|
|
|
7 |
def _setup_logger():
|
8 |
log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
|
9 |
logger = logging.getLogger()
|
10 |
logger.setLevel(logging.INFO)
|
11 |
-
|
12 |
console_handler = logging.StreamHandler()
|
13 |
console_handler.setFormatter(log_format)
|
14 |
logger.handlers = [console_handler]
|
15 |
-
|
16 |
return logger
|
17 |
|
18 |
logger = _setup_logger()
|
@@ -20,81 +20,40 @@ logger = _setup_logger()
|
|
20 |
DATA_DIR = "annotations_data2"
|
21 |
os.makedirs(DATA_DIR, exist_ok=True)
|
22 |
|
|
|
23 |
with open("test_pairs2.json", "r") as f:
|
24 |
response_pairs = json.load(f)
|
25 |
|
|
|
26 |
custom_css = """
|
27 |
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
}
|
33 |
-
|
34 |
-
.
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
}
|
39 |
-
|
40 |
-
button {
|
41 |
-
font-weight: 500 !important;
|
42 |
-
transition: all 0.2s ease !important;
|
43 |
-
font-family: 'Roboto', sans-serif !important;
|
44 |
-
}
|
45 |
-
|
46 |
-
button:hover {
|
47 |
-
transform: translateY(-1px);
|
48 |
-
}
|
49 |
-
|
50 |
-
.progress {
|
51 |
-
color: #4f46e5;
|
52 |
-
font-weight: 500;
|
53 |
-
}
|
54 |
-
|
55 |
-
textarea {
|
56 |
-
border-radius: 8px !important;
|
57 |
-
padding: 12px !important;
|
58 |
-
font-family: 'Roboto', sans-serif !important;
|
59 |
-
}
|
60 |
-
|
61 |
-
.selected-response {
|
62 |
-
border: 2px solid #4f46e5 !important;
|
63 |
-
background-color: #f5f3ff;
|
64 |
-
}
|
65 |
-
|
66 |
-
.instruction-panel {
|
67 |
-
background: #f8f9fa !important;
|
68 |
-
border: 1px solid #e0e0e0 !important;
|
69 |
-
border-radius: 12px !important;
|
70 |
-
padding: 25px !important;
|
71 |
-
margin-bottom: 25px !important;
|
72 |
-
}
|
73 |
-
|
74 |
-
.criteria-list {
|
75 |
-
margin-left: 20px !important;
|
76 |
-
list-style-type: none !important;
|
77 |
-
}
|
78 |
-
|
79 |
-
.criteria-item {
|
80 |
-
padding: 8px 0 !important;
|
81 |
-
}
|
82 |
-
|
83 |
-
.highlight {
|
84 |
-
color: #4f46e5;
|
85 |
-
font-weight: 500;
|
86 |
-
}
|
87 |
"""
|
88 |
|
|
|
89 |
class State:
|
90 |
def __init__(self):
|
91 |
self.current_idx = 0
|
92 |
self.prolific_id = ""
|
93 |
-
self.
|
|
|
|
|
|
|
94 |
self.start_time = datetime.now()
|
95 |
|
96 |
state = State()
|
97 |
|
|
|
98 |
def save_annotations():
|
99 |
if not state.prolific_id:
|
100 |
return
|
@@ -102,22 +61,30 @@ def save_annotations():
|
|
102 |
filepath = os.path.join(DATA_DIR, filename)
|
103 |
data = {
|
104 |
"prolific_id": state.prolific_id,
|
|
|
105 |
"duration": (datetime.now() - state.start_time).total_seconds(),
|
106 |
"current_idx": state.current_idx,
|
107 |
-
"annotations": state.annotations
|
|
|
|
|
108 |
}
|
109 |
with open(filepath, "w") as f:
|
110 |
json.dump(data, f, indent=2)
|
111 |
logger.info(f"Saved annotations to {filepath}")
|
112 |
return filepath
|
113 |
|
|
|
114 |
def load_latest_data(prolific_id):
|
115 |
filename = f"{prolific_id}_latest.json"
|
116 |
filepath = os.path.join(DATA_DIR, filename)
|
117 |
if os.path.exists(filepath):
|
118 |
try:
|
119 |
data = json.load(open(filepath))
|
120 |
-
|
|
|
|
|
|
|
|
|
121 |
return data
|
122 |
except Exception as e:
|
123 |
logger.error(f"Error loading {filepath}: {e}")
|
@@ -148,21 +115,85 @@ In this task, you'll act as a judge comparing two AI chatbot responses. Your goa
|
|
148 |
5. Provide optional feedback and confidence rating
|
149 |
6. Click "Next" to continue or "Previous" to review
|
150 |
|
151 |
-
**Note:** You
|
152 |
-
|
153 |
-
*We do not expect any risks beyond what you’d experience in daily life from joining this study. You’ll just read questions and answers, pick your favorite, and rate your confidence—nothing stressful or harmful. It’s as safe as reading a webpage or filling out a short survey.*
|
154 |
|
155 |
*Thank you for contributing to our research! Your input is valuable.*
|
156 |
"""
|
157 |
|
158 |
-
MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that’s better based on:
|
159 |
|
160 |
*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!*
|
161 |
"""
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
def create_interface():
|
164 |
with gr.Blocks(gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:
|
165 |
-
# User ID Section
|
166 |
with gr.Column(visible=True, elem_id="id_section") as id_section:
|
167 |
with gr.Column(elem_classes="instruction-panel"):
|
168 |
gr.Markdown(INSTRUCTION)
|
@@ -171,13 +202,13 @@ def create_interface():
|
|
171 |
prolific_id = gr.Textbox(label="Enter your Prolific ID")
|
172 |
id_submit_btn = gr.Button("Submit", variant="primary")
|
173 |
|
174 |
-
# Main Interface
|
175 |
with gr.Column(visible=False, elem_id="main_interface") as main_interface:
|
176 |
-
progress_md = gr.Markdown("**Progress:** 0% (0/
|
177 |
gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
|
178 |
gr.Markdown(MINI_INSTRUCTION)
|
179 |
gr.Markdown("---")
|
180 |
-
gr.Markdown("### Current Question")
|
181 |
prompt_box = gr.Markdown(elem_classes="prompt-highlight")
|
182 |
with gr.Row():
|
183 |
with gr.Column(variant="panel"):
|
@@ -190,7 +221,7 @@ def create_interface():
|
|
190 |
choices=[("Response A", "A"), ("Response B", "B")],
|
191 |
label="Select the better response",
|
192 |
)
|
193 |
-
feedback = gr.Textbox(label="Additional Feedback (optional)", lines=
|
194 |
confidence = gr.Radio(
|
195 |
choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
|
196 |
label="Confidence Rating",
|
@@ -199,48 +230,85 @@ def create_interface():
|
|
199 |
prev_btn = gr.Button("Previous", variant="secondary")
|
200 |
next_btn = gr.Button("Next", variant="primary")
|
201 |
|
202 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
with gr.Column(visible=False, elem_id="completion") as completion_section:
|
204 |
gr.Markdown("# Thank You!")
|
205 |
gr.Markdown("### Completion code: `CA7IOI65`")
|
206 |
-
completion_md = gr.Markdown("Your annotations have been saved.")
|
207 |
gr.HTML("""
|
208 |
<p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
|
209 |
""")
|
210 |
|
|
|
211 |
def handle_id_submit(prolific_id_val):
|
212 |
if not prolific_id_val.strip():
|
213 |
raise gr.Error("Please enter a valid Prolific ID")
|
214 |
state.prolific_id = prolific_id_val.strip()
|
215 |
data = load_latest_data(state.prolific_id)
|
216 |
-
|
217 |
if data:
|
218 |
-
state.
|
219 |
-
state.current_idx = data.get("current_idx", 0)
|
220 |
-
if state.current_idx >= len(response_pairs):
|
221 |
-
save_annotations()
|
222 |
return {
|
223 |
id_section: gr.update(visible=False),
|
224 |
main_interface: gr.update(visible=False),
|
|
|
225 |
completion_section: gr.update(visible=True)
|
226 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
else:
|
228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
state.current_idx = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
if idx >= len(response_pairs):
|
240 |
-
idx = len(response_pairs) - 1
|
241 |
-
current_data = response_pairs[idx] if idx < len(response_pairs) else {}
|
242 |
-
progress = f"**Progress:** {idx/len(response_pairs):.0%} ({idx}/{len(response_pairs)})"
|
243 |
-
annotation = state.annotations[idx] if idx < len(state.annotations) else None
|
244 |
return {
|
245 |
prompt_box: current_data.get("prompt", ""),
|
246 |
response_a: current_data.get("responseA", ""),
|
@@ -251,6 +319,7 @@ def create_interface():
|
|
251 |
selection_radio: annotation["selected"] if annotation else None
|
252 |
}
|
253 |
|
|
|
254 |
def handle_navigation(direction, selection, confidence_val, feedback_val):
|
255 |
error_msg = None
|
256 |
if direction == "next":
|
@@ -258,81 +327,116 @@ def create_interface():
|
|
258 |
error_msg = "Please select a response before proceeding."
|
259 |
if not confidence_val:
|
260 |
error_msg = "Please select a confidence level before proceeding."
|
261 |
-
|
262 |
if error_msg:
|
263 |
gr.Warning(error_msg)
|
264 |
return {
|
265 |
main_interface: gr.update(visible=True),
|
|
|
266 |
completion_section: gr.update(visible=False),
|
267 |
**update_interface(state.current_idx)
|
268 |
}
|
269 |
-
|
270 |
-
# Save current annotation
|
271 |
if selection and confidence_val:
|
|
|
272 |
annotation = {
|
273 |
-
"id": response_pairs[
|
274 |
-
"prompt": response_pairs[
|
275 |
"selected": selection,
|
276 |
"confidence": confidence_val,
|
277 |
"feedback": feedback_val,
|
278 |
"timestamp": datetime.now().isoformat()
|
279 |
}
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
state.current_idx = new_idx
|
289 |
-
save_annotations()
|
290 |
-
|
291 |
-
if new_idx >= len(response_pairs):
|
292 |
-
return {
|
293 |
-
main_interface: gr.update(visible=False),
|
294 |
-
completion_section: gr.update(visible=True),
|
295 |
-
**update_interface(new_idx)
|
296 |
-
}
|
297 |
-
|
298 |
return {
|
299 |
-
main_interface: gr.update(visible=
|
|
|
300 |
completion_section: gr.update(visible=False),
|
301 |
-
**update_interface(
|
302 |
}
|
303 |
-
|
304 |
-
except Exception as e:
|
305 |
-
logger.error(f"Navigation error: {e}")
|
306 |
return {
|
307 |
main_interface: gr.update(visible=True),
|
|
|
308 |
completion_section: gr.update(visible=False),
|
309 |
-
**update_interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
# Event bindings
|
313 |
id_submit_btn.click(
|
314 |
handle_id_submit,
|
315 |
inputs=prolific_id,
|
316 |
-
outputs=[id_section, main_interface, completion_section, prompt_box,
|
317 |
-
|
318 |
)
|
319 |
|
320 |
prev_btn.click(
|
321 |
handle_navigation,
|
322 |
inputs=[gr.State("prev"), selection_radio, confidence, feedback],
|
323 |
-
outputs=[main_interface, completion_section, prompt_box, response_a,
|
324 |
-
|
325 |
)
|
326 |
|
327 |
next_btn.click(
|
328 |
handle_navigation,
|
329 |
inputs=[gr.State("next"), selection_radio, confidence, feedback],
|
330 |
-
outputs=[main_interface, completion_section, prompt_box, response_a,
|
331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
)
|
333 |
|
334 |
return demo
|
335 |
|
336 |
if __name__ == "__main__":
|
337 |
app = create_interface()
|
338 |
-
app.launch()
|
|
|
3 |
from datetime import datetime
|
4 |
import os
|
5 |
import logging
|
6 |
+
import random
|
7 |
|
8 |
+
# Logger setup (unchanged)
|
9 |
def _setup_logger():
    """Configure the root logger for console output and return it.

    The root logger is set to INFO and its handler list is replaced by a
    single stream handler whose records are prefixed with the timestamp and
    the level name.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
    )

    root = logging.getLogger()
    root.setLevel(logging.INFO)
    # Assigning the list (instead of calling addHandler) discards any handlers
    # that were attached to the root logger before this call.
    root.handlers = [stream_handler]
    return root


logger = _setup_logger()
|
|
|
20 |
DATA_DIR = "annotations_data2"
|
21 |
os.makedirs(DATA_DIR, exist_ok=True)
|
22 |
|
23 |
+
# Load questions from JSON (unchanged)
|
24 |
with open("test_pairs2.json", "r") as f:
|
25 |
response_pairs = json.load(f)
|
26 |
|
27 |
+
# CSS (unchanged)
|
28 |
custom_css = """
|
29 |
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
|
30 |
+
body { font-family: 'Roboto', sans-serif !important; line-height: 1.6; }
|
31 |
+
.panel { border: 1px solid #e5e7eb !important; border-radius: 12px !important; padding: 20px !important; }
|
32 |
+
button { font-weight: 500 !important; transition: all 0.2s ease !important; font-family: 'Roboto', sans-serif !important; }
|
33 |
+
button:hover { transform: translateY(-1px); }
|
34 |
+
.progress { color: #4f46e5; font-weight: 500; }
|
35 |
+
textarea { border-radius: 8px !important; padding: 12px !important; font-family: 'Roboto', sans-serif !important; }
|
36 |
+
.selected-response { border: 2px solid #4f46e5 !important; background-color: #f5f3ff; }
|
37 |
+
.instruction-panel { background: #f8f9fa !important; border: 1px solid #e0e0e0 !important; border-radius: 12px !important; padding: 25px !important; margin-bottom: 25px !important; }
|
38 |
+
.criteria-list { margin-left: 20px !important; list-style-type: none !important; }
|
39 |
+
.criteria-item { padding: 8px 0 !important; }
|
40 |
+
.highlight { color: #4f46e5; font-weight: 500; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
"""
|
42 |
|
43 |
+
# Updated State class to include selected_indices, form_responses, and forms_completed
|
44 |
class State:
    """Container for the progress of one annotation session."""

    def __init__(self):
        # Who is annotating and when the session object was created.
        self.prolific_id = ""
        self.start_time = datetime.now()

        # Position in the assigned question list and the answers so far.
        self.current_idx = 0
        self.selected_indices = []  # question indices assigned to this user
        self.annotations = []  # annotations for the assigned questions

        # Post-test questionnaire answers and whether they were submitted.
        self.form_responses = {}
        self.forms_completed = False


# Single module-level instance read and mutated by the functions in this file.
state = State()
|
55 |
|
56 |
+
# Updated save_annotations to include new fields
|
57 |
def save_annotations():
|
58 |
if not state.prolific_id:
|
59 |
return
|
|
|
61 |
filepath = os.path.join(DATA_DIR, filename)
|
62 |
data = {
|
63 |
"prolific_id": state.prolific_id,
|
64 |
+
"selected_indices": state.selected_indices,
|
65 |
"duration": (datetime.now() - state.start_time).total_seconds(),
|
66 |
"current_idx": state.current_idx,
|
67 |
+
"annotations": state.annotations,
|
68 |
+
"form_responses": state.form_responses,
|
69 |
+
"forms_completed": state.forms_completed
|
70 |
}
|
71 |
with open(filepath, "w") as f:
|
72 |
json.dump(data, f, indent=2)
|
73 |
logger.info(f"Saved annotations to {filepath}")
|
74 |
return filepath
|
75 |
|
76 |
+
# Updated load_latest_data to load new fields
|
77 |
def load_latest_data(prolific_id):
|
78 |
filename = f"{prolific_id}_latest.json"
|
79 |
filepath = os.path.join(DATA_DIR, filename)
|
80 |
if os.path.exists(filepath):
|
81 |
try:
|
82 |
data = json.load(open(filepath))
|
83 |
+
state.selected_indices = data.get("selected_indices", [])
|
84 |
+
state.annotations = data.get("annotations", [])
|
85 |
+
state.form_responses = data.get("form_responses", {})
|
86 |
+
state.forms_completed = data.get("forms_completed", False)
|
87 |
+
state.current_idx = min(max(data.get("current_idx", 0), 0), 39) # Cap at 39 (0-39 for 40 questions)
|
88 |
return data
|
89 |
except Exception as e:
|
90 |
logger.error(f"Error loading {filepath}: {e}")
|
|
|
115 |
5. Provide optional feedback and confidence rating
|
116 |
6. Click "Next" to continue or "Previous" to review
|
117 |
|
118 |
+
**Note:** You need to select a response and confidence level before proceeding to the next question.
|
|
|
|
|
119 |
|
120 |
*Thank you for contributing to our research! Your input is valuable.*
|
121 |
"""
|
122 |
|
123 |
+
MINI_INSTRUCTION = """You’ll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that’s better based on: Perceived Usefulness (answers well, gives useful info), and Social Presence (understands feelings, fits the situation).
|
124 |
|
125 |
*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and confidence level to continue. Thanks for helping with our research!*
|
126 |
"""
|
127 |
|
128 |
+
# Define post-test form questions (placeholders; replace with actual questions if available)
|
129 |
+
# Shared five-point answer scales. Defining each scale once guarantees that
# every question offers byte-identical option labels; previously most
# agreement items carried " Agree" (leading space) while a few carried
# "Agree", so the same answer was saved under two different values.
_FREQUENCY_SCALE = ("Never", "Rarely", "Sometimes", "Often", "Very Often")
_AGREEMENT_SCALE = ("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")


def _scale_items(scale, prompts):
    """Build the item list for one form.

    Args:
        scale: Sequence of option labels shared by every prompt.
        prompts: Question texts, in display order.

    Returns:
        A list of ``{"question": str, "options": list[str]}`` dicts, one per
        prompt and in the same order. Each dict gets its own copy of the
        option list so items never share a mutable object.
    """
    return [{"question": prompt, "options": list(scale)} for prompt in prompts]


# Post-test questionnaires keyed by form name. Both the form order and the
# question order matter: the form UI creates one radio per question in this
# iteration order and the submit handler maps answers back by position.
forms_questions = {
    "Neuro-QoL Cognition Function": _scale_items(_FREQUENCY_SCALE, [
        "In the past 7 days, I had to read something several times to understand it.",
        "In the past 7 days, I had to work really hard to pay attention or I would make a mistake.",
        "In the past 7 days, I had trouble concentrating.",
        "In the past 7 days, I had trouble remembering things.",
    ]),
    "Wong and Law Emotional Intelligence Scale (WLEIS)": _scale_items(_AGREEMENT_SCALE, [
        # SEA
        "I have a good sense of why I have certain feelings most of the time.",
        "I have good understanding of my own emotions.",
        "I really understand what I feel.",
        "I always know whether I am happy or not.",
        # OEA
        "I always know my friends’ emotions from their behavior.",
        "I am a good observer of others’ emotions.",
        "I am sensitive to the feelings and emotions of others.",
        "I have good understanding of the emotions of people around me.",
        # UOE
        "I always set goals for myself and then try my best to achieve them.",
        "I always tell myself I am a competent person.",
        "I am a self-motivated person.",
        "I would always encourage myself to try my best.",
        # ROE
        "I am able to control my temper and handle difficulties rationally.",
        "I can always calm down quickly when I am very angry.",
        "I have good control of my own emotions.",
        "I can always stay calm in stressful situations.",
    ]),
    "Algorithmic Aversion": _scale_items(_AGREEMENT_SCALE, [
        # Trust in LLM
        "I trust the answers provided by AI chatbots (e.g., ChatGPT, Grok) to be accurate.",
        "I feel confident relying on a AI chatbot for important tasks.",
        "I worry that AI chatbots might give me incorrect information.",
        # Preference for Human vs. LLM
        "I prefer asking a human expert over a AI chatbot for advice.",
        "I would rather use a human-written article than one generated by a AI chatbot.",
        "I find human interaction more valuable than interacting with a AI chatbot.",
        # Willingness to Use LLM
        "I would avoid using a AI chatbot if I had other options.",
        "I am willing to use a AI chatbot for daily tasks (e.g., writing, research).",
        "I would recommend a AI chatbot to others.",
    ]),
    # Demographic items each have their own option list, so they are spelled
    # out instead of going through a shared scale.
    "Demographics": [
        {
            "question": "What is your highest level of education?",
            "options": [
                "Less than high school",
                "High school diploma",
                "Some college",
                "Associate's degree",
                "Bachelor's degree",
                "Master's degree",
                "Doctoral degree",
            ],
        },
        {
            "question": "How familiar are you with AI chatbots?",
            "options": [
                "Not familiar at all",
                "Slightly familiar",
                "Moderately familiar",
                "Very familiar",
                "Extremely familiar",
            ],
        },
    ],
}
|
193 |
+
|
194 |
def create_interface():
|
195 |
with gr.Blocks(gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:
|
196 |
+
# User ID Section (unchanged layout)
|
197 |
with gr.Column(visible=True, elem_id="id_section") as id_section:
|
198 |
with gr.Column(elem_classes="instruction-panel"):
|
199 |
gr.Markdown(INSTRUCTION)
|
|
|
202 |
prolific_id = gr.Textbox(label="Enter your Prolific ID")
|
203 |
id_submit_btn = gr.Button("Submit", variant="primary")
|
204 |
|
205 |
+
# Main Interface (updated for 40 questions)
|
206 |
with gr.Column(visible=False, elem_id="main_interface") as main_interface:
|
207 |
+
progress_md = gr.Markdown("**Progress:** 0% (0/40)", elem_classes="progress")
|
208 |
gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
|
209 |
gr.Markdown(MINI_INSTRUCTION)
|
210 |
gr.Markdown("---")
|
211 |
+
gr.Markdown("### Current Question From a User")
|
212 |
prompt_box = gr.Markdown(elem_classes="prompt-highlight")
|
213 |
with gr.Row():
|
214 |
with gr.Column(variant="panel"):
|
|
|
221 |
choices=[("Response A", "A"), ("Response B", "B")],
|
222 |
label="Select the better response",
|
223 |
)
|
224 |
+
feedback = gr.Textbox(label="Additional Feedback (optional)", lines=1)
|
225 |
confidence = gr.Radio(
|
226 |
choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
|
227 |
label="Confidence Rating",
|
|
|
230 |
prev_btn = gr.Button("Previous", variant="secondary")
|
231 |
next_btn = gr.Button("Next", variant="primary")
|
232 |
|
233 |
+
# New Forms Section
|
234 |
+
with gr.Column(visible=False, elem_id="forms_section") as forms_section:
|
235 |
+
gr.Markdown("## Post-Test Questions")
|
236 |
+
gr.Markdown("Please answer the following questions to complete the study.")
|
237 |
+
form_radios = []
|
238 |
+
for form_name, questions in forms_questions.items():
|
239 |
+
for q in questions:
|
240 |
+
radio = gr.Radio(choices=q["options"], label=q["question"])
|
241 |
+
form_radios.append(radio)
|
242 |
+
with gr.Row():
|
243 |
+
back_to_questions_btn = gr.Button("Back to Questions", variant="secondary")
|
244 |
+
submit_forms_btn = gr.Button("Submit Forms", variant="primary")
|
245 |
+
|
246 |
+
# Completion Section (unchanged layout)
|
247 |
with gr.Column(visible=False, elem_id="completion") as completion_section:
|
248 |
gr.Markdown("# Thank You!")
|
249 |
gr.Markdown("### Completion code: `CA7IOI65`")
|
250 |
+
completion_md = gr.Markdown("Your annotations and form responses have been saved.")
|
251 |
gr.HTML("""
|
252 |
<p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
|
253 |
""")
|
254 |
|
255 |
+
# Updated handle_id_submit to assign 40 random questions
|
256 |
def handle_id_submit(prolific_id_val):
|
257 |
if not prolific_id_val.strip():
|
258 |
raise gr.Error("Please enter a valid Prolific ID")
|
259 |
state.prolific_id = prolific_id_val.strip()
|
260 |
data = load_latest_data(state.prolific_id)
|
|
|
261 |
if data:
|
262 |
+
if state.forms_completed:
|
|
|
|
|
|
|
263 |
return {
|
264 |
id_section: gr.update(visible=False),
|
265 |
main_interface: gr.update(visible=False),
|
266 |
+
forms_section: gr.update(visible=False),
|
267 |
completion_section: gr.update(visible=True)
|
268 |
}
|
269 |
+
elif state.current_idx >= 40:
|
270 |
+
return {
|
271 |
+
id_section: gr.update(visible=False),
|
272 |
+
main_interface: gr.update(visible=False),
|
273 |
+
forms_section: gr.update(visible=True),
|
274 |
+
completion_section: gr.update(visible=False)
|
275 |
+
}
|
276 |
+
else:
|
277 |
+
return {
|
278 |
+
id_section: gr.update(visible=False),
|
279 |
+
main_interface: gr.update(visible=True),
|
280 |
+
forms_section: gr.update(visible=False),
|
281 |
+
completion_section: gr.update(visible=False),
|
282 |
+
**update_interface(state.current_idx)
|
283 |
+
}
|
284 |
else:
|
285 |
+
# New user: assign 40 random questions
|
286 |
+
seed = hash(state.prolific_id) % 1000000
|
287 |
+
random.seed(seed)
|
288 |
+
total_questions = len(response_pairs)
|
289 |
+
if total_questions < 40:
|
290 |
+
raise ValueError("Not enough questions available")
|
291 |
+
state.selected_indices = random.sample(range(total_questions), 40)
|
292 |
+
state.annotations = [None] * 40
|
293 |
+
state.form_responses = {}
|
294 |
+
state.forms_completed = False
|
295 |
state.current_idx = 0
|
296 |
+
return {
|
297 |
+
id_section: gr.update(visible=False),
|
298 |
+
main_interface: gr.update(visible=True),
|
299 |
+
forms_section: gr.update(visible=False),
|
300 |
+
completion_section: gr.update(visible=False),
|
301 |
+
**update_interface(0)
|
302 |
+
}
|
303 |
|
304 |
+
# Updated update_interface to use selected_indices
|
305 |
+
def update_interface(current_idx):
|
306 |
+
if current_idx >= 40:
|
307 |
+
current_idx = 39
|
308 |
+
actual_idx = state.selected_indices[current_idx]
|
309 |
+
current_data = response_pairs[actual_idx]
|
310 |
+
progress = f"**Progress:** {current_idx/40:.0%} ({min(current_idx, 40)}/40)"
|
311 |
+
annotation = state.annotations[current_idx] if current_idx < len(state.annotations) else None
|
|
|
|
|
|
|
|
|
|
|
312 |
return {
|
313 |
prompt_box: current_data.get("prompt", ""),
|
314 |
response_a: current_data.get("responseA", ""),
|
|
|
319 |
selection_radio: annotation["selected"] if annotation else None
|
320 |
}
|
321 |
|
322 |
+
# Updated handle_navigation to transition to forms_section after 40 questions
|
323 |
def handle_navigation(direction, selection, confidence_val, feedback_val):
|
324 |
error_msg = None
|
325 |
if direction == "next":
|
|
|
327 |
error_msg = "Please select a response before proceeding."
|
328 |
if not confidence_val:
|
329 |
error_msg = "Please select a confidence level before proceeding."
|
|
|
330 |
if error_msg:
|
331 |
gr.Warning(error_msg)
|
332 |
return {
|
333 |
main_interface: gr.update(visible=True),
|
334 |
+
forms_section: gr.update(visible=False),
|
335 |
completion_section: gr.update(visible=False),
|
336 |
**update_interface(state.current_idx)
|
337 |
}
|
|
|
|
|
338 |
if selection and confidence_val:
|
339 |
+
actual_idx = state.selected_indices[state.current_idx]
|
340 |
annotation = {
|
341 |
+
"id": response_pairs[actual_idx]["id"],
|
342 |
+
"prompt": response_pairs[actual_idx]["prompt"],
|
343 |
"selected": selection,
|
344 |
"confidence": confidence_val,
|
345 |
"feedback": feedback_val,
|
346 |
"timestamp": datetime.now().isoformat()
|
347 |
}
|
348 |
+
state.annotations[state.current_idx] = annotation
|
349 |
+
if direction == "next":
|
350 |
+
new_idx = min(state.current_idx + 1, 40)
|
351 |
+
else:
|
352 |
+
new_idx = max(0, state.current_idx - 1)
|
353 |
+
state.current_idx = new_idx
|
354 |
+
save_annotations()
|
355 |
+
if new_idx >= 40:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
return {
|
357 |
+
main_interface: gr.update(visible=False),
|
358 |
+
forms_section: gr.update(visible=True),
|
359 |
completion_section: gr.update(visible=False),
|
360 |
+
**update_interface(39) # Keep last question state
|
361 |
}
|
362 |
+
else:
|
|
|
|
|
363 |
return {
|
364 |
main_interface: gr.update(visible=True),
|
365 |
+
forms_section: gr.update(visible=False),
|
366 |
completion_section: gr.update(visible=False),
|
367 |
+
**update_interface(new_idx)
|
368 |
+
}
|
369 |
+
|
370 |
+
# New function to handle returning to questions from forms
|
371 |
+
def handle_back_to_questions():
|
372 |
+
state.current_idx = 39
|
373 |
+
save_annotations()
|
374 |
+
return {
|
375 |
+
main_interface: gr.update(visible=True),
|
376 |
+
forms_section: gr.update(visible=False),
|
377 |
+
completion_section: gr.update(visible=False),
|
378 |
+
**update_interface(39)
|
379 |
+
}
|
380 |
+
|
381 |
+
# New function to handle form submission
|
382 |
+
def handle_forms_submit(*form_inputs):
|
383 |
+
if any(input_val is None for input_val in form_inputs):
|
384 |
+
gr.Warning("Please answer all questions before submitting.")
|
385 |
+
return {
|
386 |
+
forms_section: gr.update(visible=True),
|
387 |
+
completion_section: gr.update(visible=False)
|
388 |
}
|
389 |
+
state.form_responses = {}
|
390 |
+
idx = 0
|
391 |
+
for form_name, questions in forms_questions.items():
|
392 |
+
for q in questions:
|
393 |
+
key = f"{form_name}_{q['question']}" # Convert tuple to string
|
394 |
+
state.form_responses[key] = form_inputs[idx]
|
395 |
+
idx += 1
|
396 |
+
state.forms_completed = True
|
397 |
+
save_annotations()
|
398 |
+
return {
|
399 |
+
forms_section: gr.update(visible=False),
|
400 |
+
completion_section: gr.update(visible=True)
|
401 |
+
}
|
402 |
|
403 |
# Event bindings
|
404 |
id_submit_btn.click(
|
405 |
handle_id_submit,
|
406 |
inputs=prolific_id,
|
407 |
+
outputs=[id_section, main_interface, forms_section, completion_section, prompt_box,
|
408 |
+
response_a, response_b, progress_md, feedback, confidence, selection_radio]
|
409 |
)
|
410 |
|
411 |
prev_btn.click(
|
412 |
handle_navigation,
|
413 |
inputs=[gr.State("prev"), selection_radio, confidence, feedback],
|
414 |
+
outputs=[main_interface, forms_section, completion_section, prompt_box, response_a,
|
415 |
+
response_b, progress_md, feedback, confidence, selection_radio]
|
416 |
)
|
417 |
|
418 |
next_btn.click(
|
419 |
handle_navigation,
|
420 |
inputs=[gr.State("next"), selection_radio, confidence, feedback],
|
421 |
+
outputs=[main_interface, forms_section, completion_section, prompt_box, response_a,
|
422 |
+
response_b, progress_md, feedback, confidence, selection_radio]
|
423 |
+
)
|
424 |
+
|
425 |
+
back_to_questions_btn.click(
|
426 |
+
handle_back_to_questions,
|
427 |
+
inputs=[],
|
428 |
+
outputs=[main_interface, forms_section, completion_section, prompt_box, response_a,
|
429 |
+
response_b, progress_md, feedback, confidence, selection_radio]
|
430 |
+
)
|
431 |
+
|
432 |
+
submit_forms_btn.click(
|
433 |
+
handle_forms_submit,
|
434 |
+
inputs=form_radios,
|
435 |
+
outputs=[forms_section, completion_section]
|
436 |
)
|
437 |
|
438 |
return demo
|
439 |
|
440 |
if __name__ == "__main__":
|
441 |
app = create_interface()
|
442 |
+
app.launch()
|