sutdaiday
committed on
Commit
·
15bfa69
1
Parent(s):
f997404
fixed
Browse files- ambuj_ESConv_results.csv +2 -0
- app.py +125 -312
ambuj_ESConv_results.csv
CHANGED
@@ -2,3 +2,5 @@ UserID,Dataset,Background,Response A Method,Response B Method,Identification,Com
|
|
2 |
ambuj,ESConv,User misses family who live overseas and feels homesick.,PPDPP,Ours,Prefer Response A,Prefer Response B,Prefer Response A,No preference
|
3 |
ambuj,ESConv,User complains of creative block when starting a new art project.,DPDP,Ours,Prefer Response A,Prefer Response A,Prefer Response B,No preference
|
4 |
ambuj,ESConv,User feels bombarded by negative social media comparisons.,DPDP,Ours,Prefer Response A,Prefer Response B,Prefer Response A,Prefer Response B
|
|
|
|
|
|
2 |
ambuj,ESConv,User misses family who live overseas and feels homesick.,PPDPP,Ours,Prefer Response A,Prefer Response B,Prefer Response A,No preference
|
3 |
ambuj,ESConv,User complains of creative block when starting a new art project.,DPDP,Ours,Prefer Response A,Prefer Response A,Prefer Response B,No preference
|
4 |
ambuj,ESConv,User feels bombarded by negative social media comparisons.,DPDP,Ours,Prefer Response A,Prefer Response B,Prefer Response A,Prefer Response B
|
5 |
+
ambuj,ESConv,User expresses fear about planning a solo travel adventure.,Ours,DPDP,Prefer Response A,Prefer Response A,Prefer Response B,Prefer Response B
|
6 |
+
ambuj,ESConv,User expresses fear about planning a solo travel adventure.,Ours,PPDPP,Prefer Response A,Prefer Response A,Prefer Response B,Prefer Response A
|
app.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
-
import random
|
|
|
|
|
4 |
import sys
|
5 |
|
6 |
-
print(">>> Gradio imported from:", gr.__file__)
|
7 |
-
print(">>> Gradio version :", getattr(gr, "__version__", "n/a"))
|
8 |
-
print(">>> sys.path contains :", sys.path[:5])
|
9 |
|
10 |
# ─── Configuration ────────────────────────────────────────────────────────────
|
11 |
DATASET_FILES = {
|
@@ -19,21 +21,18 @@ def prepare_examples(user_id: str, dataset: str):
|
|
19 |
Read CSV, create Ours-vs-[PPDPP|DPDP] pairs, shuffle + randomise L/R.
|
20 |
A deterministic seed (user_id+dataset) keeps the order stable for reloads.
|
21 |
"""
|
22 |
-
random.seed(f"{user_id}_{dataset}")
|
23 |
df = pd.read_csv(DATASET_FILES[dataset])
|
24 |
pairs = []
|
25 |
for _, row in df.iterrows():
|
26 |
for other in ["PPDPP", "DPDP"]:
|
27 |
-
pairs.append(
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
)
|
34 |
-
)
|
35 |
random.shuffle(pairs)
|
36 |
-
|
37 |
prepared = []
|
38 |
for item in pairs:
|
39 |
if random.random() < 0.5:
|
@@ -42,15 +41,13 @@ def prepare_examples(user_id: str, dataset: str):
|
|
42 |
else:
|
43 |
left_text, right_text = item["other"], item["ours"]
|
44 |
left_name, right_name = item["other_name"], "Ours"
|
45 |
-
prepared.append(
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
)
|
53 |
-
)
|
54 |
return prepared
|
55 |
|
56 |
|
@@ -60,24 +57,22 @@ def save_all_to_csv(user_id, dataset, examples, responses):
|
|
60 |
return
|
61 |
filename = f"{user_id}_{dataset}_results.csv"
|
62 |
metrics = list(next(iter(responses.values())).keys())
|
63 |
-
header =
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
with open(filename, "w", newline="", encoding="utf-8") as f:
|
68 |
writer = csv.DictWriter(f, fieldnames=header)
|
69 |
writer.writeheader()
|
70 |
for idx in sorted(responses):
|
71 |
ex = examples[idx]
|
72 |
-
row =
|
73 |
-
UserID
|
74 |
-
Dataset
|
75 |
-
Background
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
},
|
80 |
-
)
|
81 |
row.update(responses[idx])
|
82 |
writer.writerow(row)
|
83 |
|
@@ -90,30 +85,25 @@ def load_responses_from_csv(user_id, dataset, examples):
|
|
90 |
filename = f"{user_id}_{dataset}_results.csv"
|
91 |
if not os.path.exists(filename):
|
92 |
return {}
|
93 |
-
|
94 |
df = pd.read_csv(filename)
|
95 |
-
|
96 |
-
# Build lookup: (background, a_method, b_method) β idx
|
97 |
idx_map = {
|
98 |
(ex["background"], ex["left_name"], ex["right_name"]): i
|
99 |
for i, ex in enumerate(examples)
|
100 |
}
|
101 |
-
|
102 |
responses = {}
|
103 |
for _, row in df.iterrows():
|
104 |
-
key = (
|
|
|
|
|
|
|
|
|
105 |
if key in idx_map:
|
106 |
idx = idx_map[key]
|
107 |
metric_cols = [
|
108 |
-
c
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
"UserID",
|
113 |
-
"Dataset",
|
114 |
-
"Background",
|
115 |
-
"Response A Method",
|
116 |
-
"Response B Method",
|
117 |
]
|
118 |
]
|
119 |
responses[idx] = {k: row[k] for k in metric_cols}
|
@@ -125,31 +115,20 @@ def es_load_example(idx, examples, responses):
|
|
125 |
ex = examples[idx]
|
126 |
prev = responses.get(idx, {})
|
127 |
return (
|
128 |
-
ex["background"],
|
129 |
-
ex["left_text"],
|
130 |
-
ex["right_text"],
|
131 |
f"Item {idx+1} of {len(examples)}",
|
132 |
-
prev.get("Identification"),
|
133 |
-
prev.get("
|
134 |
-
prev.get("Suggestion"),
|
135 |
-
prev.get("Overall"),
|
136 |
-
"", # error
|
137 |
)
|
138 |
|
139 |
-
|
140 |
def cb_load_example(idx, examples, responses):
|
141 |
ex = examples[idx]
|
142 |
prev = responses.get(idx, {})
|
143 |
return (
|
144 |
-
ex["background"],
|
145 |
-
ex["left_text"],
|
146 |
-
ex["right_text"],
|
147 |
f"Item {idx+1} of {len(examples)}",
|
148 |
-
prev.get("Persuasiveness"),
|
149 |
-
prev.get("
|
150 |
-
prev.get("Naturalness"),
|
151 |
-
prev.get("Overall"),
|
152 |
-
"", # error
|
153 |
)
|
154 |
|
155 |
|
@@ -157,344 +136,178 @@ def cb_load_example(idx, examples, responses):
|
|
157 |
def login_fn(user_id, dataset):
|
158 |
if not user_id or not dataset:
|
159 |
raise gr.Error("Please enter your User ID and select a dataset.")
|
160 |
-
|
161 |
examples = prepare_examples(user_id, dataset)
|
162 |
responses = load_responses_from_csv(user_id, dataset, examples)
|
163 |
idx = 0
|
164 |
-
|
165 |
if dataset == "ESConv":
|
166 |
-
(
|
167 |
-
bg,
|
168 |
-
lft,
|
169 |
-
rgt,
|
170 |
-
prog,
|
171 |
-
ident,
|
172 |
-
com,
|
173 |
-
sug,
|
174 |
-
ovl,
|
175 |
-
err_es,
|
176 |
-
) = es_load_example(idx, examples, responses)
|
177 |
-
# CB placeholders
|
178 |
bg_cb = lft_cb = rgt_cb = prog_cb = ""
|
179 |
per = coh = nat = ovl_cb = None
|
180 |
err_cb = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
else:
|
182 |
-
(
|
183 |
-
bg_cb,
|
184 |
-
lft_cb,
|
185 |
-
rgt_cb,
|
186 |
-
prog_cb,
|
187 |
-
per,
|
188 |
-
coh,
|
189 |
-
nat,
|
190 |
-
ovl_cb,
|
191 |
-
err_cb,
|
192 |
-
) = cb_load_example(idx, examples, responses)
|
193 |
-
# ESConv placeholders
|
194 |
bg = lft = rgt = prog = ""
|
195 |
ident = com = sug = ovl = None
|
196 |
err_es = ""
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
# CB outputs
|
206 |
-
bg_cb, lft_cb, rgt_cb, prog_cb, per, coh, nat, ovl_cb, err_cb,
|
207 |
-
)
|
208 |
-
|
209 |
|
210 |
def logout_fn(user_id, dataset, examples, idx, responses):
|
211 |
-
# Save before quitting
|
212 |
if dataset:
|
213 |
save_all_to_csv(user_id, dataset, examples, responses)
|
214 |
-
|
215 |
-
# Empty / reset everything
|
216 |
return (
|
217 |
-
gr.update(visible=True),
|
218 |
-
gr.update(visible=False),
|
219 |
-
gr.update(visible=False),
|
220 |
-
"", "", [], 0, {},
|
221 |
-
*[""] * 9,
|
222 |
-
*[""] * 10
|
223 |
)
|
224 |
|
225 |
|
226 |
# ─── Navigation callback helpers ──────────────────────────────────────────────
|
227 |
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
|
228 |
if None in (ident, com, sug, ovl):
|
229 |
-
|
230 |
-
|
231 |
-
ex["background"],
|
232 |
-
ex["left_text"],
|
233 |
-
ex["right_text"],
|
234 |
-
f"Item {idx+1} of {len(examples)}",
|
235 |
-
idx,
|
236 |
-
responses,
|
237 |
-
ident,
|
238 |
-
com,
|
239 |
-
sug,
|
240 |
-
ovl,
|
241 |
-
"All metrics must be answered before proceeding.",
|
242 |
-
)
|
243 |
-
|
244 |
-
responses[idx] = dict(
|
245 |
-
Identification=ident, Comforting=com, Suggestion=sug, Overall=ovl
|
246 |
-
)
|
247 |
save_all_to_csv(user_id, dataset, examples, responses)
|
248 |
idx += 1
|
249 |
-
|
250 |
if idx >= len(examples):
|
251 |
-
return ("π© Survey complete! Thank you.",) * 4 + (
|
252 |
-
|
253 |
-
responses,
|
254 |
-
None,
|
255 |
-
None,
|
256 |
-
None,
|
257 |
-
None,
|
258 |
-
"",
|
259 |
-
)
|
260 |
-
|
261 |
-
return es_load_example(idx, examples, responses)[:4] + (
|
262 |
-
idx,
|
263 |
-
responses,
|
264 |
-
) + es_load_example(idx, examples, responses)[4:]
|
265 |
-
|
266 |
|
267 |
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
|
268 |
if None not in (ident, com, sug, ovl):
|
269 |
-
responses[idx] =
|
270 |
-
Identification=ident, Comforting=com, Suggestion=sug, Overall=ovl
|
271 |
-
)
|
272 |
save_all_to_csv(user_id, dataset, examples, responses)
|
273 |
-
|
274 |
idx = max(0, idx - 1)
|
275 |
-
return es_load_example(idx, examples, responses)[:4]
|
276 |
-
idx,
|
277 |
-
responses,
|
278 |
-
) + es_load_example(idx, examples, responses)[4:]
|
279 |
-
|
280 |
|
281 |
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
|
282 |
if None in (per, coh, nat, ovl_cb):
|
283 |
-
|
284 |
-
|
285 |
-
ex["background"],
|
286 |
-
ex["left_text"],
|
287 |
-
ex["right_text"],
|
288 |
-
f"Item {idx+1} of {len(examples)}",
|
289 |
-
idx,
|
290 |
-
responses,
|
291 |
-
per,
|
292 |
-
coh,
|
293 |
-
nat,
|
294 |
-
ovl_cb,
|
295 |
-
"All metrics must be answered before proceeding.",
|
296 |
-
)
|
297 |
-
|
298 |
-
responses[idx] = dict(
|
299 |
-
Persuasiveness=per, Coherence=coh, Naturalness=nat, Overall=ovl_cb
|
300 |
-
)
|
301 |
save_all_to_csv(user_id, dataset, examples, responses)
|
302 |
idx += 1
|
303 |
-
|
304 |
if idx >= len(examples):
|
305 |
return ("π© Survey complete! Thank you.",) * 5 + (None, "")
|
306 |
-
|
307 |
-
return cb_load_example(idx, examples, responses)[:4] + (
|
308 |
-
idx,
|
309 |
-
responses,
|
310 |
-
) + cb_load_example(idx, examples, responses)[4:]
|
311 |
-
|
312 |
|
313 |
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
|
314 |
if None not in (per, coh, nat, ovl_cb):
|
315 |
-
responses[idx] =
|
316 |
-
Persuasiveness=per, Coherence=coh, Naturalness=nat, Overall=ovl_cb
|
317 |
-
)
|
318 |
save_all_to_csv(user_id, dataset, examples, responses)
|
319 |
-
|
320 |
idx = max(0, idx - 1)
|
321 |
-
return cb_load_example(idx, examples, responses)[:4]
|
322 |
-
idx,
|
323 |
-
responses,
|
324 |
-
) + cb_load_example(idx, examples, responses)[4:]
|
325 |
|
326 |
|
327 |
# ─── Build Gradio App ─────────────────────────────────────────────────────────
|
328 |
with gr.Blocks(title="Human Evaluation Survey") as demo:
|
329 |
-
#
|
330 |
with gr.Column() as login_panel:
|
331 |
gr.Markdown("## Human Evaluation Survey")
|
332 |
user_id_in = gr.Textbox(label="User ID")
|
333 |
-
ds_dd
|
334 |
-
start_btn
|
335 |
|
336 |
# Shared state
|
337 |
-
uid_state
|
338 |
-
ds_state
|
339 |
-
ex_state
|
340 |
-
idx_state
|
341 |
-
resp_state = gr.State({})
|
342 |
|
343 |
-
#
|
344 |
with gr.Column(visible=False) as es_panel:
|
345 |
-
bg
|
346 |
with gr.Row():
|
347 |
lbox = gr.Textbox(label="Response A", interactive=False)
|
348 |
rbox = gr.Textbox(label="Response B", interactive=False)
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
)
|
354 |
-
com = gr.Radio(
|
355 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
356 |
-
label="Comforting (Com.)",
|
357 |
-
)
|
358 |
-
sug = gr.Radio(
|
359 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
360 |
-
label="Suggestion (Sug.)",
|
361 |
-
)
|
362 |
-
ovl_es = gr.Radio(
|
363 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
364 |
-
label="Overall (Ov.)",
|
365 |
-
)
|
366 |
err_es = gr.HTML(visible=False)
|
367 |
-
prog
|
368 |
with gr.Row():
|
369 |
-
prev_btn
|
370 |
-
next_btn
|
371 |
-
logout_es
|
372 |
|
373 |
-
#
|
374 |
with gr.Column(visible=False) as cb_panel:
|
375 |
-
bg_cb
|
376 |
with gr.Row():
|
377 |
lbox_cb = gr.Textbox(label="Response A", interactive=False)
|
378 |
rbox_cb = gr.Textbox(label="Response B", interactive=False)
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
)
|
384 |
-
coh = gr.Radio(
|
385 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
386 |
-
label="Coherence (Coh.)",
|
387 |
-
)
|
388 |
-
nat = gr.Radio(
|
389 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
390 |
-
label="Naturalness (Nat.)",
|
391 |
-
)
|
392 |
-
ovl_cb = gr.Radio(
|
393 |
-
["Prefer Response A", "Prefer Response B", "No preference"],
|
394 |
-
label="Overall",
|
395 |
-
)
|
396 |
err_cb = gr.HTML(visible=False)
|
397 |
-
prog_cb = gr.
|
398 |
with gr.Row():
|
399 |
-
prev_cb
|
400 |
-
next_cb
|
401 |
logout_cb = gr.Button("πͺ Logout")
|
402 |
|
403 |
-
#
|
404 |
start_btn.click(
|
405 |
login_fn,
|
406 |
inputs=[user_id_in, ds_dd],
|
407 |
outputs=[
|
408 |
-
login_panel,
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
ex_state,
|
414 |
-
idx_state,
|
415 |
-
resp_state,
|
416 |
-
# ESConv
|
417 |
-
bg,
|
418 |
-
lbox,
|
419 |
-
rbox,
|
420 |
-
prog,
|
421 |
-
ident,
|
422 |
-
com,
|
423 |
-
sug,
|
424 |
-
ovl_es,
|
425 |
-
err_es,
|
426 |
-
# CB
|
427 |
-
bg_cb,
|
428 |
-
lbox_cb,
|
429 |
-
rbox_cb,
|
430 |
-
prog_cb,
|
431 |
-
per,
|
432 |
-
coh,
|
433 |
-
nat,
|
434 |
-
ovl_cb,
|
435 |
-
err_cb,
|
436 |
-
],
|
437 |
)
|
438 |
|
439 |
-
# ESConv navigation
|
440 |
next_btn.click(
|
441 |
es_next_fn,
|
442 |
inputs=[uid_state, ds_state, ex_state, idx_state, resp_state, ident, com, sug, ovl_es],
|
443 |
-
outputs=[bg, lbox, rbox, prog, idx_state, resp_state, ident, com, sug, ovl_es, err_es]
|
444 |
-
|
445 |
prev_btn.click(
|
446 |
es_prev_fn,
|
447 |
inputs=[uid_state, ds_state, ex_state, idx_state, resp_state, ident, com, sug, ovl_es],
|
448 |
-
outputs=[bg, lbox, rbox, prog, idx_state, resp_state, ident, com, sug, ovl_es, err_es]
|
449 |
-
)
|
450 |
|
451 |
-
# CB navigation
|
452 |
next_cb.click(
|
453 |
cb_next_fn,
|
454 |
inputs=[uid_state, ds_state, ex_state, idx_state, resp_state, per, coh, nat, ovl_cb],
|
455 |
-
outputs=[bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state, per, coh, nat, ovl_cb, err_cb]
|
456 |
-
|
457 |
prev_cb.click(
|
458 |
cb_prev_fn,
|
459 |
inputs=[uid_state, ds_state, ex_state, idx_state, resp_state, per, coh, nat, ovl_cb],
|
460 |
-
outputs=[bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state, per, coh, nat, ovl_cb, err_cb]
|
461 |
-
)
|
462 |
|
463 |
-
# Logout buttons (both panels share same callback)
|
464 |
for logout_btn in (logout_es, logout_cb):
|
465 |
logout_btn.click(
|
466 |
logout_fn,
|
467 |
inputs=[uid_state, ds_state, ex_state, idx_state, resp_state],
|
468 |
outputs=[
|
469 |
-
login_panel,
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
ex_state,
|
475 |
-
idx_state,
|
476 |
-
resp_state,
|
477 |
-
bg,
|
478 |
-
lbox,
|
479 |
-
rbox,
|
480 |
-
prog,
|
481 |
-
ident,
|
482 |
-
com,
|
483 |
-
sug,
|
484 |
-
ovl_es,
|
485 |
-
err_es,
|
486 |
-
bg_cb,
|
487 |
-
lbox_cb,
|
488 |
-
rbox_cb,
|
489 |
-
prog_cb,
|
490 |
-
per,
|
491 |
-
coh,
|
492 |
-
nat,
|
493 |
-
ovl_cb,
|
494 |
-
err_cb,
|
495 |
-
],
|
496 |
)
|
497 |
|
498 |
-
# ─── Run ──────────────────────────────────────────────────────────────────────
|
499 |
if __name__ == "__main__":
|
500 |
demo.launch(share=True)
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
+
import random
|
4 |
+
import os
|
5 |
+
import csv
|
6 |
import sys
|
7 |
|
8 |
+
# print(">>> Gradio imported from:", gr.__file__)
|
9 |
+
# print(">>> Gradio version :", getattr(gr, "__version__", "n/a"))
|
10 |
+
# print(">>> sys.path contains :", sys.path[:5])
|
11 |
|
12 |
# ─── Configuration ────────────────────────────────────────────────────────────
|
13 |
DATASET_FILES = {
|
|
|
21 |
Read CSV, create Ours-vs-[PPDPP|DPDP] pairs, shuffle + randomise L/R.
|
22 |
A deterministic seed (user_id+dataset) keeps the order stable for reloads.
|
23 |
"""
|
24 |
+
random.seed(f"{user_id}_{dataset}")
|
25 |
df = pd.read_csv(DATASET_FILES[dataset])
|
26 |
pairs = []
|
27 |
for _, row in df.iterrows():
|
28 |
for other in ["PPDPP", "DPDP"]:
|
29 |
+
pairs.append({
|
30 |
+
"background": row["Background Information"],
|
31 |
+
"ours": row["Ours"],
|
32 |
+
"other": row[other],
|
33 |
+
"other_name": other,
|
34 |
+
})
|
|
|
|
|
35 |
random.shuffle(pairs)
|
|
|
36 |
prepared = []
|
37 |
for item in pairs:
|
38 |
if random.random() < 0.5:
|
|
|
41 |
else:
|
42 |
left_text, right_text = item["other"], item["ours"]
|
43 |
left_name, right_name = item["other_name"], "Ours"
|
44 |
+
prepared.append({
|
45 |
+
"background": item["background"],
|
46 |
+
"left_text": left_text,
|
47 |
+
"right_text": right_text,
|
48 |
+
"left_name": left_name,
|
49 |
+
"right_name": right_name,
|
50 |
+
})
|
|
|
|
|
51 |
return prepared
|
52 |
|
53 |
|
|
|
57 |
return
|
58 |
filename = f"{user_id}_{dataset}_results.csv"
|
59 |
metrics = list(next(iter(responses.values())).keys())
|
60 |
+
header = [
|
61 |
+
"UserID", "Dataset", "Background",
|
62 |
+
"Response A Method", "Response B Method"
|
63 |
+
] + metrics
|
64 |
with open(filename, "w", newline="", encoding="utf-8") as f:
|
65 |
writer = csv.DictWriter(f, fieldnames=header)
|
66 |
writer.writeheader()
|
67 |
for idx in sorted(responses):
|
68 |
ex = examples[idx]
|
69 |
+
row = {
|
70 |
+
"UserID": user_id,
|
71 |
+
"Dataset": dataset,
|
72 |
+
"Background": ex["background"],
|
73 |
+
"Response A Method": ex["left_name"],
|
74 |
+
"Response B Method": ex["right_name"],
|
75 |
+
}
|
|
|
|
|
76 |
row.update(responses[idx])
|
77 |
writer.writerow(row)
|
78 |
|
|
|
85 |
filename = f"{user_id}_{dataset}_results.csv"
|
86 |
if not os.path.exists(filename):
|
87 |
return {}
|
|
|
88 |
df = pd.read_csv(filename)
|
|
|
|
|
89 |
idx_map = {
|
90 |
(ex["background"], ex["left_name"], ex["right_name"]): i
|
91 |
for i, ex in enumerate(examples)
|
92 |
}
|
|
|
93 |
responses = {}
|
94 |
for _, row in df.iterrows():
|
95 |
+
key = (
|
96 |
+
row["Background"],
|
97 |
+
row["Response A Method"],
|
98 |
+
row["Response B Method"]
|
99 |
+
)
|
100 |
if key in idx_map:
|
101 |
idx = idx_map[key]
|
102 |
metric_cols = [
|
103 |
+
c for c in row.index
|
104 |
+
if c not in [
|
105 |
+
"UserID", "Dataset", "Background",
|
106 |
+
"Response A Method", "Response B Method"
|
|
|
|
|
|
|
|
|
|
|
107 |
]
|
108 |
]
|
109 |
responses[idx] = {k: row[k] for k in metric_cols}
|
|
|
115 |
ex = examples[idx]
|
116 |
prev = responses.get(idx, {})
|
117 |
return (
|
118 |
+
ex["background"], ex["left_text"], ex["right_text"],
|
|
|
|
|
119 |
f"Item {idx+1} of {len(examples)}",
|
120 |
+
prev.get("Identification"), prev.get("Comforting"),
|
121 |
+
prev.get("Suggestion"), prev.get("Overall"), "",
|
|
|
|
|
|
|
122 |
)
|
123 |
|
|
|
124 |
def cb_load_example(idx, examples, responses):
    """Build the nine display values for item ``idx`` of the "cb" panel.

    Args:
        idx: Zero-based position of the item in ``examples``.
        examples: Sequence of dicts providing ``"background"``,
            ``"left_text"`` and ``"right_text"``.
        responses: Mapping from item index to a dict of previously saved
            ratings; a missing index means nothing has been saved yet.

    Returns:
        A 9-tuple: background, left text, right text, progress label, the
        saved ``Persuasiveness``/``Coherence``/``Naturalness``/``Overall``
        ratings (``None`` where absent), and an empty error string.
    """
    example = examples[idx]
    saved = responses.get(idx, {})

    shown = tuple(
        example[field] for field in ("background", "left_text", "right_text")
    )
    progress = f"Item {idx+1} of {len(examples)}"
    ratings = tuple(
        saved.get(metric)
        for metric in ("Persuasiveness", "Coherence", "Naturalness", "Overall")
    )
    # The trailing "" fills the error-message slot.
    return shown + (progress,) + ratings + ("",)
|
133 |
|
134 |
|
|
|
136 |
def login_fn(user_id, dataset):
    """Start a survey session and produce the initial UI values.

    Prepares the example list for ``user_id``/``dataset``, restores any
    previously saved responses, and loads the first item into the panel that
    matches the dataset (the ESConv panel for ``"ESConv"``, the other panel
    for anything else).  The inactive panel receives blank placeholders.

    Returns:
        A 26-tuple: three visibility updates (login, ESConv panel, cb panel),
        the five state values (user id, dataset, examples, index, responses),
        nine ESConv panel values, then nine cb panel values.

    Raises:
        gr.Error: if ``user_id`` or ``dataset`` is empty.
    """
    if not (user_id and dataset):
        raise gr.Error("Please enter your User ID and select a dataset.")

    examples = prepare_examples(user_id, dataset)
    responses = load_responses_from_csv(user_id, dataset, examples)
    first = 0

    # Placeholder for the panel that stays hidden: four empty text fields,
    # four unset radios, and an empty error message.
    blank_panel = ("", "", "", "", None, None, None, None, "")

    use_esconv = dataset == "ESConv"
    if use_esconv:
        es_values = tuple(es_load_example(first, examples, responses))
        cb_values = blank_panel
    else:
        es_values = blank_panel
        cb_values = tuple(cb_load_example(first, examples, responses))

    return (
        gr.update(visible=False),
        gr.update(visible=use_esconv),
        gr.update(visible=not use_esconv),
        user_id, dataset, examples, first, responses,
        *es_values,
        *cb_values,
    )
|
|
|
|
|
|
|
|
|
168 |
|
169 |
def logout_fn(user_id, dataset, examples, idx, responses):
    """Persist the session (if one is active) and reset the UI to the login view.

    ``save_all_to_csv`` is only called when ``dataset`` is truthy, i.e. when a
    session was actually started.

    Returns:
        A 26-tuple matching the logout buttons' output list: three visibility
        updates (login shown, both survey panels hidden), the five state
        values reset to ``""``, ``""``, ``[]``, ``0``, ``{}``, and two
        nine-value panel resets.

    The previous version returned 27 values (9 + 10 panel slots) for 26
    outputs and reset the radio groups to ``""``.  Each panel has exactly nine
    slots, and radios are reset to ``None`` so they read as "unanswered" to
    the ``None in (...)`` validation used by the next-handlers.
    """
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)

    # background, response A, response B, progress, four radios, error text
    panel_reset = ("", "", "", "", None, None, None, None, "")

    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "", "", [], 0, {},
        *panel_reset,
        *panel_reset,
    )
|
180 |
|
181 |
|
182 |
# ─── Navigation callback helpers ──────────────────────────────────────────────
|
183 |
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """Record the ESConv ratings for item ``idx`` and advance to the next item.

    Returns an 11-tuple: background, response A, response B, progress text,
    item index, responses dict, the four radio values, and an error message.

    * If any rating is ``None`` the current item is shown again, the radios
      keep their values, and a validation message fills the last slot.
    * Otherwise the ratings are stored under ``responses[idx]`` (the dict is
      mutated in place), everything is written with ``save_all_to_csv``, and
      the next item is loaded.
    * Once the index is past the last example the completion message is
      returned in the four text slots.  The same check runs on entry, so
      pressing "Next" again on the completion screen no longer indexes past
      the end of ``examples``.
    """
    # NOTE(review): the leading characters of this message look like a
    # mis-decoded emoji; confirm against the original file.
    done_msg = "π© Survey complete! Thank you."

    def finished(position):
        # Completion screen: message in the text slots, radios cleared.
        return (done_msg,) * 4 + (position, responses, None, None, None, None, "")

    if idx >= len(examples):
        return finished(idx)

    if None in (ident, com, sug, ovl):
        current = es_load_example(idx, examples, responses)
        return (
            *current[:4], idx, responses, ident, com, sug, ovl,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = {
        "Identification": ident,
        "Comforting": com,
        "Suggestion": sug,
        "Overall": ovl,
    }
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        return finished(idx)

    # Load once and splice the index/state pair between text and radio values.
    upcoming = es_load_example(idx, examples, responses)
    return (*upcoming[:4], idx, responses, *upcoming[4:])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """Step back one ESConv item, saving the current ratings if complete.

    The current ratings are stored under ``responses[idx]`` and written with
    ``save_all_to_csv`` only when all four are set; a partially answered item
    is left unsaved.  The index is then decremented, never below zero.

    Returns:
        An 11-tuple: the four text values of the target item, the new index,
        the responses dict, then the remaining values from
        ``es_load_example`` (radio values and error text).
    """
    ratings = (ident, com, sug, ovl)
    if None not in ratings:
        labels = ("Identification", "Comforting", "Suggestion", "Overall")
        responses[idx] = dict(zip(labels, ratings))
        save_all_to_csv(user_id, dataset, examples, responses)

    target = max(0, idx - 1)
    loaded = es_load_example(target, examples, responses)
    return (*loaded[:4], target, responses, *loaded[4:])
|
|
|
|
|
|
|
|
|
199 |
|
200 |
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """Record the cb-panel ratings for item ``idx`` and advance to the next item.

    Returns an 11-tuple: background, response A, response B, progress text,
    item index, responses dict, the four radio values, and an error message.

    * If any rating is ``None`` the current item is shown again, the radios
      keep their values, and a validation message fills the last slot.
    * Otherwise the ratings are stored under ``responses[idx]`` (the dict is
      mutated in place), everything is written with ``save_all_to_csv``, and
      the next item is loaded.
    * Once the index is past the last example the completion message is
      returned in the four text slots.

    The completion branch previously returned only seven values and put the
    message string in the index slot; it now returns the same 11-value layout
    as every other branch.  The completion check also runs on entry, so
    pressing "Next" again afterwards no longer indexes past the end of
    ``examples``.
    """
    # NOTE(review): the leading characters of this message look like a
    # mis-decoded emoji; confirm against the original file.
    done_msg = "π© Survey complete! Thank you."

    def finished(position):
        # Completion screen: message in the text slots, radios cleared.
        return (done_msg,) * 4 + (position, responses, None, None, None, None, "")

    if idx >= len(examples):
        return finished(idx)

    if None in (per, coh, nat, ovl_cb):
        current = cb_load_example(idx, examples, responses)
        return (
            *current[:4], idx, responses, per, coh, nat, ovl_cb,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = {
        "Persuasiveness": per,
        "Coherence": coh,
        "Naturalness": nat,
        "Overall": ovl_cb,
    }
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        return finished(idx)

    # Load once and splice the index/state pair between text and radio values.
    upcoming = cb_load_example(idx, examples, responses)
    return (*upcoming[:4], idx, responses, *upcoming[4:])
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """Step back one cb-panel item, saving the current ratings if complete.

    The current ratings are stored under ``responses[idx]`` and written with
    ``save_all_to_csv`` only when all four are set; a partially answered item
    is left unsaved.  The index is then decremented, never below zero.

    Returns:
        An 11-tuple: the four text values of the target item, the new index,
        the responses dict, then the remaining values from
        ``cb_load_example`` (radio values and error text).
    """
    ratings = (per, coh, nat, ovl_cb)
    if None not in ratings:
        labels = ("Persuasiveness", "Coherence", "Naturalness", "Overall")
        responses[idx] = dict(zip(labels, ratings))
        save_all_to_csv(user_id, dataset, examples, responses)

    target = max(0, idx - 1)
    loaded = cb_load_example(target, examples, responses)
    return (*loaded[:4], target, responses, *loaded[4:])
|
|
|
|
|
|
|
216 |
|
217 |
|
218 |
# ─── Build Gradio App ─────────────────────────────────────────────────────────
|
219 |
with gr.Blocks(title="Human Evaluation Survey") as demo:
    # Every rating question offers the same three options.
    preference_choices = ["Prefer Response A", "Prefer Response B", "No preference"]

    # Login panel
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # Shared state
    uid_state = gr.State(value="")
    ds_state = gr.State(value="")
    ex_state = gr.State(value=[])
    idx_state = gr.State(value=0)
    resp_state = gr.State(value={})

    # NOTE(review): the leading characters of the Prev/Next/Logout button
    # labels below look like mis-decoded symbols; confirm against the
    # original file before changing them.

    # ESConv Panel
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)
        ident = gr.Radio(preference_choices, label="Identification (Ident.)")
        com = gr.Radio(preference_choices, label="Comforting (Com.)")
        sug = gr.Radio(preference_choices, label="Suggestion (Sug.)")
        ovl_es = gr.Radio(preference_choices, label="Overall (Ov.)")
        # Left visible: the handlers only return a plain string for this
        # component, which does not change visibility, so a hidden component
        # would never show the validation message.  It starts out empty.
        err_es = gr.HTML()
        prog = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("β Prev")
            next_btn = gr.Button("βΆ Next")
            logout_es = gr.Button("πͺ Logout")

    # CraigslistBargain Panel
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)
        per = gr.Radio(preference_choices, label="Persuasiveness (Per.)")
        coh = gr.Radio(preference_choices, label="Coherence (Coh.)")
        nat = gr.Radio(preference_choices, label="Naturalness (Nat.)")
        ovl_cb = gr.Radio(preference_choices, label="Overall")
        # Visible for the same reason as err_es above.
        err_cb = gr.HTML()
        prog_cb = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_cb = gr.Button("β Prev")
            next_cb = gr.Button("βΆ Next")
            logout_cb = gr.Button("πͺ Logout")

    # Wiring callbacks.  The component groups are named once so that each
    # handler's return layout can be checked against a single definition.
    panels = [login_panel, es_panel, cb_panel]
    session_state = [uid_state, ds_state, ex_state, idx_state, resp_state]
    es_fields = [bg, lbox, rbox, prog, ident, com, sug, ovl_es, err_es]
    cb_fields = [bg_cb, lbox_cb, rbox_cb, prog_cb, per, coh, nat, ovl_cb, err_cb]
    everything = panels + session_state + es_fields + cb_fields

    es_nav_inputs = session_state + [ident, com, sug, ovl_es]
    es_nav_outputs = [
        bg, lbox, rbox, prog, idx_state, resp_state,
        ident, com, sug, ovl_es, err_es,
    ]
    cb_nav_inputs = session_state + [per, coh, nat, ovl_cb]
    cb_nav_outputs = [
        bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state,
        per, coh, nat, ovl_cb, err_cb,
    ]

    start_btn.click(login_fn, inputs=[user_id_in, ds_dd], outputs=everything)

    next_btn.click(es_next_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)
    prev_btn.click(es_prev_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)

    next_cb.click(cb_next_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)
    prev_cb.click(cb_prev_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)

    # Both panels' logout buttons share one handler.
    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(logout_fn, inputs=session_state, outputs=everything)

if __name__ == "__main__":
    demo.launch(share=True)
|