|
import gradio as gr |
|
import pandas as pd |
|
import random |
|
import os |
|
import csv |
|
import sys |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Dataset name (as offered in the login dropdown) -> path of its source CSV.
DATASET_FILES = {
    "ESConv": "./csv/esconv.csv",
    "CraigslistBargain": "./csv/craigslist.csv",
}
|
|
|
|
|
def prepare_examples(user_id: str, dataset: str):
    """
    Build the shuffled list of pairwise comparisons for one annotator.

    Reads the CSV registered for ``dataset`` in ``DATASET_FILES`` and, for
    every row, creates one Ours-vs-PPDPP pair and one Ours-vs-DPDP pair.  The
    pairs are shuffled and the left/right placement of each pair is randomised.

    All randomness comes from a private ``random.Random`` seeded with
    ``f"{user_id}_{dataset}"``.  This yields the same sequence as seeding the
    module-level generator with that string, so the order stays stable across
    reloads, but it neither reseeds nor consumes the process-wide generator.

    Args:
        user_id: Annotator identifier; part of the seed.
        dataset: Key into ``DATASET_FILES``; part of the seed.

    Returns:
        A list of dicts with keys ``background``, ``left_text``,
        ``right_text``, ``left_name`` and ``right_name``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``DATASET_FILES`` or the CSV
            lacks one of the columns read below.
    """
    rng = random.Random(f"{user_id}_{dataset}")
    df = pd.read_csv(DATASET_FILES[dataset])

    pairs = [
        {
            "background": row["Background Information"],
            "ours": row["Ours"],
            "other": row[other],
            "other_name": other,
        }
        for _, row in df.iterrows()
        for other in ("PPDPP", "DPDP")
    ]
    rng.shuffle(pairs)

    prepared = []
    for item in pairs:
        # One draw per pair decides which side shows the "Ours" response.
        if rng.random() < 0.5:
            left_text, right_text = item["ours"], item["other"]
            left_name, right_name = "Ours", item["other_name"]
        else:
            left_text, right_text = item["other"], item["ours"]
            left_name, right_name = item["other_name"], "Ours"
        prepared.append({
            "background": item["background"],
            "left_text": left_text,
            "right_text": right_text,
            "left_name": left_name,
            "right_name": right_name,
        })
    return prepared
|
|
|
|
|
def save_all_to_csv(user_id, dataset, examples, responses):
    """
    Write every recorded response to ``{user_id}_{dataset}_results.csv``.

    The file is rewritten from scratch on each call, one row per answered
    example in ascending index order.  Metric columns are taken from the keys
    of the first entry in ``responses``.  Nothing is written when
    ``responses`` is empty.
    """
    if not responses:
        return

    first_answer = next(iter(responses.values()))
    fieldnames = [
        "UserID", "Dataset", "Background",
        "Response A Method", "Response B Method",
        *first_answer.keys(),
    ]
    target = f"{user_id}_{dataset}_results.csv"

    with open(target, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        for position in sorted(responses):
            example = examples[position]
            record = {
                "UserID": user_id,
                "Dataset": dataset,
                "Background": example["background"],
                "Response A Method": example["left_name"],
                "Response B Method": example["right_name"],
                **responses[position],
            }
            writer.writerow(record)
|
|
|
|
|
def load_responses_from_csv(user_id, dataset, examples):
    """
    Reconstruct a {idx: metrics-dict} mapping from an existing results file.

    Rows of ``{user_id}_{dataset}_results.csv`` are matched to ``examples`` by
    the triple (Background, Response A Method, Response B Method).  Rows with
    no matching example are ignored.  If several examples share the same
    triple, the one with the highest index receives the stored answers.

    Args:
        user_id: Annotator identifier used in the file name.
        dataset: Dataset name used in the file name.
        examples: Prepared examples, each a dict with ``background``,
            ``left_name`` and ``right_name`` keys.

    Returns:
        Dict mapping example index to ``{metric column: value}``.  Empty cells
        are returned as ``None`` (not NaN) so they read as "not answered".
        Returns ``{}`` when the file is missing or completely empty.
    """
    filename = f"{user_id}_{dataset}_results.csv"
    try:
        df = pd.read_csv(filename)
    except FileNotFoundError:
        return {}
    except pd.errors.EmptyDataError:
        # A zero-byte file carries no answers; treat it like a missing file.
        return {}

    idx_map = {
        (ex["background"], ex["left_name"], ex["right_name"]): i
        for i, ex in enumerate(examples)
    }

    # Everything that is not an identifying column is a metric; the column
    # set is the same for every row, so compute it once.
    identity_cols = {
        "UserID", "Dataset", "Background",
        "Response A Method", "Response B Method",
    }
    metric_cols = [c for c in df.columns if c not in identity_cols]

    responses = {}
    for _, row in df.iterrows():
        key = (
            row["Background"],
            row["Response A Method"],
            row["Response B Method"],
        )
        idx = idx_map.get(key)
        if idx is None:
            continue
        responses[idx] = {
            k: (None if pd.isna(row[k]) else row[k]) for k in metric_cols
        }
    return responses
|
|
|
|
|
|
|
def es_load_example(idx, examples, responses):
    """
    Return the nine ESConv panel values for the example at ``idx``.

    Order: background, left text, right text, progress label, the stored
    Identification / Comforting / Suggestion / Overall answers (``None`` where
    nothing is stored for ``idx``), and an empty message string.
    """
    example = examples[idx]
    stored = responses.get(idx, {})
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        stored.get(metric)
        for metric in ("Identification", "Comforting", "Suggestion", "Overall")
    )
    return (
        example["background"],
        example["left_text"],
        example["right_text"],
        progress,
        *answers,
        "",
    )
|
|
|
def cb_load_example(idx, examples, responses):
    """
    Return the nine CraigslistBargain panel values for the example at ``idx``.

    Order: background, left text, right text, progress label, the stored
    Persuasiveness / Coherence / Naturalness / Overall answers (``None`` where
    nothing is stored for ``idx``), and an empty message string.
    """
    example = examples[idx]
    stored = responses.get(idx, {})
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        stored.get(metric)
        for metric in ("Persuasiveness", "Coherence", "Naturalness", "Overall")
    )
    return (
        example["background"],
        example["left_text"],
        example["right_text"],
        progress,
        *answers,
        "",
    )
|
|
|
|
|
|
|
def login_fn(user_id, dataset):
    """
    Start (or resume) a survey session for ``user_id`` on ``dataset``.

    Prepares the annotator's examples, reloads any answers already saved for
    them, and returns the 26 values wired to the "Start survey" button:
    three panel-visibility updates (login, ESConv, CraigslistBargain), the
    five session-state values (user id, dataset, examples, index, responses),
    then nine values for the ESConv panel and nine for the CraigslistBargain
    panel.  The panel that is not in use receives blank values.

    Raises:
        gr.Error: If the user ID or dataset is missing, the user ID contains
            a path separator, or the dataset has no examples.
    """
    if not user_id or not user_id.strip() or not dataset:
        raise gr.Error("Please enter your User ID and select a dataset.")
    # The ID becomes part of the results file name, so refuse anything that
    # could point that name at a different directory.
    if any(ch in user_id for ch in ("/", "\\", "\0")):
        raise gr.Error("User ID must not contain '/' or '\\' characters.")

    examples = prepare_examples(user_id, dataset)
    if not examples:
        raise gr.Error("The selected dataset contains no examples.")
    responses = load_responses_from_csv(user_id, dataset, examples)
    idx = 0

    # Text fields, four unselected radios, empty message.
    blank_panel = ("", "", "", "", None, None, None, None, "")
    use_es = dataset == "ESConv"
    if use_es:
        es_values = es_load_example(idx, examples, responses)
        cb_values = blank_panel
    else:
        es_values = blank_panel
        cb_values = cb_load_example(idx, examples, responses)

    return (
        gr.update(visible=False),
        gr.update(visible=use_es),
        gr.update(visible=not use_es),
        user_id, dataset, examples, idx, responses,
        *es_values,
        *cb_values,
    )
|
|
|
def logout_fn(user_id, dataset, examples, idx, responses):
    """
    Persist the session's answers and return the UI to the login screen.

    Saves via ``save_all_to_csv`` when a dataset is set, then returns exactly
    the 26 values wired to the logout buttons: three panel-visibility updates
    (login shown, both survey panels hidden), the five session-state values
    reset to their initial values, and nine blank values for each survey
    panel (four empty text fields, four unselected radios, empty message).
    """
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)

    # Radios are cleared with None, the same "nothing selected" value the
    # login handler uses for the panel that is not in use.
    blank_panel = ("", "", "", "", None, None, None, None, "")
    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "", "", [], 0, {},
        *blank_panel,
        *blank_panel,
    )
|
|
|
|
|
|
|
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Store the ESConv ratings for the current item and move to the next one.

    Returns 11 values in the order of the ESConv "Next" outputs: background,
    Response A, Response B, progress, index, responses, the four radio
    values, and an update for the validation-message component.

    * If any rating is missing, the current item stays on screen and the
      validation message is shown.
    * Otherwise the ratings are recorded in ``responses`` (mutated in place),
      everything is saved to CSV, and the next item is loaded.
    * Past the last item, the four text fields show the completion message and
      the radios are cleared.  Clicking again in that state changes nothing.
    """
    # NOTE(review): the leading characters of this message look like a
    # mis-encoded emoji; confirm the intended glyph.
    done_fields = ("π© Survey complete! Thank you.",) * 4
    hide_error = gr.update(value="", visible=False)

    # Already on the completion screen: there is no item at ``idx`` to rate.
    if idx >= len(examples):
        return (*done_fields, idx, responses, None, None, None, None, hide_error)

    if None in (ident, com, sug, ovl):
        # The message component is created hidden, so it must be made visible
        # explicitly for the annotator to see it.
        show_error = gr.update(
            value="All metrics must be answered before proceeding.",
            visible=True,
        )
        shown = es_load_example(idx, examples, responses)[:4]
        return (*shown, idx, responses, ident, com, sug, ovl, show_error)

    responses[idx] = {"Identification": ident, "Comforting": com, "Suggestion": sug, "Overall": ovl}
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        return (*done_fields, idx, responses, None, None, None, None, hide_error)

    background, left, right, progress, *stored, _ = es_load_example(idx, examples, responses)
    return (background, left, right, progress, idx, responses, *stored, hide_error)
|
|
|
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Step back one ESConv item, keeping the current ratings if complete.

    When all four ratings are set and ``idx`` refers to a real item, they are
    recorded in ``responses`` (mutated in place) and saved to CSV; incomplete
    ratings are discarded without complaint.  The index is then decreased by
    one, never below zero.

    Returns 11 values in the order of the ESConv "Prev" outputs: background,
    Response A, Response B, progress, index, responses, the four stored radio
    values for the item now shown, and an update that clears and hides the
    validation message.
    """
    # ``idx`` equals len(examples) after the last item has been submitted;
    # there is no example to attach ratings to in that state.
    if idx < len(examples) and None not in (ident, com, sug, ovl):
        responses[idx] = {"Identification": ident, "Comforting": com, "Suggestion": sug, "Overall": ovl}
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)
    background, left, right, progress, *stored, _ = es_load_example(idx, examples, responses)
    return (
        background, left, right, progress,
        idx, responses,
        *stored,
        gr.update(value="", visible=False),
    )
|
|
|
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Store the CraigslistBargain ratings for the current item and advance.

    Returns 11 values in the order of the CraigslistBargain "Next" outputs:
    background, Response A, Response B, progress, index, responses, the four
    radio values, and an update for the validation-message component.

    * If any rating is missing, the current item stays on screen and the
      validation message is shown.
    * Otherwise the ratings are recorded in ``responses`` (mutated in place),
      everything is saved to CSV, and the next item is loaded.
    * Past the last item, the four text fields show the completion message,
      the radios are cleared, and index/responses are passed through
      unchanged.  Clicking again in that state changes nothing.
    """
    # NOTE(review): the leading characters of this message look like a
    # mis-encoded emoji; confirm the intended glyph.
    done_fields = ("π© Survey complete! Thank you.",) * 4
    hide_error = gr.update(value="", visible=False)

    # Already on the completion screen: there is no item at ``idx`` to rate.
    if idx >= len(examples):
        return (*done_fields, idx, responses, None, None, None, None, hide_error)

    if None in (per, coh, nat, ovl_cb):
        # The message component is created hidden, so it must be made visible
        # explicitly for the annotator to see it.
        show_error = gr.update(
            value="All metrics must be answered before proceeding.",
            visible=True,
        )
        shown = cb_load_example(idx, examples, responses)[:4]
        return (*shown, idx, responses, per, coh, nat, ovl_cb, show_error)

    responses[idx] = {"Persuasiveness": per, "Coherence": coh, "Naturalness": nat, "Overall": ovl_cb}
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        # One value per wired output; the session state must keep its real
        # index and responses rather than receive the message text.
        return (*done_fields, idx, responses, None, None, None, None, hide_error)

    background, left, right, progress, *stored, _ = cb_load_example(idx, examples, responses)
    return (background, left, right, progress, idx, responses, *stored, hide_error)
|
|
|
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Step back one CraigslistBargain item, keeping the ratings if complete.

    When all four ratings are set and ``idx`` refers to a real item, they are
    recorded in ``responses`` (mutated in place) and saved to CSV; incomplete
    ratings are discarded without complaint.  The index is then decreased by
    one, never below zero.

    Returns 11 values in the order of the CraigslistBargain "Prev" outputs:
    background, Response A, Response B, progress, index, responses, the four
    stored radio values for the item now shown, and an update that clears and
    hides the validation message.
    """
    # ``idx`` equals len(examples) after the last item has been submitted;
    # there is no example to attach ratings to in that state.
    if idx < len(examples) and None not in (per, coh, nat, ovl_cb):
        responses[idx] = {"Persuasiveness": per, "Coherence": coh, "Naturalness": nat, "Overall": ovl_cb}
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)
    background, left, right, progress, *stored, _ = cb_load_example(idx, examples, responses)
    return (
        background, left, right, progress,
        idx, responses,
        *stored,
        gr.update(value="", visible=False),
    )
|
|
|
|
|
|
|
# Survey UI: a login screen plus one rating panel per dataset.  Only one of
# the three top-level columns is visible at a time; the handlers switch
# between them through visibility updates.
with gr.Blocks(title="Human Evaluation Survey") as demo:
    # Every rating question offers the same three answers.
    preference_choices = ["Prefer Response A", "Prefer Response B", "No preference"]

    # --- Login screen -----------------------------------------------------
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # --- Session state ----------------------------------------------------
    uid_state = gr.State(value="")
    ds_state = gr.State(value="")
    ex_state = gr.State(value=[])
    idx_state = gr.State(value=0)
    resp_state = gr.State(value={})

    # --- ESConv panel -----------------------------------------------------
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)
        ident = gr.Radio(list(preference_choices), label="Identification (Ident.)")
        com = gr.Radio(list(preference_choices), label="Comforting (Com.)")
        sug = gr.Radio(list(preference_choices), label="Suggestion (Sug.)")
        ovl_es = gr.Radio(list(preference_choices), label="Overall (Ov.)")
        err_es = gr.HTML(visible=False)
        prog = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("◀ Prev")
            next_btn = gr.Button("▶ Next")
        # NOTE(review): placed below the Prev/Next row; confirm it was not
        # meant to sit inside that row.
        logout_es = gr.Button("🚪 Logout")

    # --- CraigslistBargain panel -------------------------------------------
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)
        per = gr.Radio(list(preference_choices), label="Persuasiveness (Per.)")
        coh = gr.Radio(list(preference_choices), label="Coherence (Coh.)")
        nat = gr.Radio(list(preference_choices), label="Naturalness (Nat.)")
        ovl_cb = gr.Radio(list(preference_choices), label="Overall")
        err_cb = gr.HTML(visible=False)
        prog_cb = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_cb = gr.Button("◀ Prev")
            next_cb = gr.Button("▶ Next")
        # NOTE(review): placed below the Prev/Next row; confirm it was not
        # meant to sit inside that row.
        logout_cb = gr.Button("🚪 Logout")

    # --- Event wiring -------------------------------------------------------
    # Component groups shared by several handlers.  The order of each group
    # is the order in which the handlers return their values.
    session_states = (uid_state, ds_state, ex_state, idx_state, resp_state)
    es_ratings = (ident, com, sug, ovl_es)
    cb_ratings = (per, coh, nat, ovl_cb)

    # Login and logout both update the panels, the session and both panels'
    # nine display components: 3 + 5 + 9 + 9 = 26 outputs.
    full_reset_outputs = (
        login_panel, es_panel, cb_panel,
        *session_states,
        bg, lbox, rbox, prog, *es_ratings, err_es,
        bg_cb, lbox_cb, rbox_cb, prog_cb, *cb_ratings, err_cb,
    )
    # Navigation updates one panel plus the index and responses: 11 outputs.
    es_nav_outputs = (bg, lbox, rbox, prog, idx_state, resp_state, *es_ratings, err_es)
    cb_nav_outputs = (bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state, *cb_ratings, err_cb)

    start_btn.click(
        login_fn,
        inputs=[user_id_in, ds_dd],
        outputs=list(full_reset_outputs),
    )

    next_btn.click(
        es_next_fn,
        inputs=[*session_states, *es_ratings],
        outputs=list(es_nav_outputs),
    )

    prev_btn.click(
        es_prev_fn,
        inputs=[*session_states, *es_ratings],
        outputs=list(es_nav_outputs),
    )

    next_cb.click(
        cb_next_fn,
        inputs=[*session_states, *cb_ratings],
        outputs=list(cb_nav_outputs),
    )

    prev_cb.click(
        cb_prev_fn,
        inputs=[*session_states, *cb_ratings],
        outputs=list(cb_nav_outputs),
    )

    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(
            logout_fn,
            inputs=list(session_states),
            outputs=list(full_reset_outputs),
        )
|
|
|
# Launch the app only when this file is run as a script; share=True is passed
# through to Gradio's launch().
if __name__ == "__main__":
    demo.launch(share=True)
|
|