"""Gradio app for a pairwise human-evaluation survey.

Annotators log in with a user ID, pick a dataset, and compare the "Ours"
response against a baseline ("PPDPP" or "DPDP") on several metrics.
Answers are persisted to ``<user>_<dataset>_results.csv`` in the working
directory after every navigation step so a session can be resumed.
"""

import csv
import os
import random
import re
import sys

import gradio as gr
import pandas as pd

# Debug aids (left disabled):
# print(">>> Gradio imported from:", gr.__file__)
# print(">>> Gradio version :", getattr(gr, "__version__", "n/a"))
# print(">>> sys.path contains :", sys.path[:5])

# ─── Configuration ─────────────────────────────────────────────────────────────
DATASET_FILES = {
    "ESConv": "./csv/esconv.csv",
    "CraigslistBargain": "./csv/craigslist.csv",
}

# Metric names double as CSV column names and as keys of each response dict.
ES_METRICS = ("Identification", "Comforting", "Suggestion", "Overall")
CB_METRICS = ("Persuasiveness", "Coherence", "Naturalness", "Overall")

# Non-metric columns of a results file.
_META_COLUMNS = (
    "UserID",
    "Dataset",
    "Background",
    "Response A Method",
    "Response B Method",
)

_CHOICES = ["Prefer Response A", "Prefer Response B", "No preference"]
_COMPLETE_MSG = "🚩 Survey complete! Thank you."
_MISSING_MSG = "All metrics must be answered before proceeding."

# Values that blank one panel: background, left, right, progress,
# four radios (None == nothing selected) and the error message.
_BLANK_PANEL = ("", "", "", "", None, None, None, None, "")


# ─── Helper functions ──────────────────────────────────────────────────────────
def prepare_examples(user_id: str, dataset: str):
    """
    Read CSV, create Ours-vs-[PPDPP|DPDP] pairs, shuffle + randomise L/R.

    A deterministic seed (user_id+dataset) keeps the order stable for reloads.
    A private ``random.Random`` instance is used instead of reseeding the
    module-level generator, so concurrent sessions cannot disturb each other;
    for a given seed it produces the same sequence as ``random.seed`` did.

    Returns a list of dicts with keys ``background``, ``left_text``,
    ``right_text``, ``left_name`` and ``right_name``.
    """
    rng = random.Random(f"{user_id}_{dataset}")
    df = pd.read_csv(DATASET_FILES[dataset])

    pairs = []
    for _, row in df.iterrows():
        for other in ("PPDPP", "DPDP"):
            pairs.append({
                "background": row["Background Information"],
                "ours": row["Ours"],
                "other": row[other],
                "other_name": other,
            })
    rng.shuffle(pairs)

    prepared = []
    for item in pairs:
        # Coin flip decides which side "Ours" is displayed on.
        if rng.random() < 0.5:
            left_text, right_text = item["ours"], item["other"]
            left_name, right_name = "Ours", item["other_name"]
        else:
            left_text, right_text = item["other"], item["ours"]
            left_name, right_name = item["other_name"], "Ours"
        prepared.append({
            "background": item["background"],
            "left_text": left_text,
            "right_text": right_text,
            "left_name": left_name,
            "right_name": right_name,
        })
    return prepared


def _results_path(user_id, dataset):
    """Return the results filename for a user/dataset pair.

    The user ID comes straight from a web form, so anything that could act
    as a path component (separators, drive colons, control characters) is
    replaced with ``_``. IDs made of word characters, dots, hyphens and
    spaces map to the same filename as before.
    """
    safe_id = re.sub(r"[^\w.\- ]", "_", str(user_id))
    return f"{safe_id}_{dataset}_results.csv"


def save_all_to_csv(user_id, dataset, examples, responses):
    """Rewrite CSV completely (idempotent).

    Does nothing when ``responses`` is empty. The metric columns are taken
    from the first stored response; rows are written in example order.
    """
    if not responses:
        return
    filename = _results_path(user_id, dataset)
    metrics = list(next(iter(responses.values())).keys())
    header = list(_META_COLUMNS) + metrics
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        for idx in sorted(responses):
            ex = examples[idx]
            row = {
                "UserID": user_id,
                "Dataset": dataset,
                "Background": ex["background"],
                "Response A Method": ex["left_name"],
                "Response B Method": ex["right_name"],
            }
            row.update(responses[idx])
            writer.writerow(row)


def load_responses_from_csv(user_id, dataset, examples):
    """
    Reconstruct a {idx: metrics-dict} mapping from an existing results file.

    Matching uses Background + response-method orientation to stay robust.
    Returns an empty dict when no results file exists or the file is empty.
    Missing (NaN) metric cells are loaded as ``None`` so they read as
    "unanswered" rather than as a float.
    """
    filename = _results_path(user_id, dataset)
    if not os.path.exists(filename):
        return {}
    try:
        df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # A zero-byte file (e.g. an interrupted write) holds no answers.
        return {}

    idx_map = {
        (ex["background"], ex["left_name"], ex["right_name"]): i
        for i, ex in enumerate(examples)
    }
    metric_cols = [c for c in df.columns if c not in _META_COLUMNS]

    responses = {}
    for _, row in df.iterrows():
        key = (
            row["Background"],
            row["Response A Method"],
            row["Response B Method"],
        )
        idx = idx_map.get(key)
        if idx is None:
            continue
        responses[idx] = {
            k: (None if pd.isna(row[k]) else row[k]) for k in metric_cols
        }
    return responses


# ─── Panel-specific loaders ────────────────────────────────────────────────────
def _load_example(idx, examples, responses, metrics):
    """Return the 9 panel values for example ``idx``.

    Order: background, left text, right text, progress label, one previously
    stored answer per metric (``None`` if unanswered), empty error message.
    """
    ex = examples[idx]
    prev = responses.get(idx, {})
    return (
        ex["background"],
        ex["left_text"],
        ex["right_text"],
        f"Item {idx+1} of {len(examples)}",
        *(prev.get(metric) for metric in metrics),
        "",
    )


def es_load_example(idx, examples, responses):
    """Panel values for an ESConv example (see ``_load_example``)."""
    return _load_example(idx, examples, responses, ES_METRICS)


def cb_load_example(idx, examples, responses):
    """Panel values for a CraigslistBargain example (see ``_load_example``)."""
    return _load_example(idx, examples, responses, CB_METRICS)


# ─── Login & Logout ────────────────────────────────────────────────────────────
def login_fn(user_id, dataset):
    """Start (or resume) a session and show the panel for ``dataset``.

    Returns 26 values: three panel-visibility updates, the five state values
    (user ID, dataset, examples, index, responses), then nine values for the
    ESConv panel and nine for the CraigslistBargain panel. The panel that is
    not in use receives blanks.

    Raises:
        gr.Error: if the user ID or dataset is missing, the dataset is not
            one of ``DATASET_FILES``, or the dataset has no examples.
    """
    if not user_id or not dataset:
        raise gr.Error("Please enter your User ID and select a dataset.")
    if dataset not in DATASET_FILES:
        raise gr.Error("Unknown dataset selected.")

    examples = prepare_examples(user_id, dataset)
    if not examples:
        raise gr.Error("The selected dataset contains no examples.")
    responses = load_responses_from_csv(user_id, dataset, examples)
    idx = 0

    is_es = dataset == "ESConv"
    if is_es:
        es_values = es_load_example(idx, examples, responses)
        cb_values = _BLANK_PANEL
    else:
        es_values = _BLANK_PANEL
        cb_values = cb_load_example(idx, examples, responses)

    return (
        gr.update(visible=False),
        gr.update(visible=is_es),
        gr.update(visible=not is_es),
        user_id, dataset, examples, idx, responses,
        *es_values,
        *cb_values,
    )


def logout_fn(user_id, dataset, examples, idx, responses):
    """Persist answers, reset all state and return to the login panel.

    Returns exactly 26 values, matching the outputs wired to the logout
    buttons: 3 visibility updates, 5 state resets and 9 blanks per panel.
    Radios are reset with ``None`` (no selection).
    """
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)
    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "", "", [], 0, {},
        *_BLANK_PANEL,
        *_BLANK_PANEL,
    )


# ─── Navigation callback helpers ───────────────────────────────────────────────
def _complete_outputs(idx, responses, n_metrics):
    """Return the 11 navigation outputs for the "survey complete" screen."""
    return (
        (_COMPLETE_MSG,) * 4
        + (idx, responses)
        + (None,) * n_metrics
        + ("",)
    )


def _next(user_id, dataset, examples, idx, responses, metrics, answers):
    """Store the current answers and advance to the next example.

    Returns 11 values: background, left, right, progress, index, responses,
    one value per metric radio, and the error message.
    """
    total = len(examples)
    if idx >= total:
        # Already past the last item (Next clicked again after completion).
        return _complete_outputs(total, responses, len(metrics))

    if any(answer is None for answer in answers):
        view = _load_example(idx, examples, responses, metrics)
        # Keep whatever the annotator has selected so far.
        return (*view[:4], idx, responses, *answers, _MISSING_MSG)

    responses[idx] = dict(zip(metrics, answers))
    save_all_to_csv(user_id, dataset, examples, responses)

    idx += 1
    if idx >= total:
        return _complete_outputs(idx, responses, len(metrics))
    view = _load_example(idx, examples, responses, metrics)
    return (*view[:4], idx, responses, *view[4:])


def _prev(user_id, dataset, examples, idx, responses, metrics, answers):
    """Store the current answers if complete, then step back one example.

    Returns the same 11-value layout as ``_next``.
    """
    total = len(examples)
    if total == 0:
        # Nothing loaded (e.g. state was reset); show a blank panel.
        return (*_BLANK_PANEL[:4], 0, responses, *_BLANK_PANEL[4:])

    # Only save for a real item: on the completion screen idx == total.
    if idx < total and all(answer is not None for answer in answers):
        responses[idx] = dict(zip(metrics, answers))
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, min(idx, total) - 1)
    view = _load_example(idx, examples, responses, metrics)
    return (*view[:4], idx, responses, *view[4:])


def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """"Next" handler for the ESConv panel (see ``_next``)."""
    return _next(user_id, dataset, examples, idx, responses,
                 ES_METRICS, (ident, com, sug, ovl))


def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """"Prev" handler for the ESConv panel (see ``_prev``)."""
    return _prev(user_id, dataset, examples, idx, responses,
                 ES_METRICS, (ident, com, sug, ovl))


def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """"Next" handler for the CraigslistBargain panel (see ``_next``)."""
    return _next(user_id, dataset, examples, idx, responses,
                 CB_METRICS, (per, coh, nat, ovl_cb))


def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """"Prev" handler for the CraigslistBargain panel (see ``_prev``)."""
    return _prev(user_id, dataset, examples, idx, responses,
                 CB_METRICS, (per, coh, nat, ovl_cb))


# ─── Build Gradio App ──────────────────────────────────────────────────────────
with gr.Blocks(title="Human Evaluation Survey") as demo:
    # Login panel
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # Shared state
    uid_state = gr.State(value="")
    ds_state = gr.State(value="")
    ex_state = gr.State(value=[])
    idx_state = gr.State(value=0)
    resp_state = gr.State(value={})

    # ESConv Panel
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)
        ident = gr.Radio(_CHOICES, label="Identification (Ident.)")
        com = gr.Radio(_CHOICES, label="Comforting (Com.)")
        sug = gr.Radio(_CHOICES, label="Suggestion (Sug.)")
        ovl_es = gr.Radio(_CHOICES, label="Overall (Ov.)")
        # Left visible: callbacks only update its value, so a hidden
        # component would never show the validation message.
        err_es = gr.HTML()
        prog = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("◀ Prev")
            next_btn = gr.Button("▶ Next")
            logout_es = gr.Button("🚪 Logout")

    # CraigslistBargain Panel
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)
        per = gr.Radio(_CHOICES, label="Persuasiveness (Per.)")
        coh = gr.Radio(_CHOICES, label="Coherence (Coh.)")
        nat = gr.Radio(_CHOICES, label="Naturalness (Nat.)")
        ovl_cb = gr.Radio(_CHOICES, label="Overall")
        # Visible for the same reason as err_es above.
        err_cb = gr.HTML()
        prog_cb = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_cb = gr.Button("◀ Prev")
            next_cb = gr.Button("▶ Next")
            logout_cb = gr.Button("🚪 Logout")

    # Wiring callbacks
    es_nav_inputs = [uid_state, ds_state, ex_state, idx_state, resp_state,
                     ident, com, sug, ovl_es]
    es_nav_outputs = [bg, lbox, rbox, prog, idx_state, resp_state,
                      ident, com, sug, ovl_es, err_es]
    cb_nav_inputs = [uid_state, ds_state, ex_state, idx_state, resp_state,
                     per, coh, nat, ovl_cb]
    cb_nav_outputs = [bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state,
                      per, coh, nat, ovl_cb, err_cb]
    # 26 components shared by login and logout (3 panels, 5 states, 2 x 9).
    session_outputs = [
        login_panel, es_panel, cb_panel,
        uid_state, ds_state, ex_state, idx_state, resp_state,
        bg, lbox, rbox, prog, ident, com, sug, ovl_es, err_es,
        bg_cb, lbox_cb, rbox_cb, prog_cb, per, coh, nat, ovl_cb, err_cb,
    ]

    start_btn.click(login_fn, inputs=[user_id_in, ds_dd], outputs=session_outputs)

    next_btn.click(es_next_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)
    prev_btn.click(es_prev_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)
    next_cb.click(cb_next_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)
    prev_cb.click(cb_prev_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)

    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(
            logout_fn,
            inputs=[uid_state, ds_state, ex_state, idx_state, resp_state],
            outputs=session_outputs,
        )


if __name__ == "__main__":
    demo.launch(share=True)