EMNLP / app.py
sutdaiday
fixed
15bfa69
import gradio as gr
import pandas as pd
import random
import os
import csv
import sys
# print(">>> Gradio imported from:", gr.__file__)
# print(">>> Gradio version :", getattr(gr, "__version__", "n/a"))
# print(">>> sys.path contains :", sys.path[:5])
# ─── Configuration ─────────────────────────────────────────────────────────────
# Dataset name (as offered in the login dropdown) -> path of the CSV that is
# read by prepare_examples(). Insertion order is the dropdown order.
DATASET_FILES = dict(
    ESConv="./csv/esconv.csv",
    CraigslistBargain="./csv/craigslist.csv",
)
# ─── Helper functions ──────────────────────────────────────────────────────────
def prepare_examples(user_id: str, dataset: str) -> list:
    """
    Build the shuffled list of pairwise comparisons for one annotator.

    Reads the CSV registered for ``dataset`` in ``DATASET_FILES``, pairs each
    row's "Ours" response with its "PPDPP" and its "DPDP" response, shuffles
    the pairs, and randomly decides which response is shown on the left.

    The random choices come from a generator seeded with
    ``f"{user_id}_{dataset}"``, so the same annotator gets the same order and
    orientation on every call.

    Args:
        user_id: Annotator identifier; part of the seed.
        dataset: Key into ``DATASET_FILES``; part of the seed.

    Returns:
        A list of dicts with the keys ``background``, ``left_text``,
        ``right_text``, ``left_name`` and ``right_name``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``DATASET_FILES`` or the CSV
            lacks one of the expected columns.
    """
    # Use a private generator instead of reseeding the module-level one: the
    # global state is shared by every request the server handles, so another
    # session drawing numbers in between would change this user's order.
    # random.Random(seed) yields the same sequence random.seed(seed) did.
    rng = random.Random(f"{user_id}_{dataset}")

    df = pd.read_csv(DATASET_FILES[dataset])

    pairs = []
    for _, row in df.iterrows():
        for other in ("PPDPP", "DPDP"):
            pairs.append({
                "background": row["Background Information"],
                "ours": row["Ours"],
                "other": row[other],
                "other_name": other,
            })
    rng.shuffle(pairs)

    prepared = []
    for item in pairs:
        # Coin flip per pair: which system is shown as "Response A" (left).
        if rng.random() < 0.5:
            left_text, right_text = item["ours"], item["other"]
            left_name, right_name = "Ours", item["other_name"]
        else:
            left_text, right_text = item["other"], item["ours"]
            left_name, right_name = item["other_name"], "Ours"
        prepared.append({
            "background": item["background"],
            "left_text": left_text,
            "right_text": right_text,
            "left_name": left_name,
            "right_name": right_name,
        })
    return prepared
def save_all_to_csv(user_id, dataset, examples, responses):
    """
    Write every recorded answer to ``<user_id>_<dataset>_results.csv``.

    The file is rewritten from scratch on each call, so calling it again with
    the same data produces the same file. Nothing is written when
    ``responses`` is empty.

    Args:
        user_id: Annotator identifier; written to each row and used in the
            file name.
        dataset: Dataset name; written to each row and used in the file name.
        examples: Prepared examples; ``responses`` keys index into this list.
        responses: Mapping of example index -> {metric name: answer}.
    """
    if not responses:
        return

    # NOTE(review): user_id is interpolated into the path unchanged; it comes
    # from a free-text box, so consider validating it where it is entered.
    out_path = f"{user_id}_{dataset}_results.csv"

    # Metric columns are taken from the first stored answer.
    first_answer = next(iter(responses.values()))
    fieldnames = [
        "UserID", "Dataset", "Background",
        "Response A Method", "Response B Method",
        *first_answer.keys(),
    ]

    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=fieldnames)
        out.writeheader()
        # Rows are emitted in example order, whatever order they were answered in.
        for position in sorted(responses):
            example = examples[position]
            out.writerow({
                "UserID": user_id,
                "Dataset": dataset,
                "Background": example["background"],
                "Response A Method": example["left_name"],
                "Response B Method": example["right_name"],
                **responses[position],
            })
def load_responses_from_csv(user_id, dataset, examples):
    """
    Reconstruct a {idx: metrics-dict} mapping from an existing results file.

    Rows of ``<user_id>_<dataset>_results.csv`` are matched to ``examples`` by
    the triple (Background, Response A Method, Response B Method); rows with
    no matching example are ignored.

    Args:
        user_id: Annotator identifier; part of the file name.
        dataset: Dataset name; part of the file name.
        examples: Prepared examples with ``background``, ``left_name`` and
            ``right_name`` keys.

    Returns:
        Mapping of example index -> {metric column: stored answer}. Blank
        cells are returned as ``None``. Empty dict when the file is missing
        or has no content.
    """
    filename = f"{user_id}_{dataset}_results.csv"
    if not os.path.exists(filename):
        return {}
    try:
        df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # A zero-byte file (e.g. an interrupted write) holds no answers.
        return {}

    idx_map = {
        (ex["background"], ex["left_name"], ex["right_name"]): i
        for i, ex in enumerate(examples)
    }

    # Everything that is not a bookkeeping column is a metric; this is the
    # same for every row, so compute it once.
    meta_cols = {
        "UserID", "Dataset", "Background",
        "Response A Method", "Response B Method",
    }
    metric_cols = [c for c in df.columns if c not in meta_cols]

    responses = {}
    for _, row in df.iterrows():
        key = (
            row["Background"],
            row["Response A Method"],
            row["Response B Method"],
        )
        idx = idx_map.get(key)
        if idx is None:
            continue
        # pandas reads blank cells as NaN; callers test answers against None,
        # so translate missing values back to None.
        responses[idx] = {
            k: (None if pd.isna(row[k]) else row[k]) for k in metric_cols
        }
    return responses
# ─── Panel-specific loaders ────────────────────────────────────────────────────
def es_load_example(idx, examples, responses):
    """
    Collect the values shown in the ESConv panel for example ``idx``.

    Returns a 9-tuple: background, left text, right text, progress label,
    the stored answers for Identification / Comforting / Suggestion / Overall
    (``None`` where nothing is stored), and an empty error message.
    """
    example = examples[idx]
    saved = responses.get(idx, {})

    texts = (example["background"], example["left_text"], example["right_text"])
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        saved.get(metric)
        for metric in ("Identification", "Comforting", "Suggestion", "Overall")
    )
    return (*texts, progress, *answers, "")
def cb_load_example(idx, examples, responses):
    """
    Collect the values shown in the CraigslistBargain panel for example ``idx``.

    Returns a 9-tuple: background, left text, right text, progress label,
    the stored answers for Persuasiveness / Coherence / Naturalness / Overall
    (``None`` where nothing is stored), and an empty error message.
    """
    current = examples[idx]
    answered = responses.get(idx, {})

    display = [current[field] for field in ("background", "left_text", "right_text")]
    display.append(f"Item {idx+1} of {len(examples)}")
    display.extend(
        answered.get(metric)
        for metric in ("Persuasiveness", "Coherence", "Naturalness", "Overall")
    )
    display.append("")  # error message slot starts out empty
    return tuple(display)
# ─── Login & Logout ────────────────────────────────────────────────────────────
def login_fn(user_id, dataset):
    """
    Start (or resume) a survey session and show the panel for ``dataset``.

    Prepares the annotator's examples, reloads any answers already saved for
    this user/dataset, and fills the matching panel with the first example.
    The other panel is hidden and blanked.

    Args:
        user_id: Text entered in the "User ID" box.
        dataset: Value chosen in the dataset dropdown.

    Returns:
        26 values in the order of the outputs wired to the start button:
        three panel visibility updates (login, ESConv, CraigslistBargain),
        five state values (user id, dataset, examples, index, responses),
        nine ESConv widget values and nine CraigslistBargain widget values.

    Raises:
        gr.Error: If ``user_id`` or ``dataset`` is empty, or the dataset
            yields no examples.
    """
    if not user_id or not dataset:
        raise gr.Error("Please enter your User ID and select a dataset.")

    examples = prepare_examples(user_id, dataset)
    if not examples:
        # Without this guard the first-example lookup below fails with a
        # bare IndexError.
        raise gr.Error("The selected dataset contains no examples.")
    responses = load_responses_from_csv(user_id, dataset, examples)
    idx = 0

    # Values for a panel that is not in use: empty text boxes, unselected
    # radios, empty error message.
    blank_panel = ("", "", "", "", None, None, None, None, "")

    # Any dataset other than "ESConv" is shown in the CraigslistBargain panel.
    show_es = dataset == "ESConv"
    if show_es:
        es_values = es_load_example(idx, examples, responses)
        cb_values = blank_panel
    else:
        es_values = blank_panel
        cb_values = cb_load_example(idx, examples, responses)

    return (
        gr.update(visible=False),
        gr.update(visible=show_es),
        gr.update(visible=not show_es),
        user_id, dataset, examples, idx, responses,
        *es_values,
        *cb_values,
    )
def logout_fn(user_id, dataset, examples, idx, responses):
    """
    Save the session's answers and return to the login screen.

    Args:
        user_id: Current annotator identifier.
        dataset: Current dataset name; nothing is saved when it is empty.
        examples: Prepared examples of the session.
        idx: Current example index (not used here).
        responses: Mapping of example index -> answers.

    Returns:
        26 values in the order of the outputs wired to the logout buttons:
        three panel visibility updates (login shown, both survey panels
        hidden), five reset state values, and nine reset widget values for
        each of the two panels.
    """
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)

    # Per panel: four text boxes, four radios, one error message. Radios are
    # cleared with None (no selection) rather than an empty string, and each
    # panel gets exactly nine values so the total matches the wired outputs.
    panel_reset = ("", "", "", "", None, None, None, None, "")

    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "", "", [], 0, {},
        *panel_reset,
        *panel_reset,
    )
# ─── Navigation callback helpers ───────────────────────────────────────────────
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Store the ESConv answers for the current example and advance by one.

    Returns 11 values in the order of the outputs wired to the ESConv "Next"
    button: background, left text, right text, progress, new index,
    responses, the four radio values, and an error message.

    - If the survey is already finished (``idx`` is past the last example),
      the completion screen is returned again and nothing is stored.
    - If any metric is unanswered, the current example is kept and an error
      message is returned.
    - Otherwise the answers are stored and saved, and either the next example
      or the completion screen is returned.
    """
    finished = ("🚩 Survey complete! Thank you.",) * 4

    # Already on the completion screen: there is no example at idx, so do not
    # try to load it or store answers for it.
    if idx >= len(examples):
        return finished + (idx, responses, None, None, None, None, "")

    if None in (ident, com, sug, ovl):
        shown = es_load_example(idx, examples, responses)[:4]
        return (*shown, idx, responses, ident, com, sug, ovl, "All metrics must be answered before proceeding.")

    responses[idx] = {"Identification": ident, "Comforting": com, "Suggestion": sug, "Overall": ovl}
    save_all_to_csv(user_id, dataset, examples, responses)

    idx += 1
    if idx >= len(examples):
        return finished + (idx, responses, None, None, None, None, "")

    loaded = es_load_example(idx, examples, responses)
    return (*loaded[:4], idx, responses, *loaded[4:])
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Step back one ESConv example, keeping the current answers if complete.

    The current answers are stored and saved only when all four metrics are
    answered and ``idx`` refers to a real example. On the completion screen
    ``idx`` equals ``len(examples)``, and storing an answer there would make
    the save fail with an IndexError.

    Returns 11 values in the order of the outputs wired to the ESConv "Prev"
    button: background, left text, right text, progress, new index,
    responses, the four radio values, and an error message.
    """
    answers = (ident, com, sug, ovl)
    if idx < len(examples) and None not in answers:
        responses[idx] = {"Identification": ident, "Comforting": com, "Suggestion": sug, "Overall": ovl}
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)  # stay on the first example when already there
    loaded = es_load_example(idx, examples, responses)
    return (*loaded[:4], idx, responses, *loaded[4:])
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Store the CraigslistBargain answers for the current example and advance.

    Returns 11 values in the order of the outputs wired to the
    CraigslistBargain "Next" button: background, left text, right text,
    progress, new index, responses, the four radio values, and an error
    message.

    - If the survey is already finished (``idx`` is past the last example),
      the completion screen is returned again and nothing is stored.
    - If any metric is unanswered, the current example is kept and an error
      message is returned.
    - Otherwise the answers are stored and saved, and either the next example
      or the completion screen is returned.
    """
    finished = ("🚩 Survey complete! Thank you.",) * 4

    # Already on the completion screen: there is no example at idx, so do not
    # try to load it or store answers for it.
    if idx >= len(examples):
        return finished + (idx, responses, None, None, None, None, "")

    if None in (per, coh, nat, ovl_cb):
        shown = cb_load_example(idx, examples, responses)[:4]
        return (*shown, idx, responses, per, coh, nat, ovl_cb, "All metrics must be answered before proceeding.")

    responses[idx] = {"Persuasiveness": per, "Coherence": coh, "Naturalness": nat, "Overall": ovl_cb}
    save_all_to_csv(user_id, dataset, examples, responses)

    idx += 1
    if idx >= len(examples):
        # Four text boxes get the message; index and responses state are
        # passed through, radios are cleared, error message is empty. All 11
        # wired outputs must receive a value.
        return finished + (idx, responses, None, None, None, None, "")

    loaded = cb_load_example(idx, examples, responses)
    return (*loaded[:4], idx, responses, *loaded[4:])
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Step back one CraigslistBargain example, keeping complete answers.

    The current answers are stored and saved only when all four metrics are
    answered and ``idx`` refers to a real example. On the completion screen
    ``idx`` equals ``len(examples)``, and storing an answer there would make
    the save fail with an IndexError.

    Returns 11 values in the order of the outputs wired to the
    CraigslistBargain "Prev" button: background, left text, right text,
    progress, new index, responses, the four radio values, and an error
    message.
    """
    answers = (per, coh, nat, ovl_cb)
    if idx < len(examples) and None not in answers:
        responses[idx] = {"Persuasiveness": per, "Coherence": coh, "Naturalness": nat, "Overall": ovl_cb}
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)  # stay on the first example when already there
    loaded = cb_load_example(idx, examples, responses)
    return (*loaded[:4], idx, responses, *loaded[4:])
# ─── Build Gradio App ──────────────────────────────────────────────────────────
# UI definition: a login screen plus one evaluation panel per dataset. Only
# one of the three columns is visible at a time; the login/logout callbacks
# toggle them. Event listeners are registered inside the Blocks context.
with gr.Blocks(title="Human Evaluation Survey") as demo:
    # Every metric offers the same three options.
    preference_choices = ["Prefer Response A", "Prefer Response B", "No preference"]

    # Login panel
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # Shared state
    uid_state = gr.State(value="")
    ds_state = gr.State(value="")
    ex_state = gr.State(value=[])
    idx_state = gr.State(value=0)
    resp_state = gr.State(value={})

    # ESConv Panel
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)
        ident = gr.Radio(preference_choices, label="Identification (Ident.)")
        com = gr.Radio(preference_choices, label="Comforting (Com.)")
        sug = gr.Radio(preference_choices, label="Suggestion (Sug.)")
        ovl_es = gr.Radio(preference_choices, label="Overall (Ov.)")
        # Not hidden: the callbacks only set this component's value, so a
        # hidden component would never display the validation message.
        err_es = gr.HTML()
        prog = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("◀ Prev")
            next_btn = gr.Button("▶ Next")
        logout_es = gr.Button("🚪 Logout")

    # CraigslistBargain Panel
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)
        per = gr.Radio(preference_choices, label="Persuasiveness (Per.)")
        coh = gr.Radio(preference_choices, label="Coherence (Coh.)")
        nat = gr.Radio(preference_choices, label="Naturalness (Nat.)")
        ovl_cb = gr.Radio(preference_choices, label="Overall")
        # Not hidden, for the same reason as the ESConv error component.
        err_cb = gr.HTML()
        prog_cb = gr.Textbox(label="Progress", interactive=False)
        with gr.Row():
            prev_cb = gr.Button("◀ Prev")
            next_cb = gr.Button("▶ Next")
        logout_cb = gr.Button("🚪 Logout")

    # Wiring callbacks
    # Output groups shared by several listeners; the callbacks return their
    # values in exactly this order.
    panel_outputs = [login_panel, es_panel, cb_panel]
    state_outputs = [uid_state, ds_state, ex_state, idx_state, resp_state]
    es_widgets = [bg, lbox, rbox, prog, ident, com, sug, ovl_es, err_es]
    cb_widgets = [bg_cb, lbox_cb, rbox_cb, prog_cb, per, coh, nat, ovl_cb, err_cb]

    start_btn.click(
        login_fn,
        inputs=[user_id_in, ds_dd],
        outputs=panel_outputs + state_outputs + es_widgets + cb_widgets,
    )

    es_nav_inputs = [uid_state, ds_state, ex_state, idx_state, resp_state, ident, com, sug, ovl_es]
    es_nav_outputs = [bg, lbox, rbox, prog, idx_state, resp_state, ident, com, sug, ovl_es, err_es]
    next_btn.click(es_next_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)
    prev_btn.click(es_prev_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)

    cb_nav_inputs = [uid_state, ds_state, ex_state, idx_state, resp_state, per, coh, nat, ovl_cb]
    cb_nav_outputs = [bg_cb, lbox_cb, rbox_cb, prog_cb, idx_state, resp_state, per, coh, nat, ovl_cb, err_cb]
    next_cb.click(cb_next_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)
    prev_cb.click(cb_prev_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)

    # Both logout buttons do the same thing.
    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(
            logout_fn,
            inputs=[uid_state, ds_state, ex_state, idx_state, resp_state],
            outputs=panel_outputs + state_outputs + es_widgets + cb_widgets,
        )
# Start the app only when this file is run as a script, not when imported.
# NOTE(review): share=True is passed to launch(); presumably this requests a
# public share link — confirm that is wanted where this is deployed.
if __name__ == "__main__":
    demo.launch(share=True)