Update app.py
Browse files
app.py
CHANGED
@@ -9,23 +9,55 @@ def create_reranking_interface(task_data):
|
|
9 |
results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
|
10 |
completed_samples = {s["id"]: False for s in samples}
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
if os.path.exists(output_path):
|
15 |
try:
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
except Exception as e:
|
24 |
-
|
|
|
25 |
|
26 |
-
# Create the main interface
|
27 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
28 |
gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
|
|
|
29 |
with gr.Accordion("Instructions", open=True):
|
30 |
gr.Markdown("""
|
31 |
## Task Instructions
|
@@ -34,413 +66,156 @@ def create_reranking_interface(task_data):
|
|
34 |
|
35 |
### How to use this interface:
|
36 |
1. Read the query at the top
|
37 |
-
2.
|
38 |
-
3.
|
39 |
-
4.
|
40 |
-
5.
|
41 |
-
6.
|
|
|
42 |
""".format(instructions=task_data["instructions"]))
|
43 |
|
44 |
-
# State variables
|
45 |
current_sample_id = gr.State(value=samples[0]["id"])
|
46 |
|
47 |
-
# Progress tracking
|
48 |
with gr.Row():
|
49 |
-
progress_text = gr.Textbox(label="Progress", value=f"Progress:
|
50 |
status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
|
51 |
|
52 |
-
# Query display
|
53 |
with gr.Group():
|
54 |
gr.Markdown("## Query:")
|
55 |
-
query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False
|
56 |
|
57 |
-
# Document ranking section
|
58 |
gr.Markdown("## Documents to Rank:")
|
59 |
|
60 |
-
# Create
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
# Create dynamic HTML for the ranking interface
|
64 |
-
def generate_ranking_html(docs, existing_ranks=None):
|
65 |
-
"""Generate HTML for number-based ranking interface."""
|
66 |
-
if not docs:
|
67 |
-
return ""
|
68 |
-
|
69 |
-
# Use existing ranks if available
|
70 |
-
ranks = list(range(1, len(docs) + 1))
|
71 |
-
if existing_ranks and len(existing_ranks) == len(docs):
|
72 |
-
ranks = existing_ranks
|
73 |
-
|
74 |
-
html = """
|
75 |
-
<style>
|
76 |
-
.doc-container {
|
77 |
-
margin-bottom: 15px;
|
78 |
-
border: 1px solid #ddd;
|
79 |
-
border-radius: 8px;
|
80 |
-
padding: 15px;
|
81 |
-
background-color: #f9f9f9;
|
82 |
-
}
|
83 |
-
.doc-header {
|
84 |
-
display: flex;
|
85 |
-
align-items: center;
|
86 |
-
margin-bottom: 10px;
|
87 |
-
}
|
88 |
-
.doc-rank {
|
89 |
-
display: flex;
|
90 |
-
align-items: center;
|
91 |
-
margin-right: 15px;
|
92 |
-
}
|
93 |
-
.rank-label {
|
94 |
-
font-weight: bold;
|
95 |
-
margin-right: 8px;
|
96 |
-
min-width: 80px;
|
97 |
-
}
|
98 |
-
.rank-input {
|
99 |
-
width: 60px;
|
100 |
-
padding: 5px;
|
101 |
-
border: 2px solid #007bff;
|
102 |
-
border-radius: 4px;
|
103 |
-
text-align: center;
|
104 |
-
font-size: 16px;
|
105 |
-
}
|
106 |
-
.doc-content {
|
107 |
-
padding: 10px;
|
108 |
-
background-color: white;
|
109 |
-
border-radius: 4px;
|
110 |
-
border-left: 4px solid #007bff;
|
111 |
-
white-space: pre-wrap;
|
112 |
-
font-family: sans-serif;
|
113 |
-
line-height: 1.5;
|
114 |
-
}
|
115 |
-
</style>
|
116 |
-
|
117 |
-
<div id="ranking-form">
|
118 |
-
<input type="hidden" id="ranking-state" value="">
|
119 |
-
"""
|
120 |
-
|
121 |
-
# Add each document with a number input
|
122 |
-
for i, doc in enumerate(docs):
|
123 |
-
import html as html_lib
|
124 |
-
escaped_doc = html_lib.escape(doc)
|
125 |
-
current_rank = ranks[i] if i < len(ranks) else i + 1
|
126 |
-
|
127 |
-
html += f"""
|
128 |
-
<div class="doc-container" id="doc-{i}">
|
129 |
-
<div class="doc-header">
|
130 |
-
<div class="doc-rank">
|
131 |
-
<span class="rank-label">Document {i+1} Rank:</span>
|
132 |
-
<input type="number" class="rank-input" id="rank-{i}" value="{current_rank}"
|
133 |
-
min="1" max="{len(docs)}" data-doc-id="{i}"
|
134 |
-
onchange="updateRankings()">
|
135 |
-
</div>
|
136 |
-
</div>
|
137 |
-
<div class="doc-content">{escaped_doc}</div>
|
138 |
-
</div>
|
139 |
-
"""
|
140 |
-
|
141 |
-
# Add validation and state tracking JS
|
142 |
-
html += """
|
143 |
-
<script>
|
144 |
-
function updateRankings() {
|
145 |
-
// Collect all rank inputs
|
146 |
-
const inputs = document.querySelectorAll('.rank-input');
|
147 |
-
const rankings = [];
|
148 |
-
|
149 |
-
// Get values and highlight duplicates
|
150 |
-
const values = new Map();
|
151 |
-
const duplicates = new Set();
|
152 |
-
|
153 |
-
inputs.forEach(input => {
|
154 |
-
const docId = parseInt(input.getAttribute('data-doc-id'));
|
155 |
-
const rank = parseInt(input.value);
|
156 |
-
|
157 |
-
// Store value
|
158 |
-
rankings.push({
|
159 |
-
docId: docId,
|
160 |
-
rank: rank
|
161 |
-
});
|
162 |
-
|
163 |
-
// Check for duplicates
|
164 |
-
if (values.has(rank)) {
|
165 |
-
duplicates.add(rank);
|
166 |
-
} else {
|
167 |
-
values.set(rank, docId);
|
168 |
-
}
|
169 |
-
|
170 |
-
// Reset styling
|
171 |
-
input.style.borderColor = '#007bff';
|
172 |
-
});
|
173 |
-
|
174 |
-
// Highlight duplicates
|
175 |
-
inputs.forEach(input => {
|
176 |
-
const rank = parseInt(input.value);
|
177 |
-
if (duplicates.has(rank)) {
|
178 |
-
input.style.borderColor = '#ff3860';
|
179 |
-
}
|
180 |
-
});
|
181 |
-
|
182 |
-
// Store to hidden input
|
183 |
-
const stateInput = document.getElementById('ranking-state');
|
184 |
-
if (stateInput) {
|
185 |
-
stateInput.value = JSON.stringify(rankings);
|
186 |
-
}
|
187 |
-
|
188 |
-
// Update gradio text area
|
189 |
-
const textArea = document.querySelector('#rankings-state-input textarea');
|
190 |
-
if (textArea) {
|
191 |
-
textArea.value = JSON.stringify(rankings);
|
192 |
-
const event = new Event('input', { bubbles: true });
|
193 |
-
textArea.dispatchEvent(event);
|
194 |
-
}
|
195 |
-
}
|
196 |
-
|
197 |
-
// Initialize on page load
|
198 |
-
document.addEventListener('DOMContentLoaded', updateRankings);
|
199 |
-
// Also use a delay as a backup
|
200 |
-
setTimeout(updateRankings, 500);
|
201 |
-
</script>
|
202 |
-
</div>
|
203 |
-
"""
|
204 |
-
|
205 |
-
return html
|
206 |
-
|
207 |
-
# Initial ranking HTML
|
208 |
-
ranking_html = gr.HTML(
|
209 |
-
generate_ranking_html(samples[0]["candidates"]),
|
210 |
-
elem_id="ranking-container"
|
211 |
-
)
|
212 |
-
|
213 |
-
# Hidden input for state
|
214 |
-
rankings_state = gr.Textbox(
|
215 |
-
value="[]",
|
216 |
-
visible=False,
|
217 |
-
elem_id="rankings-state-input"
|
218 |
-
)
|
219 |
-
|
220 |
-
# Validation message
|
221 |
-
validation_msg = gr.Textbox(
|
222 |
-
label="Validation",
|
223 |
-
interactive=False
|
224 |
-
)
|
225 |
-
|
226 |
-
# Navigation and submission buttons
|
227 |
with gr.Row():
|
228 |
prev_btn = gr.Button("← Previous Query", size="sm")
|
229 |
-
validate_btn = gr.Button("Validate Rankings", variant="secondary")
|
230 |
submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
|
231 |
next_btn = gr.Button("Next Query →", size="sm")
|
|
|
232 |
save_btn = gr.Button("💾 Save All Results", variant="secondary")
|
233 |
|
234 |
-
# Function to load a sample
|
235 |
def load_sample(sample_id):
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
if not ranks:
|
271 |
-
return "No valid ranks found."
|
272 |
-
|
273 |
-
# Check for duplicates
|
274 |
-
if len(set(ranks)) != len(ranks):
|
275 |
-
# Find duplicates
|
276 |
-
dupes = {}
|
277 |
-
for r in ranks:
|
278 |
-
dupes[r] = dupes.get(r, 0) + 1
|
279 |
-
duplicates = [r for r, count in dupes.items() if count > 1]
|
280 |
-
return f"⚠️ Duplicate ranks found: {', '.join(map(str, sorted(duplicates)))}. Each document must have a unique rank."
|
281 |
-
|
282 |
-
# Check for complete sequence
|
283 |
-
max_rank = max(ranks)
|
284 |
-
expected = set(range(1, max_rank + 1))
|
285 |
-
if set(ranks) != expected:
|
286 |
-
missing = sorted(expected - set(ranks))
|
287 |
-
return f"⚠️ Missing ranks: {', '.join(map(str, missing))}. Ranks must be consecutive from 1 to {max_rank}."
|
288 |
-
|
289 |
-
return "✅ Rankings are valid. Ready to submit."
|
290 |
-
|
291 |
-
except json.JSONDecodeError:
|
292 |
-
return "Error parsing ranking data."
|
293 |
-
except Exception as e:
|
294 |
-
return f"Error validating rankings: {str(e)}"
|
295 |
-
|
296 |
-
# Function to save rankings from JSON state
|
297 |
-
def save_ranking_state(sample_id, state_json):
|
298 |
-
try:
|
299 |
-
if not state_json or state_json == "[]":
|
300 |
-
return "Please rank all documents before submitting.", progress_text.value
|
301 |
-
|
302 |
-
# Get the sample
|
303 |
-
sample = next((s for s in samples if s["id"] == sample_id), None)
|
304 |
-
if not sample:
|
305 |
-
return "⚠️ Sample not found", progress_text.value
|
306 |
-
|
307 |
-
# Parse the state
|
308 |
-
state = json.loads(state_json)
|
309 |
-
|
310 |
-
# Create a rankings array in the correct order
|
311 |
-
num_candidates = len(sample["candidates"])
|
312 |
-
rankings = [0] * num_candidates
|
313 |
-
|
314 |
-
# Fill in rankings from state
|
315 |
-
for item in state:
|
316 |
-
doc_id = item.get("docId")
|
317 |
-
rank = item.get("rank")
|
318 |
-
if doc_id is not None and doc_id < num_candidates and rank is not None:
|
319 |
-
rankings[doc_id] = rank
|
320 |
-
|
321 |
-
# Validate rankings
|
322 |
-
if any(r == 0 for r in rankings):
|
323 |
-
return "⚠️ Not all documents have rankings", progress_text.value
|
324 |
-
|
325 |
-
if sorted(rankings) != list(range(1, num_candidates + 1)):
|
326 |
-
return f"⚠️ Invalid ranking sequence. Please use each number from 1 to {num_candidates} exactly once.", progress_text.value
|
327 |
-
|
328 |
-
# Create annotation
|
329 |
-
annotation = {"sample_id": sample_id, "rankings": rankings}
|
330 |
-
|
331 |
-
# Update or add to results
|
332 |
-
existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
|
333 |
-
if existing_idx is not None:
|
334 |
-
results["annotations"][existing_idx] = annotation
|
335 |
-
else:
|
336 |
-
results["annotations"].append(annotation)
|
337 |
-
|
338 |
-
# Mark as completed
|
339 |
-
completed_samples[sample_id] = True
|
340 |
-
|
341 |
-
# Save to file
|
342 |
-
with open(output_path, "w") as f:
|
343 |
-
json.dump(results, f, indent=2)
|
344 |
-
|
345 |
-
# Update progress
|
346 |
-
progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
|
347 |
-
|
348 |
-
return f"✅ Rankings saved successfully! ({sum(completed_samples.values())}/{len(samples)} completed)", progress
|
349 |
-
except Exception as e:
|
350 |
-
import traceback
|
351 |
-
print(traceback.format_exc())
|
352 |
-
return f"⚠️ Error saving rankings: {str(e)}", progress_text.value
|
353 |
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
if
|
358 |
return current_id
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
361 |
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
if
|
366 |
return current_id
|
367 |
-
|
368 |
-
|
|
|
|
|
|
|
|
|
369 |
|
370 |
-
# Function to save all results
|
371 |
def save_results():
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
return f"⚠️ Error saving results file: {str(e)}"
|
378 |
|
379 |
-
#
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
|
|
|
386 |
submit_btn.click(
|
387 |
-
|
388 |
-
inputs=[current_sample_id
|
389 |
outputs=[status_box, progress_text]
|
390 |
)
|
391 |
|
392 |
next_btn.click(
|
393 |
-
|
394 |
-
inputs=[current_sample_id],
|
395 |
outputs=[current_sample_id]
|
396 |
).then(
|
397 |
load_sample,
|
398 |
inputs=[current_sample_id],
|
399 |
-
outputs=[
|
400 |
-
current_sample_id,
|
401 |
-
query_text,
|
402 |
-
ranking_html,
|
403 |
-
progress_text,
|
404 |
-
status_box,
|
405 |
-
rankings_state,
|
406 |
-
validation_msg
|
407 |
-
]
|
408 |
)
|
409 |
|
410 |
prev_btn.click(
|
411 |
-
|
412 |
-
inputs=[current_sample_id],
|
413 |
outputs=[current_sample_id]
|
414 |
).then(
|
415 |
load_sample,
|
416 |
inputs=[current_sample_id],
|
417 |
-
outputs=[
|
418 |
-
current_sample_id,
|
419 |
-
query_text,
|
420 |
-
ranking_html,
|
421 |
-
progress_text,
|
422 |
-
status_box,
|
423 |
-
rankings_state,
|
424 |
-
validation_msg
|
425 |
-
]
|
426 |
)
|
427 |
|
428 |
save_btn.click(save_results, outputs=[status_box])
|
429 |
-
|
430 |
-
# Initialize with first sample
|
431 |
-
demo.load(
|
432 |
-
lambda: load_sample(samples[0]['id']),
|
433 |
-
outputs=[
|
434 |
-
current_sample_id,
|
435 |
-
query_text,
|
436 |
-
ranking_html,
|
437 |
-
progress_text,
|
438 |
-
status_box,
|
439 |
-
rankings_state,
|
440 |
-
validation_msg
|
441 |
-
]
|
442 |
-
)
|
443 |
-
|
444 |
return demo
|
445 |
|
446 |
# Main app with file upload capability
|
@@ -493,7 +268,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
493 |
""")
|
494 |
|
495 |
with gr.Row():
|
496 |
-
with gr.Column():
|
497 |
file_input = gr.File(label="Upload a task file (JSON)")
|
498 |
load_btn = gr.Button("Load Task")
|
499 |
message = gr.Textbox(label="Status", interactive=False)
|
@@ -539,7 +314,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
539 |
download_results_btn = gr.Button("Download Results")
|
540 |
|
541 |
# Right side - will contain the actual interface
|
542 |
-
with gr.Column():
|
543 |
task_container = gr.HTML()
|
544 |
|
545 |
# Handle file upload and storage
|
@@ -650,11 +425,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
650 |
|
651 |
# Add download options
|
652 |
with gr.Row():
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download", value=None)
|
657 |
-
download_selected_btn = gr.Button("Download Selected")
|
658 |
|
659 |
# Add results visualization placeholder
|
660 |
gr.Markdown("### Results Visualization")
|
|
|
9 |
results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
|
10 |
completed_samples = {s["id"]: False for s in samples}
|
11 |
|
12 |
+
def save_ranking(rankings, sample_id):
    """Validate and store the ranks submitted for one sample.

    Args:
        rankings: One rank value per document, in document order. Every
            value must be non-empty and convertible with ``int()``.
        sample_id: Identifier of the sample the ranks belong to.

    Returns:
        A ``(status_message, progress_message)`` tuple of strings. Validation
        failures and unexpected errors are reported through the status
        message rather than raised.
    """
    def _progress():
        # Recomputed at each return so it reflects the latest completion state.
        return f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    try:
        # Every document needs a rank before anything is stored.
        if any(r is None or r == "" for r in rankings):
            return "⚠️ Please assign a rank to all documents before submitting", _progress()

        try:
            processed_rankings = [int(r) for r in rankings]
        except ValueError:
            return "⚠️ Invalid ranking value. Please use only numbers.", _progress()

        # Two documents may not share a rank.
        if len(set(processed_rankings)) != len(processed_rankings):
            return "⚠️ Each document must have a unique rank. Please review your rankings.", _progress()

        # Replace an earlier annotation for this sample, or add a new one.
        annotation = {"sample_id": sample_id, "rankings": processed_rankings}
        existing_idx = next(
            (i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id),
            None,
        )
        if existing_idx is not None:
            results["annotations"][existing_idx] = annotation
        else:
            results["annotations"].append(annotation)

        completed_samples[sample_id] = True

        # Persisting to disk is best-effort: the in-memory annotation above is
        # already recorded, so a failed write is reported but not treated as
        # an error. `except Exception` (not a bare `except`) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            output_path = f"{task_data['task_name']}_human_results.json"
            with open(output_path, "w") as f:
                json.dump(results, f, indent=2)
        except Exception:
            return "✅ Rankings saved in memory (file save failed)", _progress()
        return "✅ Rankings saved successfully (in memory and to file)", _progress()
    except Exception as e:
        # Callback boundary: surface the failure in the status box.
        return f"Error: {str(e)}", _progress()
|
57 |
|
|
|
58 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
59 |
gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
|
60 |
+
|
61 |
with gr.Accordion("Instructions", open=True):
|
62 |
gr.Markdown("""
|
63 |
## Task Instructions
|
|
|
66 |
|
67 |
### How to use this interface:
|
68 |
1. Read the query at the top
|
69 |
+
2. Review each document carefully
|
70 |
+
3. Assign a rank to each document (1 = most relevant, higher numbers = less relevant)
|
71 |
+
4. Each document must have a unique rank
|
72 |
+
5. Click "Submit Rankings" when you're done with the current query
|
73 |
+
6. Use "Previous" and "Next" to navigate between queries
|
74 |
+
7. Click "Save All Results" periodically to ensure your work is saved
|
75 |
""".format(instructions=task_data["instructions"]))
|
76 |
|
|
|
77 |
current_sample_id = gr.State(value=samples[0]["id"])
|
78 |
|
|
|
79 |
with gr.Row():
|
80 |
+
progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
|
81 |
status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
|
82 |
|
|
|
83 |
with gr.Group():
|
84 |
gr.Markdown("## Query:")
|
85 |
+
query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
|
86 |
|
|
|
87 |
gr.Markdown("## Documents to Rank:")
|
88 |
|
89 |
+
# Create document displays and ranking dropdowns in synchronized pairs
|
90 |
+
doc_containers = []
|
91 |
+
ranking_dropdowns = []
|
92 |
+
|
93 |
+
with gr.Column():
|
94 |
+
for i, doc in enumerate(samples[0]["candidates"]):
|
95 |
+
with gr.Row():
|
96 |
+
doc_box = gr.Textbox(
|
97 |
+
value=doc,
|
98 |
+
label=f"Document {i+1}",
|
99 |
+
interactive=False
|
100 |
+
)
|
101 |
+
dropdown = gr.Dropdown(
|
102 |
+
choices=[str(j) for j in range(1, len(samples[0]["candidates"])+1)],
|
103 |
+
label=f"Rank",
|
104 |
+
value=""
|
105 |
+
)
|
106 |
+
doc_containers.append(doc_box)
|
107 |
+
ranking_dropdowns.append(dropdown)
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
with gr.Row():
|
110 |
prev_btn = gr.Button("← Previous Query", size="sm")
|
|
|
111 |
submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
|
112 |
next_btn = gr.Button("Next Query →", size="sm")
|
113 |
+
|
114 |
save_btn = gr.Button("💾 Save All Results", variant="secondary")
|
115 |
|
|
|
116 |
def load_sample(sample_id):
    """Build the component values needed to display one sample.

    Args:
        sample_id: Identifier of the sample to show.

    Returns:
        A flat list ordered as: query text, one value per entry of
        ``doc_containers``, one value per entry of ``ranking_dropdowns``,
        then sample id, progress text and status text. The list always has
        ``1 + len(doc_containers) + len(ranking_dropdowns) + 3`` items.
    """
    sample = next((s for s in samples if s["id"] == sample_id), None)
    if not sample:
        # Unknown id: return the components' own `.value` attributes and
        # blank ranks.
        # NOTE(review): `.value` is presumably the value set at build time,
        # not the live UI state — confirm against the Gradio docs.
        return (
            [query_text.value]
            + [d.value for d in doc_containers]
            + [""] * len(ranking_dropdowns)
            + [current_sample_id.value, progress_text.value, status_box.value]
        )

    new_query = sample["query"]

    # Exactly one value per document box: extra candidates are dropped, and
    # missing ones are padded with "" so the returned list keeps the length
    # described above even when this sample has fewer candidates than there
    # are document boxes.
    slot_count = len(doc_containers)
    new_docs = [doc for i, doc in enumerate(sample["candidates"]) if i < slot_count]
    new_docs += [""] * (slot_count - len(new_docs))

    # Start with blank ranks, then restore a previously saved annotation.
    new_rankings = [""] * len(ranking_dropdowns)
    existing_annotation = next(
        (a for a in results["annotations"] if a["sample_id"] == sample_id),
        None,
    )
    if existing_annotation:
        for i, rank in enumerate(existing_annotation["rankings"]):
            if i < len(new_rankings) and rank is not None:
                new_rankings[i] = str(rank)

    current_idx = samples.index(sample)
    new_progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    new_status = f"Viewing query {current_idx + 1} of {len(samples)}"
    if completed_samples[sample_id]:
        new_status += " (already completed)"

    return [new_query] + new_docs + new_rankings + [sample["id"], new_progress, new_status]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
+
def next_sample(current_id):
    """Return the id of the sample after ``current_id``.

    ``current_id`` is returned unchanged when it matches no sample or
    already refers to the last sample.
    """
    position = next(
        (i for i, s in enumerate(samples) if s["id"] == current_id),
        None,
    )
    if position is None or position >= len(samples) - 1:
        return current_id
    following = samples[position + 1]
    return following["id"]
|
163 |
|
164 |
+
def prev_sample(current_id):
    """Return the id of the sample before ``current_id``.

    ``current_id`` is returned unchanged when it matches no sample or
    already refers to the first sample.
    """
    position = next(
        (i for i, s in enumerate(samples) if s["id"] == current_id),
        None,
    )
    if position is None or position <= 0:
        return current_id
    preceding = samples[position - 1]
    return preceding["id"]
|
175 |
|
|
|
176 |
def save_results():
    """Write all collected annotations to the task's results file.

    The file is named ``<task_name>_human_results.json``, a relative path.

    Returns:
        A status string: a success message with the path and the number of
        annotations, or an error message if the file could not be written.
    """
    output_path = f"{task_data['task_name']}_human_results.json"
    try:
        with open(output_path, "w") as f:
            json.dump(results, f, indent=2)
    except (OSError, TypeError, ValueError) as e:
        # Return the failure as a status message instead of letting it
        # escape the button callback.
        return f"⚠️ Error saving results file: {str(e)}"
    return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
|
|
|
182 |
|
183 |
+
# Define a wrapper function that collects all the dropdown values into a list
|
184 |
+
def save_ranking_wrapper(*args):
    """Adapt positional callback arguments to ``save_ranking``.

    Every argument except the last is a rank value; the last is the sample id.
    """
    return save_ranking(args[:-1], args[-1])
|
189 |
|
190 |
+
# Connect events
|
191 |
# Components rewritten by load_sample, in the order load_sample returns them.
sample_view_outputs = (
    [query_text]
    + doc_containers
    + ranking_dropdowns
    + [current_sample_id, progress_text, status_box]
)

# Submit: pass every dropdown value followed by the current sample id.
submit_btn.click(
    save_ranking_wrapper,
    inputs=ranking_dropdowns + [current_sample_id],
    outputs=[status_box, progress_text],
)

# Next / Previous: first move the sample id, then reload the view for it.
next_btn.click(
    next_sample,
    inputs=[current_sample_id],
    outputs=[current_sample_id],
).then(
    load_sample,
    inputs=[current_sample_id],
    outputs=sample_view_outputs,
)

prev_btn.click(
    prev_sample,
    inputs=[current_sample_id],
    outputs=[current_sample_id],
).then(
    load_sample,
    inputs=[current_sample_id],
    outputs=sample_view_outputs,
)

save_btn.click(save_results, outputs=[status_box])
|
218 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
return demo
|
220 |
|
221 |
# Main app with file upload capability
|
|
|
268 |
""")
|
269 |
|
270 |
with gr.Row():
|
271 |
+
with gr.Column(scale=1):
|
272 |
file_input = gr.File(label="Upload a task file (JSON)")
|
273 |
load_btn = gr.Button("Load Task")
|
274 |
message = gr.Textbox(label="Status", interactive=False)
|
|
|
314 |
download_results_btn = gr.Button("Download Results")
|
315 |
|
316 |
# Right side - will contain the actual interface
|
317 |
+
with gr.Column(scale=2):
|
318 |
task_container = gr.HTML()
|
319 |
|
320 |
# Handle file upload and storage
|
|
|
425 |
|
426 |
# Add download options
|
427 |
with gr.Row():
|
428 |
+
download_all_btn = gr.Button("Download All Results (ZIP)")
|
429 |
+
result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
|
430 |
+
download_selected_btn = gr.Button("Download Selected")
|
|
|
|
|
431 |
|
432 |
# Add results visualization placeholder
|
433 |
gr.Markdown("### Results Visualization")
|