AdnanElAssadi committed on
Commit
dbcd9e2
·
verified ·
1 Parent(s): 48294e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -379
app.py CHANGED
@@ -9,23 +9,55 @@ def create_reranking_interface(task_data):
9
  results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
10
  completed_samples = {s["id"]: False for s in samples}
11
 
12
- # Try to load existing results
13
- output_path = f"{task_data['task_name']}_human_results.json"
14
- if os.path.exists(output_path):
15
  try:
16
- with open(output_path, "r") as f:
17
- existing_results = json.load(f)
18
- results = existing_results
19
- # Update completed samples based on existing annotations
20
- for anno in results.get("annotations", []):
21
- if "sample_id" in anno:
22
- completed_samples[anno["sample_id"]] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  except Exception as e:
24
- print(f"Error loading existing results: {str(e)}")
 
25
 
26
- # Create the main interface
27
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
28
  gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
 
29
  with gr.Accordion("Instructions", open=True):
30
  gr.Markdown("""
31
  ## Task Instructions
@@ -34,413 +66,156 @@ def create_reranking_interface(task_data):
34
 
35
  ### How to use this interface:
36
  1. Read the query at the top
37
- 2. For each document, select its rank (1 = most relevant)
38
- 3. Make sure each document has a unique rank (1 to N)
39
- 4. Click "Submit Rankings" when you're done with the current query
40
- 5. Use "Previous" and "Next" to navigate between queries
41
- 6. Click "Save All Results" periodically to ensure your work is saved
 
42
  """.format(instructions=task_data["instructions"]))
43
 
44
- # State variables
45
  current_sample_id = gr.State(value=samples[0]["id"])
46
 
47
- # Progress tracking
48
  with gr.Row():
49
- progress_text = gr.Textbox(label="Progress", value=f"Progress: {sum(completed_samples.values())}/{len(samples)}", interactive=False)
50
  status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
51
 
52
- # Query display
53
  with gr.Group():
54
  gr.Markdown("## Query:")
55
- query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False, lines=3)
56
 
57
- # Document ranking section
58
  gr.Markdown("## Documents to Rank:")
59
 
60
- # Create simple data structure for documents
61
- doc_state = gr.State(value=samples[0]["candidates"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Create dynamic HTML for the ranking interface
64
- def generate_ranking_html(docs, existing_ranks=None):
65
- """Generate HTML for number-based ranking interface."""
66
- if not docs:
67
- return ""
68
-
69
- # Use existing ranks if available
70
- ranks = list(range(1, len(docs) + 1))
71
- if existing_ranks and len(existing_ranks) == len(docs):
72
- ranks = existing_ranks
73
-
74
- html = """
75
- <style>
76
- .doc-container {
77
- margin-bottom: 15px;
78
- border: 1px solid #ddd;
79
- border-radius: 8px;
80
- padding: 15px;
81
- background-color: #f9f9f9;
82
- }
83
- .doc-header {
84
- display: flex;
85
- align-items: center;
86
- margin-bottom: 10px;
87
- }
88
- .doc-rank {
89
- display: flex;
90
- align-items: center;
91
- margin-right: 15px;
92
- }
93
- .rank-label {
94
- font-weight: bold;
95
- margin-right: 8px;
96
- min-width: 80px;
97
- }
98
- .rank-input {
99
- width: 60px;
100
- padding: 5px;
101
- border: 2px solid #007bff;
102
- border-radius: 4px;
103
- text-align: center;
104
- font-size: 16px;
105
- }
106
- .doc-content {
107
- padding: 10px;
108
- background-color: white;
109
- border-radius: 4px;
110
- border-left: 4px solid #007bff;
111
- white-space: pre-wrap;
112
- font-family: sans-serif;
113
- line-height: 1.5;
114
- }
115
- </style>
116
-
117
- <div id="ranking-form">
118
- <input type="hidden" id="ranking-state" value="">
119
- """
120
-
121
- # Add each document with a number input
122
- for i, doc in enumerate(docs):
123
- import html as html_lib
124
- escaped_doc = html_lib.escape(doc)
125
- current_rank = ranks[i] if i < len(ranks) else i + 1
126
-
127
- html += f"""
128
- <div class="doc-container" id="doc-{i}">
129
- <div class="doc-header">
130
- <div class="doc-rank">
131
- <span class="rank-label">Document {i+1} Rank:</span>
132
- <input type="number" class="rank-input" id="rank-{i}" value="{current_rank}"
133
- min="1" max="{len(docs)}" data-doc-id="{i}"
134
- onchange="updateRankings()">
135
- </div>
136
- </div>
137
- <div class="doc-content">{escaped_doc}</div>
138
- </div>
139
- """
140
-
141
- # Add validation and state tracking JS
142
- html += """
143
- <script>
144
- function updateRankings() {
145
- // Collect all rank inputs
146
- const inputs = document.querySelectorAll('.rank-input');
147
- const rankings = [];
148
-
149
- // Get values and highlight duplicates
150
- const values = new Map();
151
- const duplicates = new Set();
152
-
153
- inputs.forEach(input => {
154
- const docId = parseInt(input.getAttribute('data-doc-id'));
155
- const rank = parseInt(input.value);
156
-
157
- // Store value
158
- rankings.push({
159
- docId: docId,
160
- rank: rank
161
- });
162
-
163
- // Check for duplicates
164
- if (values.has(rank)) {
165
- duplicates.add(rank);
166
- } else {
167
- values.set(rank, docId);
168
- }
169
-
170
- // Reset styling
171
- input.style.borderColor = '#007bff';
172
- });
173
-
174
- // Highlight duplicates
175
- inputs.forEach(input => {
176
- const rank = parseInt(input.value);
177
- if (duplicates.has(rank)) {
178
- input.style.borderColor = '#ff3860';
179
- }
180
- });
181
-
182
- // Store to hidden input
183
- const stateInput = document.getElementById('ranking-state');
184
- if (stateInput) {
185
- stateInput.value = JSON.stringify(rankings);
186
- }
187
-
188
- // Update gradio text area
189
- const textArea = document.querySelector('#rankings-state-input textarea');
190
- if (textArea) {
191
- textArea.value = JSON.stringify(rankings);
192
- const event = new Event('input', { bubbles: true });
193
- textArea.dispatchEvent(event);
194
- }
195
- }
196
-
197
- // Initialize on page load
198
- document.addEventListener('DOMContentLoaded', updateRankings);
199
- // Also use a delay as a backup
200
- setTimeout(updateRankings, 500);
201
- </script>
202
- </div>
203
- """
204
-
205
- return html
206
-
207
- # Initial ranking HTML
208
- ranking_html = gr.HTML(
209
- generate_ranking_html(samples[0]["candidates"]),
210
- elem_id="ranking-container"
211
- )
212
-
213
- # Hidden input for state
214
- rankings_state = gr.Textbox(
215
- value="[]",
216
- visible=False,
217
- elem_id="rankings-state-input"
218
- )
219
-
220
- # Validation message
221
- validation_msg = gr.Textbox(
222
- label="Validation",
223
- interactive=False
224
- )
225
-
226
- # Navigation and submission buttons
227
  with gr.Row():
228
  prev_btn = gr.Button("← Previous Query", size="sm")
229
- validate_btn = gr.Button("Validate Rankings", variant="secondary")
230
  submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
231
  next_btn = gr.Button("Next Query →", size="sm")
 
232
  save_btn = gr.Button("💾 Save All Results", variant="secondary")
233
 
234
- # Function to load a sample
235
  def load_sample(sample_id):
236
- try:
237
- sample = next((s for s in samples if s["id"] == sample_id), None)
238
- if not sample:
239
- return sample_id, gr.update(), gr.update(), gr.update(), gr.update(), "[]", gr.update()
240
-
241
- # Get existing ranking if available
242
- existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
243
-
244
- # Generate HTML with existing rankings if available
245
- html = generate_ranking_html(sample["candidates"], existing_ranking)
246
-
247
- # Update status
248
- status = "Already ranked" if completed_samples.get(sample_id, False) else "Ready to rank"
249
- progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
250
-
251
- return sample_id, sample["query"], html, progress, status, "[]", ""
252
- except Exception as e:
253
- import traceback
254
- print(traceback.format_exc())
255
- return sample_id, gr.update(), gr.update(), gr.update(), f"Error: {str(e)}", "[]", ""
256
-
257
- # Function to validate rankings from JSON state
258
- def validate_ranking_state(state_json):
259
- try:
260
- if not state_json or state_json == "[]":
261
- return "Please rank all documents before submitting."
262
-
263
- # Parse the state
264
- state = json.loads(state_json)
265
- if not state:
266
- return "No ranking data found."
267
-
268
- # Extract ranks
269
- ranks = [item.get("rank") for item in state if "rank" in item]
270
- if not ranks:
271
- return "No valid ranks found."
272
-
273
- # Check for duplicates
274
- if len(set(ranks)) != len(ranks):
275
- # Find duplicates
276
- dupes = {}
277
- for r in ranks:
278
- dupes[r] = dupes.get(r, 0) + 1
279
- duplicates = [r for r, count in dupes.items() if count > 1]
280
- return f"⚠️ Duplicate ranks found: {', '.join(map(str, sorted(duplicates)))}. Each document must have a unique rank."
281
-
282
- # Check for complete sequence
283
- max_rank = max(ranks)
284
- expected = set(range(1, max_rank + 1))
285
- if set(ranks) != expected:
286
- missing = sorted(expected - set(ranks))
287
- return f"⚠️ Missing ranks: {', '.join(map(str, missing))}. Ranks must be consecutive from 1 to {max_rank}."
288
-
289
- return "✅ Rankings are valid. Ready to submit."
290
-
291
- except json.JSONDecodeError:
292
- return "Error parsing ranking data."
293
- except Exception as e:
294
- return f"Error validating rankings: {str(e)}"
295
-
296
- # Function to save rankings from JSON state
297
- def save_ranking_state(sample_id, state_json):
298
- try:
299
- if not state_json or state_json == "[]":
300
- return "Please rank all documents before submitting.", progress_text.value
301
-
302
- # Get the sample
303
- sample = next((s for s in samples if s["id"] == sample_id), None)
304
- if not sample:
305
- return "⚠️ Sample not found", progress_text.value
306
-
307
- # Parse the state
308
- state = json.loads(state_json)
309
-
310
- # Create a rankings array in the correct order
311
- num_candidates = len(sample["candidates"])
312
- rankings = [0] * num_candidates
313
-
314
- # Fill in rankings from state
315
- for item in state:
316
- doc_id = item.get("docId")
317
- rank = item.get("rank")
318
- if doc_id is not None and doc_id < num_candidates and rank is not None:
319
- rankings[doc_id] = rank
320
-
321
- # Validate rankings
322
- if any(r == 0 for r in rankings):
323
- return "⚠️ Not all documents have rankings", progress_text.value
324
-
325
- if sorted(rankings) != list(range(1, num_candidates + 1)):
326
- return f"⚠️ Invalid ranking sequence. Please use each number from 1 to {num_candidates} exactly once.", progress_text.value
327
-
328
- # Create annotation
329
- annotation = {"sample_id": sample_id, "rankings": rankings}
330
-
331
- # Update or add to results
332
- existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
333
- if existing_idx is not None:
334
- results["annotations"][existing_idx] = annotation
335
- else:
336
- results["annotations"].append(annotation)
337
-
338
- # Mark as completed
339
- completed_samples[sample_id] = True
340
-
341
- # Save to file
342
- with open(output_path, "w") as f:
343
- json.dump(results, f, indent=2)
344
-
345
- # Update progress
346
- progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
347
-
348
- return f"✅ Rankings saved successfully! ({sum(completed_samples.values())}/{len(samples)} completed)", progress
349
- except Exception as e:
350
- import traceback
351
- print(traceback.format_exc())
352
- return f"⚠️ Error saving rankings: {str(e)}", progress_text.value
353
 
354
- # Function to navigate to next sample
355
- def next_sample_id(current_id):
356
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
357
- if current_idx == -1:
358
  return current_id
359
- next_idx = min(current_idx + 1, len(samples) - 1)
360
- return samples[next_idx]["id"]
 
 
 
 
361
 
362
- # Function to navigate to previous sample
363
- def prev_sample_id(current_id):
364
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
365
- if current_idx == -1:
366
  return current_id
367
- prev_idx = max(current_idx - 1, 0)
368
- return samples[prev_idx]["id"]
 
 
 
 
369
 
370
- # Function to save all results
371
  def save_results():
372
- try:
373
- with open(output_path, "w") as f:
374
- json.dump(results, f, indent=2)
375
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
376
- except Exception as e:
377
- return f"⚠️ Error saving results file: {str(e)}"
378
 
379
- # Connect buttons
380
- validate_btn.click(
381
- validate_ranking_state,
382
- inputs=[rankings_state],
383
- outputs=[validation_msg]
384
- )
385
 
 
386
  submit_btn.click(
387
- save_ranking_state,
388
- inputs=[current_sample_id, rankings_state],
389
  outputs=[status_box, progress_text]
390
  )
391
 
392
  next_btn.click(
393
- next_sample_id,
394
- inputs=[current_sample_id],
395
  outputs=[current_sample_id]
396
  ).then(
397
  load_sample,
398
  inputs=[current_sample_id],
399
- outputs=[
400
- current_sample_id,
401
- query_text,
402
- ranking_html,
403
- progress_text,
404
- status_box,
405
- rankings_state,
406
- validation_msg
407
- ]
408
  )
409
 
410
  prev_btn.click(
411
- prev_sample_id,
412
- inputs=[current_sample_id],
413
  outputs=[current_sample_id]
414
  ).then(
415
  load_sample,
416
  inputs=[current_sample_id],
417
- outputs=[
418
- current_sample_id,
419
- query_text,
420
- ranking_html,
421
- progress_text,
422
- status_box,
423
- rankings_state,
424
- validation_msg
425
- ]
426
  )
427
 
428
  save_btn.click(save_results, outputs=[status_box])
429
-
430
- # Initialize with first sample
431
- demo.load(
432
- lambda: load_sample(samples[0]['id']),
433
- outputs=[
434
- current_sample_id,
435
- query_text,
436
- ranking_html,
437
- progress_text,
438
- status_box,
439
- rankings_state,
440
- validation_msg
441
- ]
442
- )
443
-
444
  return demo
445
 
446
  # Main app with file upload capability
@@ -493,7 +268,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
493
  """)
494
 
495
  with gr.Row():
496
- with gr.Column():
497
  file_input = gr.File(label="Upload a task file (JSON)")
498
  load_btn = gr.Button("Load Task")
499
  message = gr.Textbox(label="Status", interactive=False)
@@ -539,7 +314,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
539
  download_results_btn = gr.Button("Download Results")
540
 
541
  # Right side - will contain the actual interface
542
- with gr.Column():
543
  task_container = gr.HTML()
544
 
545
  # Handle file upload and storage
@@ -650,11 +425,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
650
 
651
  # Add download options
652
  with gr.Row():
653
- with gr.Column():
654
- download_all_btn = gr.Button("Download All Results (ZIP)")
655
- with gr.Column():
656
- result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download", value=None)
657
- download_selected_btn = gr.Button("Download Selected")
658
 
659
  # Add results visualization placeholder
660
  gr.Markdown("### Results Visualization")
 
9
  results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
10
  completed_samples = {s["id"]: False for s in samples}
11
 
12
def save_ranking(rankings, sample_id):
    """Validate and store the rankings submitted for one sample.

    Args:
        rankings: One rank per displayed document, in document order.
            ``None`` or ``""`` marks a document that has not been ranked.
        sample_id: ID of the sample the rankings belong to.

    Returns:
        A ``(status_message, progress_message)`` pair, matching the two
        outputs wired to the submit button.
    """

    def progress():
        # Computed on demand so it reflects any completion recorded below.
        return f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    try:
        # Every document needs a rank before anything is stored.
        if any(r is None or r == "" for r in rankings):
            return "⚠️ Please assign a rank to all documents before submitting", progress()

        try:
            processed_rankings = [int(r) for r in rankings]
        except (TypeError, ValueError):
            return "⚠️ Invalid ranking value. Please use only numbers.", progress()

        if len(set(processed_rankings)) != len(processed_rankings):
            return "⚠️ Each document must have a unique rank. Please review your rankings.", progress()

        # Replace an earlier annotation for this sample, or add a new one.
        annotation = {"sample_id": sample_id, "rankings": processed_rankings}
        existing_idx = next(
            (i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id),
            None,
        )
        if existing_idx is not None:
            results["annotations"][existing_idx] = annotation
        else:
            results["annotations"].append(annotation)

        completed_samples[sample_id] = True

        # Writing to disk is best-effort: the annotation is already held in
        # memory, so a failed write is reported but does not fail the submit.
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        try:
            output_path = f"{task_data['task_name']}_human_results.json"
            with open(output_path, "w") as f:
                json.dump(results, f, indent=2)
        except Exception:
            return "✅ Rankings saved in memory (file save failed)", progress()
        return "✅ Rankings saved successfully (in memory and to file)", progress()
    except Exception as e:
        # Surface any unexpected failure in the status box.
        return f"Error: {str(e)}", progress()
57
 
 
58
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
59
  gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
60
+
61
  with gr.Accordion("Instructions", open=True):
62
  gr.Markdown("""
63
  ## Task Instructions
 
66
 
67
  ### How to use this interface:
68
  1. Read the query at the top
69
+ 2. Review each document carefully
70
+ 3. Assign a rank to each document (1 = most relevant, higher numbers = less relevant)
71
+ 4. Each document must have a unique rank
72
+ 5. Click "Submit Rankings" when you're done with the current query
73
+ 6. Use "Previous" and "Next" to navigate between queries
74
+ 7. Click "Save All Results" periodically to ensure your work is saved
75
  """.format(instructions=task_data["instructions"]))
76
 
 
77
  current_sample_id = gr.State(value=samples[0]["id"])
78
 
 
79
  with gr.Row():
80
+ progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
81
  status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
82
 
 
83
  with gr.Group():
84
  gr.Markdown("## Query:")
85
+ query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
86
 
 
87
  gr.Markdown("## Documents to Rank:")
88
 
89
+ # Create document displays and ranking dropdowns in synchronized pairs
90
+ doc_containers = []
91
+ ranking_dropdowns = []
92
+
93
+ with gr.Column():
94
+ for i, doc in enumerate(samples[0]["candidates"]):
95
+ with gr.Row():
96
+ doc_box = gr.Textbox(
97
+ value=doc,
98
+ label=f"Document {i+1}",
99
+ interactive=False
100
+ )
101
+ dropdown = gr.Dropdown(
102
+ choices=[str(j) for j in range(1, len(samples[0]["candidates"])+1)],
103
+ label=f"Rank",
104
+ value=""
105
+ )
106
+ doc_containers.append(doc_box)
107
+ ranking_dropdowns.append(dropdown)
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  with gr.Row():
110
  prev_btn = gr.Button("← Previous Query", size="sm")
 
111
  submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
112
  next_btn = gr.Button("Next Query →", size="sm")
113
+
114
  save_btn = gr.Button("💾 Save All Results", variant="secondary")
115
 
 
116
  def load_sample(sample_id):
117
+ """Load a specific sample into the interface."""
118
+ sample = next((s for s in samples if s["id"] == sample_id), None)
119
+ if not sample:
120
+ return [query_text.value] + [d.value for d in doc_containers] + [""] * len(ranking_dropdowns) + [current_sample_id.value, progress_text.value, status_box.value]
121
+
122
+ # Update query
123
+ new_query = sample["query"]
124
+
125
+ # Update documents
126
+ new_docs = []
127
+ for i, doc in enumerate(sample["candidates"]):
128
+ if i < len(doc_containers):
129
+ new_docs.append(doc)
130
+
131
+ # Initialize rankings
132
+ new_rankings = [""] * len(ranking_dropdowns)
133
+
134
+ # Check if this sample has already been annotated
135
+ existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
136
+ if existing_annotation:
137
+ # Restore previous rankings
138
+ for i, rank in enumerate(existing_annotation["rankings"]):
139
+ if i < len(new_rankings) and rank is not None:
140
+ new_rankings[i] = str(rank)
141
+
142
+ # Update progress
143
+ current_idx = samples.index(sample)
144
+ new_progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
145
+
146
+ new_status = f"Viewing query {current_idx + 1} of {len(samples)}"
147
+ if completed_samples[sample_id]:
148
+ new_status += " (already completed)"
149
+
150
+ return [new_query] + new_docs + new_rankings + [sample["id"], new_progress, new_status]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
def next_sample(current_id):
    """Return the ID of the sample that follows ``current_id``.

    The given ID is returned unchanged when it is unknown or already
    belongs to the last sample.
    """
    for position, candidate in enumerate(samples):
        if candidate["id"] == current_id:
            break
    else:
        # No sample carries this ID.
        return current_id

    following = position + 1
    if following >= len(samples):
        return current_id
    return samples[following]["id"]
163
 
164
def prev_sample(current_id):
    """Return the ID of the sample that precedes ``current_id``.

    The given ID is returned unchanged when it is unknown or already
    belongs to the first sample.
    """
    position = next(
        (i for i, candidate in enumerate(samples) if candidate["id"] == current_id),
        None,
    )
    # Unknown ID, or nothing before the first sample: stay where we are.
    if position is None or position == 0:
        return current_id
    return samples[position - 1]["id"]
175
 
 
176
  def save_results():
177
+ """Save all collected results to a file."""
178
+ output_path = f"{task_data['task_name']}_human_results.json"
179
+ with open(output_path, "w") as f:
180
+ json.dump(results, f, indent=2)
181
+ return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
 
182
 
183
+ # Define a wrapper function that collects all the dropdown values into a list
184
def save_ranking_wrapper(*args):
    """Adapt the flat positional inputs of the submit event to ``save_ranking``.

    The inputs arrive as every rank value followed by the sample ID, so the
    final argument is passed as the sample ID and the rest as the rankings.
    """
    return save_ranking(args[:-1], args[-1])
189
 
190
+ # Connect events
191
  submit_btn.click(
192
+ save_ranking_wrapper,
193
+ inputs=ranking_dropdowns + [current_sample_id],
194
  outputs=[status_box, progress_text]
195
  )
196
 
197
  next_btn.click(
198
+ next_sample,
199
+ inputs=[current_sample_id],
200
  outputs=[current_sample_id]
201
  ).then(
202
  load_sample,
203
  inputs=[current_sample_id],
204
+ outputs=[query_text] + doc_containers + ranking_dropdowns + [current_sample_id, progress_text, status_box]
 
 
 
 
 
 
 
 
205
  )
206
 
207
  prev_btn.click(
208
+ prev_sample,
209
+ inputs=[current_sample_id],
210
  outputs=[current_sample_id]
211
  ).then(
212
  load_sample,
213
  inputs=[current_sample_id],
214
+ outputs=[query_text] + doc_containers + ranking_dropdowns + [current_sample_id, progress_text, status_box]
 
 
 
 
 
 
 
 
215
  )
216
 
217
  save_btn.click(save_results, outputs=[status_box])
218
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  return demo
220
 
221
  # Main app with file upload capability
 
268
  """)
269
 
270
  with gr.Row():
271
+ with gr.Column(scale=1):
272
  file_input = gr.File(label="Upload a task file (JSON)")
273
  load_btn = gr.Button("Load Task")
274
  message = gr.Textbox(label="Status", interactive=False)
 
314
  download_results_btn = gr.Button("Download Results")
315
 
316
  # Right side - will contain the actual interface
317
+ with gr.Column(scale=2):
318
  task_container = gr.HTML()
319
 
320
  # Handle file upload and storage
 
425
 
426
  # Add download options
427
  with gr.Row():
428
+ download_all_btn = gr.Button("Download All Results (ZIP)")
429
+ result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
430
+ download_selected_btn = gr.Button("Download Selected")
 
 
431
 
432
  # Add results visualization placeholder
433
  gr.Markdown("### Results Visualization")