AdnanElAssadi commited on
Commit
43b6806
·
verified ·
1 Parent(s): 87ef596

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +598 -511
app.py CHANGED
@@ -5,537 +5,585 @@ from pathlib import Path
5
 
6
  def create_reranking_interface(task_data):
7
  """Create a Gradio interface for reranking evaluation using drag and drop."""
8
- samples = task_data["samples"]
9
- results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
10
- completed_samples = {s["id"]: False for s in samples}
11
-
12
- # Define helper functions before UI elements are created
13
- def generate_sortable_html(candidates, existing_ranks=None):
14
- """Generate the HTML for the sortable list with up/down buttons."""
15
- if existing_ranks and len(existing_ranks) == len(candidates):
16
- order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
17
- else:
18
- order = list(range(len(candidates)))
19
-
20
- html = '<div id="sortable-container" class="sortable-container">'
21
- for rank_minus_1, idx in enumerate(order):
22
- if idx < len(candidates):
23
- doc = candidates[idx]
24
- rank = rank_minus_1 + 1
25
- import html as html_escaper
26
- escaped_doc = html_escaper.escape(doc)
27
-
28
- # Add navigation buttons (up/down arrows)
29
- up_disabled = "disabled" if rank == 1 else ""
30
- down_disabled = "disabled" if rank == len(candidates) else ""
31
-
32
- html += f'''\
33
- <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
34
- <div class="rank-controls">
35
- <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
36
- <div class="rank-badge">{rank}</div>
37
- <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
38
- </div>
39
- <div class="doc-content">{escaped_doc}</div>
40
- </div>
41
- '''
42
- html += '</div>'
43
 
44
- # Also return the computed order for proper initialization
45
- return html, order
46
-
47
- def save_ranking(order_json, sample_id):
48
- """Save the current ranking to results."""
49
- try:
50
- if not order_json or order_json == "[]":
51
- return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
52
-
53
- order = json.loads(order_json)
54
- sample = next((s for s in samples if s["id"] == sample_id), None)
55
-
56
- if not sample:
57
- return "⚠️ Sample not found.", progress_text.value
58
-
59
- num_candidates = len(sample["candidates"])
60
-
61
- if len(order) != num_candidates:
62
- return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
63
-
64
- rankings = [0] * num_candidates
65
  try:
66
- for rank_minus_1, doc_idx in enumerate(order):
67
- if doc_idx < num_candidates:
68
- rankings[doc_idx] = rank_minus_1 + 1
69
- else:
70
- raise ValueError(f"Invalid document index {doc_idx} found in order.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  except Exception as e:
72
- return f"⚠️ Error processing ranking order: {str(e)}", progress_text.value
 
 
 
 
 
 
 
73
 
74
- if sorted(rankings) != list(range(1, num_candidates + 1)):
75
- return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
76
 
77
- annotation = {"sample_id": sample_id, "rankings": rankings}
78
-
79
- # Check if this sample was already annotated
80
- existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
81
- if existing_idx is not None:
82
- results["annotations"][existing_idx] = annotation
83
- else:
84
- results["annotations"].append(annotation)
85
 
86
- completed_samples[sample_id] = True
87
-
88
- # Save results with timestamp and better error handling
89
- try:
90
- output_path = f"{task_data['task_name']}_human_results.json"
91
- with open(output_path, "w") as f:
92
- json.dump(results, f, indent=2)
 
 
 
 
 
93
 
94
- # Check if all samples are complete
95
- all_completed = sum(completed_samples.values()) == len(samples)
96
- completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
97
-
98
- return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
99
- except Exception as file_error:
100
- print(f"Error saving file: {str(file_error)}")
101
- # Still mark as completed in memory even if file save fails
102
- return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
103
-
104
- except json.JSONDecodeError:
105
- return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
106
- except Exception as e:
107
- import traceback
108
- print(traceback.format_exc())
109
- return f"Error saving ranking: {str(e)}", progress_text.value
110
-
111
- def load_sample(sample_id):
112
- """Load a sample into the interface."""
113
- try:
114
- sample = next((s for s in samples if s["id"] == sample_id), None)
115
- if not sample:
116
- return gr.update(), gr.update(value="[]"), gr.update(), gr.update()
117
-
118
- existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
119
-
120
- # Get both the HTML and the initial order
121
- new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
122
-
123
- # Convert initial order to JSON string for state
124
- initial_order_json = json.dumps(initial_order)
125
-
126
- status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
127
- progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
128
-
129
- return sample["query"], new_html, initial_order_json, progress, status
130
- except Exception as e:
131
- import traceback
132
- print(traceback.format_exc())
133
- return gr.update(), gr.update(), "[]", gr.update(), f"Error loading sample: {str(e)}"
134
-
135
- def next_sample_id(current_id):
136
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
137
- if current_idx == -1:
138
- return current_id
139
- next_idx = min(current_idx + 1, len(samples) - 1)
140
- return samples[next_idx]["id"]
141
-
142
- def prev_sample_id(current_id):
143
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
144
- if current_idx == -1:
145
- return current_id
146
- prev_idx = max(current_idx - 1, 0)
147
- return samples[prev_idx]["id"]
148
-
149
- def save_results():
150
- output_path = f"{task_data['task_name']}_human_results.json"
151
- try:
152
- # Create backup with timestamp
153
- from datetime import datetime
154
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
155
- backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
156
-
157
- # First create a backup
158
- with open(backup_path, "w") as f:
159
- json.dump(results, f, indent=2)
160
-
161
- # Then save to the main file
162
- with open(output_path, "w") as f:
163
- json.dump(results, f, indent=2)
164
-
165
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
166
- except Exception as e:
167
- return f"⚠️ Error saving results file: {str(e)}"
168
-
169
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
170
- gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
171
- with gr.Accordion("Instructions", open=True):
172
- gr.Markdown("""
173
- ## Task Instructions
174
-
175
- {instructions}
176
-
177
- ### How to use this interface:
178
- 1. Read the query at the top
179
- 2. Drag and drop documents to reorder them based on relevance
180
- 3. Top document = Rank 1, Second = Rank 2, etc.
181
- 4. Click "Submit Rankings" when you're done with the current query
182
- 5. Use "Previous" and "Next" to navigate between queries
183
- 6. Click "Save All Results" periodically to ensure your work is saved
184
- """.format(instructions=task_data["instructions"]))
185
 
186
- current_sample_id = gr.State(value=samples[0]["id"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- with gr.Row():
189
- progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
190
- status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
 
 
 
 
 
 
 
191
 
192
- with gr.Group():
193
- gr.Markdown("## Query:")
194
- query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
195
- gr.Markdown("## Documents to Rank (Drag to Reorder):")
196
- sortable_list = gr.HTML(generate_sortable_html(samples[0]["candidates"], []), elem_id="sortable-list-container")
197
- order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
198
- with gr.Row():
199
- prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
200
- submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
201
- next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
202
- save_btn = gr.Button("💾 Save All Results", variant="secondary")
203
 
204
- js_code = """
205
- <script src="https://cdn.jsdelivr.net/npm/[email protected]/Sortable.min.js"></script>
206
- <script>
207
- // Make the functions globally available
208
- window.moveItemUp = function(currentRank) {
209
- console.log('Moving item up:', currentRank);
210
- if (currentRank <= 1) return; // Already at the top
211
-
212
- const container = document.getElementById('sortable-container');
213
- if (!container) {
214
- console.error('Container not found');
215
- return;
216
- }
217
-
218
- const items = Array.from(container.querySelectorAll('.sortable-item'));
219
- console.log('Found items:', items.length);
220
-
221
- // Find the items to swap by their data-rank attribute
222
- const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
223
- const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
224
-
225
- if (!currentItem || !aboveItem) {
226
- console.error('Items not found:', currentItem, aboveItem);
227
- return;
228
- }
229
-
230
- console.log('Swapping items:', currentItem, aboveItem);
231
-
232
- // Swap the items in the DOM
233
- if (aboveItem.previousElementSibling) {
234
- container.insertBefore(currentItem, aboveItem);
235
- } else {
236
- container.insertBefore(currentItem, container.firstChild);
237
- }
238
-
239
- // Update ranks
240
- window.updateRanksAfterMove();
241
- };
242
 
243
- window.moveItemDown = function(currentRank) {
244
- console.log('Moving item down:', currentRank);
245
-
246
- const container = document.getElementById('sortable-container');
247
- if (!container) {
248
- console.error('Container not found');
249
- return;
250
- }
251
-
252
- const items = Array.from(container.querySelectorAll('.sortable-item'));
253
- console.log('Found items:', items.length);
254
-
255
- if (currentRank >= items.length) return; // Already at the bottom
256
-
257
- // Find the items to swap by their data-rank attribute
258
- const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
259
- const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
260
-
261
- if (!currentItem || !belowItem) {
262
- console.error('Items not found for moving down');
263
- return;
264
- }
265
-
266
- console.log('Swapping items down:', currentItem, belowItem);
267
-
268
- // Swap the items in the DOM - insert the current item after the below item
269
- container.insertBefore(currentItem, belowItem.nextElementSibling);
270
-
271
- // Update ranks
272
- window.updateRanksAfterMove();
273
- };
274
 
275
- window.updateRanksAfterMove = function() {
276
- console.log('Updating ranks');
277
- const container = document.getElementById('sortable-container');
278
- if (!container) {
279
- console.error('Container not found for rank update');
280
- return;
281
- }
 
 
 
 
 
 
 
 
 
282
 
283
- const items = Array.from(container.querySelectorAll('.sortable-item'));
284
- const orderInput = document.querySelector('#current-order textarea');
285
- if (!orderInput) {
286
- console.error('Order input not found');
287
- return;
288
- }
289
 
290
- const order = [];
291
- items.forEach((item, index) => {
292
- const rank = index + 1;
293
- const docId = parseInt(item.getAttribute('data-doc-id'));
294
-
295
- // Update rank display
296
- const rankBadge = item.querySelector('.rank-badge');
297
- if (rankBadge) rankBadge.textContent = rank;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
- // Update item classes
300
- item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
301
- item.classList.add(`rank-bg-${rank}`);
 
 
302
 
303
- // Update data attribute
304
- item.setAttribute('data-rank', rank);
305
 
306
- // Update button states
307
- const upBtn = item.querySelector('.up-btn');
308
- const downBtn = item.querySelector('.down-btn');
309
 
310
- if (upBtn) {
311
- if (rank == 1) {
312
- upBtn.setAttribute('disabled', 'disabled');
313
- } else {
314
- upBtn.removeAttribute('disabled');
315
- }
316
  }
317
 
318
- if (downBtn) {
319
- if (rank == items.length) {
320
- downBtn.setAttribute('disabled', 'disabled');
321
- } else {
322
- downBtn.removeAttribute('disabled');
323
- }
 
324
  }
325
 
326
- order.push(docId);
327
- });
328
-
329
- // Update hidden input with JSON
330
- console.log('New order:', order);
331
- const newOrderValue = JSON.stringify(order);
332
- orderInput.value = newOrderValue;
333
-
334
- // Trigger input event
335
- const event = new Event('input', { bubbles: true });
336
- orderInput.dispatchEvent(event);
337
- };
338
-
339
- document.addEventListener('DOMContentLoaded', function() {
340
- console.log('DOM loaded, initializing ranking interface');
341
 
342
- // Function to initialize the interface
343
- function initializeRankingInterface() {
 
344
  const container = document.getElementById('sortable-container');
345
  if (!container) {
346
- console.log('Container not found, retrying in 200ms');
347
- setTimeout(initializeRankingInterface, 200);
348
  return;
349
  }
350
 
351
- console.log('Sortable container found, setting up');
 
352
 
353
- // Add click events directly to buttons as a backup
354
- const upButtons = container.querySelectorAll('.up-btn');
355
- const downButtons = container.querySelectorAll('.down-btn');
356
 
357
- upButtons.forEach(btn => {
358
- btn.addEventListener('click', function() {
359
- const item = this.closest('.sortable-item');
360
- const rank = parseInt(item.getAttribute('data-rank'));
361
- window.moveItemUp(rank);
362
- });
363
- });
364
 
365
- downButtons.forEach(btn => {
366
- btn.addEventListener('click', function() {
367
- const item = this.closest('.sortable-item');
368
- const rank = parseInt(item.getAttribute('data-rank'));
369
- window.moveItemDown(rank);
370
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  });
372
 
373
- // Initialize drag-and-drop as fallback
374
- if (typeof Sortable !== 'undefined') {
375
- if (!container.sortableInstance) {
376
- container.sortableInstance = new Sortable(container, {
377
- animation: 150,
378
- ghostClass: "sortable-ghost",
379
- onEnd: function() {
380
- window.updateRanksAfterMove();
381
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  }
384
- } else {
385
- console.log('Sortable library not available');
 
386
  }
387
 
388
- // Initialize the ranking
389
- window.updateRanksAfterMove();
390
- }
391
-
392
- // Initialize immediately
393
- initializeRankingInterface();
394
-
395
- // Also observe DOM changes to reinitialize when needed
396
- const targetNode = document.getElementById('sortable-list-container');
397
- if (targetNode) {
398
- const config = { childList: true, subtree: true };
399
- const observer = new MutationObserver(function(mutationsList) {
400
- for(const mutation of mutationsList) {
401
- if (mutation.type === 'childList') {
402
- if (document.getElementById('sortable-container')) {
403
- console.log('DOM changed, reinitializing');
404
- initializeRankingInterface();
405
  }
406
  }
407
- }
408
- });
409
- observer.observe(targetNode, config);
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
- });
412
- </script>
413
- <style>
414
- .sortable-container {
415
- display: flex;
416
- flex-direction: column;
417
- gap: 12px;
418
- min-height: 200px;
419
- padding: 16px;
420
- background-color: #f8f9fa;
421
- border-radius: 8px;
422
- }
423
- .sortable-item {
424
- padding: 14px;
425
- background-color: #fff;
426
- border: 1px solid #e0e0e0;
427
- border-radius: 6px;
428
- display: flex;
429
- align-items: center;
430
- transition: all 0.2s ease;
431
- }
432
- .sortable-item:hover {
433
- background-color: #f8f9fa;
434
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
435
- }
436
- .rank-controls {
437
- display: flex;
438
- flex-direction: column;
439
- align-items: center;
440
- margin-right: 16px;
441
- }
442
- .rank-badge {
443
- display: flex;
444
- align-items: center;
445
- justify-content: center;
446
- width: 28px;
447
- height: 28px;
448
- border-radius: 50%;
449
- background-color: #6c757d;
450
- color: white;
451
- font-weight: bold;
452
- margin: 6px 0;
453
- flex-shrink: 0;
454
- }
455
- .rank-btn {
456
- width: 28px;
457
- height: 28px;
458
- border: none;
459
- background-color: #f0f0f0;
460
- border-radius: 4px;
461
- margin: 2px 0;
462
- cursor: pointer;
463
- display: flex;
464
- align-items: center;
465
- justify-content: center;
466
- font-size: 14px;
467
- }
468
- .rank-btn:hover:not([disabled]) {
469
- background-color: #e0e0e0;
470
- }
471
- .rank-btn:active:not([disabled]) {
472
- background-color: #d0d0d0;
473
- }
474
- .rank-btn:disabled {
475
- opacity: 0.5;
476
- cursor: not-allowed;
477
- }
478
- .doc-content {
479
- flex: 1;
480
- line-height: 1.5;
481
- word-break: break-word;
482
- }
483
- /* More professional color scheme for rank badges */
484
- .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
485
- .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
486
- .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
487
- .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
488
- .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
489
-
490
- /* Lower ranks get progressively more gray */
491
- .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
492
- background-color: #64748b;
493
- }
494
- .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
495
- background-color: #94a3b8;
496
- color: #0f172a;
497
- }
498
- .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
499
- .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
500
- .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
501
- .rank-bg-20 .rank-badge {
502
- background-color: #cbd5e1;
503
- color: #0f172a;
504
- }
505
- </style>
506
- """
507
- gr.HTML(js_code)
508
-
509
- submit_btn.click(
510
- save_ranking,
511
- inputs=[order_state, current_sample_id],
512
- outputs=[status_box, progress_text]
513
- )
514
-
515
- next_btn.click(
516
- next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
517
- ).then(
518
- load_sample,
519
- inputs=[current_sample_id],
520
- outputs=[query_text, sortable_list, order_state, progress_text, status_box]
521
- )
522
-
523
- prev_btn.click(
524
- prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
525
- ).then(
526
- load_sample,
527
- inputs=[current_sample_id],
528
- outputs=[query_text, sortable_list, order_state, progress_text, status_box]
529
- )
530
-
531
- save_btn.click(save_results, outputs=[status_box])
532
-
533
- demo.load(lambda: load_sample(samples[0]['id']),
534
- outputs=[query_text, sortable_list, order_state, progress_text, status_box])
535
-
536
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
538
- # Main app with file upload capability
539
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
540
  gr.Markdown("# MTEB Human Evaluation Demo")
541
 
@@ -547,35 +595,63 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
547
  This interface allows you to evaluate the relevance of documents for reranking tasks.
548
  """)
549
 
550
- # Function to get the most recent task file
551
  def get_latest_task_file():
552
- # Check first in uploaded_tasks directory
553
- os.makedirs("uploaded_tasks", exist_ok=True)
554
- uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
555
-
556
- if uploaded_tasks:
557
- # Sort by modification time, newest first
558
- uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
559
- return os.path.join("uploaded_tasks", uploaded_tasks[0])
560
-
561
- # Fall back to default example
562
- return "AskUbuntuDupQuestions_human_eval.json"
 
 
 
 
 
 
 
 
563
 
564
- # Load the task file
565
  task_file = get_latest_task_file()
566
 
 
567
  try:
568
- with open(task_file, "r") as f:
569
- task_data = json.load(f)
570
-
571
- # Show which task is currently loaded
572
- gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
573
-
574
- # Display the interface
575
- reranking_demo = create_reranking_interface(task_data)
 
 
 
 
 
 
 
 
 
 
576
  except Exception as e:
 
 
577
  gr.Markdown(f"**Error loading task: {str(e)}**")
578
  gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
 
 
 
 
 
 
 
579
 
580
  with gr.TabItem("Upload & Evaluate"):
581
  gr.Markdown("""
@@ -781,4 +857,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
781
  download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
782
 
783
  if __name__ == "__main__":
784
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def create_reranking_interface(task_data):
7
  """Create a Gradio interface for reranking evaluation using drag and drop."""
8
+ try:
9
+ samples = task_data["samples"]
10
+ results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
11
+ completed_samples = {s["id"]: False for s in samples}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Define helper functions before UI elements are created
14
+ def generate_sortable_html(candidates, existing_ranks=None):
15
+ """Generate the HTML for the sortable list with up/down buttons."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  try:
17
+ if existing_ranks and len(existing_ranks) == len(candidates):
18
+ order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
19
+ else:
20
+ order = list(range(len(candidates)))
21
+
22
+ html = '<div id="sortable-container" class="sortable-container">'
23
+ for rank_minus_1, idx in enumerate(order):
24
+ if idx < len(candidates):
25
+ doc = candidates[idx]
26
+ rank = rank_minus_1 + 1
27
+ import html as html_escaper
28
+ escaped_doc = html_escaper.escape(doc)
29
+
30
+ # Add navigation buttons (up/down arrows)
31
+ up_disabled = "disabled" if rank == 1 else ""
32
+ down_disabled = "disabled" if rank == len(candidates) else ""
33
+
34
+ html += f'''\
35
+ <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
36
+ <div class="rank-controls">
37
+ <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
38
+ <div class="rank-badge">{rank}</div>
39
+ <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
40
+ </div>
41
+ <div class="doc-content">{escaped_doc}</div>
42
+ </div>
43
+ '''
44
+ html += '</div>'
45
+
46
+ # Also return the computed order for proper initialization
47
+ return html, order
48
  except Exception as e:
49
+ print(f"Error in generate_sortable_html: {str(e)}")
50
+ return f'<div class="error">Error generating ranking interface: {str(e)}</div>', []
51
+
52
+ def save_ranking(order_json, sample_id):
53
+ """Save the current ranking to results."""
54
+ try:
55
+ if not order_json or order_json == "[]":
56
+ return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
57
 
58
+ order = json.loads(order_json)
59
+ sample = next((s for s in samples if s["id"] == sample_id), None)
60
 
61
+ if not sample:
62
+ return "⚠️ Sample not found.", progress_text.value
63
+
64
+ num_candidates = len(sample["candidates"])
 
 
 
 
65
 
66
+ if len(order) != num_candidates:
67
+ return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
68
+
69
+ rankings = [0] * num_candidates
70
+ try:
71
+ for rank_minus_1, doc_idx in enumerate(order):
72
+ if doc_idx < num_candidates:
73
+ rankings[doc_idx] = rank_minus_1 + 1
74
+ else:
75
+ raise ValueError(f"Invalid document index {doc_idx} found in order.")
76
+ except Exception as e:
77
+ return f"⚠️ Error processing ranking order: {str(e)}", progress_text.value
78
 
79
+ if sorted(rankings) != list(range(1, num_candidates + 1)):
80
+ return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
81
+
82
+ annotation = {"sample_id": sample_id, "rankings": rankings}
83
+
84
+ # Check if this sample was already annotated
85
+ existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
86
+ if existing_idx is not None:
87
+ results["annotations"][existing_idx] = annotation
88
+ else:
89
+ results["annotations"].append(annotation)
90
+
91
+ completed_samples[sample_id] = True
92
+
93
+ # Save results with timestamp and better error handling
94
+ try:
95
+ output_path = f"{task_data['task_name']}_human_results.json"
96
+ with open(output_path, "w") as f:
97
+ json.dump(results, f, indent=2)
98
+
99
+ # Check if all samples are complete
100
+ all_completed = sum(completed_samples.values()) == len(samples)
101
+ completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
102
+
103
+ return f" Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
104
+ except Exception as file_error:
105
+ print(f"Error saving file: {str(file_error)}")
106
+ # Still mark as completed in memory even if file save fails
107
+ return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
108
+
109
+ except json.JSONDecodeError:
110
+ return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
111
+ except Exception as e:
112
+ import traceback
113
+ print(traceback.format_exc())
114
+ return f"Error saving ranking: {str(e)}", progress_text.value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ def load_sample(sample_id):
117
+ """Load a sample into the interface."""
118
+ try:
119
+ sample = next((s for s in samples if s["id"] == sample_id), None)
120
+ if not sample:
121
+ return gr.update(), gr.update(), "[]", gr.update(), "Sample not found"
122
+
123
+ existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
124
+
125
+ # Get both the HTML and the initial order
126
+ new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
127
+
128
+ # Convert initial order to JSON string for state
129
+ initial_order_json = json.dumps(initial_order)
130
+
131
+ status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
132
+ progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
133
+
134
+ return sample["query"], new_html, initial_order_json, progress, status
135
+ except Exception as e:
136
+ import traceback
137
+ print(traceback.format_exc())
138
+ return "Error loading sample", "<div>Error loading sample content</div>", "[]", "Error", f"Error: {str(e)}"
139
 
140
+ def next_sample_id(current_id):
141
+ try:
142
+ current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
143
+ if current_idx == -1:
144
+ return samples[0]["id"] if samples else current_id
145
+ next_idx = min(current_idx + 1, len(samples) - 1)
146
+ return samples[next_idx]["id"]
147
+ except Exception as e:
148
+ print(f"Error in next_sample_id: {str(e)}")
149
+ return current_id
150
 
151
+ def prev_sample_id(current_id):
152
+ try:
153
+ current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
154
+ if current_idx == -1:
155
+ return samples[0]["id"] if samples else current_id
156
+ prev_idx = max(current_idx - 1, 0)
157
+ return samples[prev_idx]["id"]
158
+ except Exception as e:
159
+ print(f"Error in prev_sample_id: {str(e)}")
160
+ return current_id
 
161
 
162
+ def save_results():
163
+ output_path = f"{task_data['task_name']}_human_results.json"
164
+ try:
165
+ # Create backup with timestamp
166
+ from datetime import datetime
167
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
168
+ backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
169
+
170
+ # First create a backup
171
+ with open(backup_path, "w") as f:
172
+ json.dump(results, f, indent=2)
173
+
174
+ # Then save to the main file
175
+ with open(output_path, "w") as f:
176
+ json.dump(results, f, indent=2)
177
+
178
+ return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
179
+ except Exception as e:
180
+ return f"⚠️ Error saving results file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
+ # Create an empty initial sample ID with proper error handling
183
+ initial_sample_id = samples[0]["id"] if samples else None
184
+ if not initial_sample_id:
185
+ print("WARNING: No samples found in task data")
186
+ return gr.HTML("No samples found in the task data. Please check your task file and try again.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
189
+ gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
190
+ with gr.Accordion("Instructions", open=True):
191
+ gr.Markdown("""
192
+ ## Task Instructions
193
+
194
+ {instructions}
195
+
196
+ ### How to use this interface:
197
+ 1. Read the query at the top
198
+ 2. Drag and drop documents to reorder them based on relevance
199
+ 3. Top document = Rank 1, Second = Rank 2, etc.
200
+ 4. Click "Submit Rankings" when you're done with the current query
201
+ 5. Use "Previous" and "Next" to navigate between queries
202
+ 6. Click "Save All Results" periodically to ensure your work is saved
203
+ """.format(instructions=task_data.get("instructions", "Rank the documents based on their relevance to the query.")))
204
 
205
+ current_sample_id = gr.State(value=initial_sample_id)
 
 
 
 
 
206
 
207
+ with gr.Row():
208
+ progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
209
+ status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
210
+
211
+ with gr.Group():
212
+ gr.Markdown("## Query:")
213
+ query_text = gr.Textbox(value="Loading query...", label="", interactive=False)
214
+ gr.Markdown("## Documents to Rank (Drag to Reorder):")
215
+ sortable_list = gr.HTML("Loading documents...", elem_id="sortable-list-container")
216
+ order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
217
+ with gr.Row():
218
+ prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
219
+ submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
220
+ next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
221
+ save_btn = gr.Button("💾 Save All Results", variant="secondary")
222
+
223
+ js_code = """
224
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/Sortable.min.js"></script>
225
+ <script>
226
+ // Make the functions globally available
227
+ window.moveItemUp = function(currentRank) {
228
+ console.log('Moving item up:', currentRank);
229
+ if (currentRank <= 1) return; // Already at the top
230
 
231
+ const container = document.getElementById('sortable-container');
232
+ if (!container) {
233
+ console.error('Container not found');
234
+ return;
235
+ }
236
 
237
+ const items = Array.from(container.querySelectorAll('.sortable-item'));
238
+ console.log('Found items:', items.length);
239
 
240
+ // Find the items to swap by their data-rank attribute
241
+ const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
242
+ const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
243
 
244
+ if (!currentItem || !aboveItem) {
245
+ console.error('Items not found:', currentItem, aboveItem);
246
+ return;
 
 
 
247
  }
248
 
249
+ console.log('Swapping items:', currentItem, aboveItem);
250
+
251
+ // Swap the items in the DOM
252
+ if (aboveItem.previousElementSibling) {
253
+ container.insertBefore(currentItem, aboveItem);
254
+ } else {
255
+ container.insertBefore(currentItem, container.firstChild);
256
  }
257
 
258
+ // Update ranks
259
+ window.updateRanksAfterMove();
260
+ };
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
+ window.moveItemDown = function(currentRank) {
263
+ console.log('Moving item down:', currentRank);
264
+
265
  const container = document.getElementById('sortable-container');
266
  if (!container) {
267
+ console.error('Container not found');
 
268
  return;
269
  }
270
 
271
+ const items = Array.from(container.querySelectorAll('.sortable-item'));
272
+ console.log('Found items:', items.length);
273
 
274
+ if (currentRank >= items.length) return; // Already at the bottom
 
 
275
 
276
+ // Find the items to swap by their data-rank attribute
277
+ const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
278
+ const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
 
 
 
 
279
 
280
+ if (!currentItem || !belowItem) {
281
+ console.error('Items not found for moving down');
282
+ return;
283
+ }
284
+
285
+ console.log('Swapping items down:', currentItem, belowItem);
286
+
287
+ // Swap the items in the DOM - insert the current item after the below item
288
+ container.insertBefore(currentItem, belowItem.nextElementSibling);
289
+
290
+ // Update ranks
291
+ window.updateRanksAfterMove();
292
+ };
293
+
294
+ window.updateRanksAfterMove = function() {
295
+ console.log('Updating ranks');
296
+ const container = document.getElementById('sortable-container');
297
+ if (!container) {
298
+ console.error('Container not found for rank update');
299
+ return;
300
+ }
301
+
302
+ const items = Array.from(container.querySelectorAll('.sortable-item'));
303
+ const orderInput = document.querySelector('#current-order textarea');
304
+ if (!orderInput) {
305
+ console.error('Order input not found');
306
+ return;
307
+ }
308
+
309
+ const order = [];
310
+ items.forEach((item, index) => {
311
+ const rank = index + 1;
312
+ const docId = parseInt(item.getAttribute('data-doc-id'));
313
+
314
+ // Update rank display
315
+ const rankBadge = item.querySelector('.rank-badge');
316
+ if (rankBadge) rankBadge.textContent = rank;
317
+
318
+ // Update item classes
319
+ item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
320
+ item.classList.add(`rank-bg-${rank}`);
321
+
322
+ // Update data attribute
323
+ item.setAttribute('data-rank', rank);
324
+
325
+ // Update button states
326
+ const upBtn = item.querySelector('.up-btn');
327
+ const downBtn = item.querySelector('.down-btn');
328
+
329
+ if (upBtn) {
330
+ if (rank == 1) {
331
+ upBtn.setAttribute('disabled', 'disabled');
332
+ } else {
333
+ upBtn.removeAttribute('disabled');
334
+ }
335
+ }
336
+
337
+ if (downBtn) {
338
+ if (rank == items.length) {
339
+ downBtn.setAttribute('disabled', 'disabled');
340
+ } else {
341
+ downBtn.removeAttribute('disabled');
342
+ }
343
+ }
344
+
345
+ order.push(docId);
346
  });
347
 
348
+ // Update hidden input with JSON
349
+ console.log('New order:', order);
350
+ const newOrderValue = JSON.stringify(order);
351
+ orderInput.value = newOrderValue;
352
+
353
+ // Trigger input event
354
+ const event = new Event('input', { bubbles: true });
355
+ orderInput.dispatchEvent(event);
356
+ };
357
+
358
+ document.addEventListener('DOMContentLoaded', function() {
359
+ console.log('DOM loaded, initializing ranking interface');
360
+
361
+ // Function to initialize the interface
362
+ function initializeRankingInterface() {
363
+ const container = document.getElementById('sortable-container');
364
+ if (!container) {
365
+ console.log('Container not found, retrying in 200ms');
366
+ setTimeout(initializeRankingInterface, 200);
367
+ return;
368
+ }
369
+
370
+ console.log('Sortable container found, setting up');
371
+
372
+ // Add click events directly to buttons as a backup
373
+ const upButtons = container.querySelectorAll('.up-btn');
374
+ const downButtons = container.querySelectorAll('.down-btn');
375
+
376
+ upButtons.forEach(btn => {
377
+ btn.addEventListener('click', function() {
378
+ const item = this.closest('.sortable-item');
379
+ const rank = parseInt(item.getAttribute('data-rank'));
380
+ window.moveItemUp(rank);
381
+ });
382
+ });
383
+
384
+ downButtons.forEach(btn => {
385
+ btn.addEventListener('click', function() {
386
+ const item = this.closest('.sortable-item');
387
+ const rank = parseInt(item.getAttribute('data-rank'));
388
+ window.moveItemDown(rank);
389
  });
390
+ });
391
+
392
+ // Initialize drag-and-drop as fallback
393
+ if (typeof Sortable !== 'undefined') {
394
+ if (!container.sortableInstance) {
395
+ container.sortableInstance = new Sortable(container, {
396
+ animation: 150,
397
+ ghostClass: "sortable-ghost",
398
+ onEnd: function() {
399
+ window.updateRanksAfterMove();
400
+ }
401
+ });
402
+ }
403
+ } else {
404
+ console.log('Sortable library not available');
405
  }
406
+
407
+ // Initialize the ranking
408
+ window.updateRanksAfterMove();
409
  }
410
 
411
+ // Initialize immediately
412
+ initializeRankingInterface();
413
+
414
+ // Also observe DOM changes to reinitialize when needed
415
+ const targetNode = document.getElementById('sortable-list-container');
416
+ if (targetNode) {
417
+ const config = { childList: true, subtree: true };
418
+ const observer = new MutationObserver(function(mutationsList) {
419
+ for(const mutation of mutationsList) {
420
+ if (mutation.type === 'childList') {
421
+ if (document.getElementById('sortable-container')) {
422
+ console.log('DOM changed, reinitializing');
423
+ initializeRankingInterface();
424
+ }
 
 
 
425
  }
426
  }
427
+ });
428
+ observer.observe(targetNode, config);
429
+ }
430
+ });
431
+ </script>
432
+ <style>
433
+ .sortable-container {
434
+ display: flex;
435
+ flex-direction: column;
436
+ gap: 12px;
437
+ min-height: 200px;
438
+ padding: 16px;
439
+ background-color: #f8f9fa;
440
+ border-radius: 8px;
441
  }
442
+ .sortable-item {
443
+ padding: 14px;
444
+ background-color: #fff;
445
+ border: 1px solid #e0e0e0;
446
+ border-radius: 6px;
447
+ display: flex;
448
+ align-items: center;
449
+ transition: all 0.2s ease;
450
+ }
451
+ .sortable-item:hover {
452
+ background-color: #f8f9fa;
453
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
454
+ }
455
+ .rank-controls {
456
+ display: flex;
457
+ flex-direction: column;
458
+ align-items: center;
459
+ margin-right: 16px;
460
+ }
461
+ .rank-badge {
462
+ display: flex;
463
+ align-items: center;
464
+ justify-content: center;
465
+ width: 28px;
466
+ height: 28px;
467
+ border-radius: 50%;
468
+ background-color: #6c757d;
469
+ color: white;
470
+ font-weight: bold;
471
+ margin: 6px 0;
472
+ flex-shrink: 0;
473
+ }
474
+ .rank-btn {
475
+ width: 28px;
476
+ height: 28px;
477
+ border: none;
478
+ background-color: #f0f0f0;
479
+ border-radius: 4px;
480
+ margin: 2px 0;
481
+ cursor: pointer;
482
+ display: flex;
483
+ align-items: center;
484
+ justify-content: center;
485
+ font-size: 14px;
486
+ }
487
+ .rank-btn:hover:not([disabled]) {
488
+ background-color: #e0e0e0;
489
+ }
490
+ .rank-btn:active:not([disabled]) {
491
+ background-color: #d0d0d0;
492
+ }
493
+ .rank-btn:disabled {
494
+ opacity: 0.5;
495
+ cursor: not-allowed;
496
+ }
497
+ .doc-content {
498
+ flex: 1;
499
+ line-height: 1.5;
500
+ word-break: break-word;
501
+ }
502
+ /* More professional color scheme for rank badges */
503
+ .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
504
+ .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
505
+ .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
506
+ .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
507
+ .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
508
+
509
+ /* Lower ranks get progressively more gray */
510
+ .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
511
+ background-color: #64748b;
512
+ }
513
+ .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
514
+ background-color: #94a3b8;
515
+ color: #0f172a;
516
+ }
517
+ .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
518
+ .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
519
+ .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
520
+ .rank-bg-20 .rank-badge {
521
+ background-color: #cbd5e1;
522
+ color: #0f172a;
523
+ }
524
+ .error {
525
+ padding: 16px;
526
+ background-color: #fee2e2;
527
+ border: 1px solid #f87171;
528
+ color: #b91c1c;
529
+ border-radius: 6px;
530
+ margin: 16px 0;
531
+ }
532
+ </style>
533
+ """
534
+ gr.HTML(js_code)
535
+
536
+ submit_btn.click(
537
+ save_ranking,
538
+ inputs=[order_state, current_sample_id],
539
+ outputs=[status_box, progress_text]
540
+ )
541
+
542
+ next_btn.click(
543
+ next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
544
+ ).then(
545
+ load_sample,
546
+ inputs=[current_sample_id],
547
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box]
548
+ )
549
+
550
+ prev_btn.click(
551
+ prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
552
+ ).then(
553
+ load_sample,
554
+ inputs=[current_sample_id],
555
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box]
556
+ )
557
+
558
+ save_btn.click(save_results, outputs=[status_box])
559
+
560
+ # Use a custom loading function with proper error handling
561
def safe_load_initial():
    """Produce the initial values for the five UI outputs without raising.

    Returns whatever ``load_sample(initial_sample_id)`` returns when a first
    sample is available. Otherwise, or if loading raises, returns a 5-tuple
    of placeholder strings in the order the outputs are wired: query text,
    documents HTML, order JSON, progress text, status text.
    """
    try:
        # Guard clause: nothing to load when there is no first sample.
        if not (initial_sample_id and samples):
            return (
                "No query available",
                "<div>No documents available</div>",
                "[]",
                "No progress data",
                "Error: No samples found",
            )
        return load_sample(initial_sample_id)
    except Exception as load_error:
        # Report the failure in the UI instead of letting the load event fail.
        print(f"Error in initial load: {str(load_error)}")
        return (
            "Error loading query",
            "<div>Error loading documents</div>",
            "[]",
            "Error",
            f"Error: {str(load_error)}",
        )
570
+
571
+ # Use the safe loading function to prevent scheduling failures
572
+ demo.load(safe_load_initial,
573
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box])
574
+
575
+ return demo
576
+ except Exception as e:
577
+ import traceback
578
+ print(f"Error creating reranking interface: {traceback.format_exc()}")
579
+ # Return a simple error interface instead of failing completely
580
+ with gr.Blocks() as error_demo:
581
+ gr.Markdown("# Error Creating Reranking Interface")
582
+ gr.Markdown(f"An error occurred while creating the interface: **{str(e)}**")
583
+ gr.Markdown("Please check your task data and try again.")
584
+ return error_demo
585
 
586
+ # Main app with file upload capability and better error handling
587
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
588
  gr.Markdown("# MTEB Human Evaluation Demo")
589
 
 
595
  This interface allows you to evaluate the relevance of documents for reranking tasks.
596
  """)
597
 
598
+ # Function to get the most recent task file with error handling
599
def get_latest_task_file():
    """Return the path of the task file to load, or ``None`` if none exists.

    The most recently modified ``*.json`` file in ``uploaded_tasks/`` takes
    priority (the directory is created when missing). When nothing has been
    uploaded, or the upload directory cannot be read, the bundled
    ``AskUbuntuDupQuestions_human_eval.json`` example is used if it exists
    in the current working directory.
    """
    upload_dir = "uploaded_tasks"
    default_task = "AskUbuntuDupQuestions_human_eval.json"

    try:
        os.makedirs(upload_dir, exist_ok=True)
        # Only regular files qualify: a directory that happens to be named
        # "*.json" cannot be opened as a task file later on.
        uploaded = [
            path
            for path in (
                os.path.join(upload_dir, name)
                for name in os.listdir(upload_dir)
                if name.endswith(".json")
            )
            if os.path.isfile(path)
        ]
        if uploaded:
            # Newest upload wins; max() avoids sorting the whole list.
            return max(uploaded, key=os.path.getmtime)
    except OSError as e:
        # A broken or unwritable upload directory must not hide the bundled
        # example, so log and continue to the fallback below.
        print(f"Error getting latest task file: {str(e)}")

    # Fall back to the default example shipped next to the app.
    if os.path.exists(default_task):
        return default_task

    # No task file available anywhere.
    return None
619
 
620
+ # Load the task file with proper error handling
621
  task_file = get_latest_task_file()
622
 
623
+ task_data = None
624
  try:
625
+ if task_file and os.path.exists(task_file):
626
+ with open(task_file, "r") as f:
627
+ task_data = json.load(f)
628
+
629
+ # Show which task is currently loaded
630
+ gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
631
+
632
+ # Display the interface
633
+ reranking_demo = create_reranking_interface(task_data)
634
+ else:
635
+ gr.Markdown("**No task file found**")
636
+ gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
637
+
638
+ # Create a dummy interface with instructions
639
+ with gr.Blocks() as dummy_demo:
640
+ gr.Markdown("### No Task Loaded")
641
+ gr.Markdown("Please go to the 'Upload & Evaluate' tab to upload a task file.")
642
+ reranking_demo = dummy_demo
643
  except Exception as e:
644
+ import traceback
645
+ print(f"Error loading task: {traceback.format_exc()}")
646
  gr.Markdown(f"**Error loading task: {str(e)}**")
647
  gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
648
+
649
+ # Create a simple error interface
650
+ with gr.Blocks() as error_demo:
651
+ gr.Markdown("### Error Loading Task")
652
+ gr.Markdown(f"An error occurred: **{str(e)}**")
653
+ gr.Markdown("Please try uploading a different task file.")
654
+ reranking_demo = error_demo
655
 
656
  with gr.TabItem("Upload & Evaluate"):
657
  gr.Markdown("""
 
857
  download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
858
 
859
if __name__ == "__main__":
    # launch() has to block the main thread when run as a script. With
    # prevent_thread_lock=True the call returns immediately, the script
    # reaches its end, and the Gradio server terminates with it.
    try:
        demo.launch()
    except Exception:
        import traceback
        print(f"Error launching demo: {traceback.format_exc()}")
        print("\nTrying alternative launch method...")
        try:
            # Alternative launch method: no public share link, debug mode on
            demo.launch(share=False, debug=True)
        except Exception as e2:
            print(f"Alternative launch also failed: {str(e2)}")
            print("\nPlease check your Gradio installation and try again.")