AdnanElAssadi committed on
Commit
76c554c
·
verified ·
1 Parent(s): d986f08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +416 -611
app.py CHANGED
@@ -5,585 +5,433 @@ from pathlib import Path
5
 
6
  def create_reranking_interface(task_data):
7
  """Create a Gradio interface for reranking evaluation using drag and drop."""
8
- try:
9
- samples = task_data["samples"]
10
- results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
11
- completed_samples = {s["id"]: False for s in samples}
12
-
13
- # Define helper functions before the UI elements are created
14
- def generate_sortable_html(candidates, existing_ranks=None):
15
- """Generate the HTML for the sortable list with up/down buttons."""
16
- try:
17
- if existing_ranks and len(existing_ranks) == len(candidates):
18
- order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
19
- else:
20
- order = list(range(len(candidates)))
21
-
22
- html = '<div id="sortable-container" class="sortable-container">'
23
- for rank_minus_1, idx in enumerate(order):
24
- if idx < len(candidates):
25
- doc = candidates[idx]
26
- rank = rank_minus_1 + 1
27
- import html as html_escaper
28
- escaped_doc = html_escaper.escape(doc)
29
-
30
- # Add navigation buttons (up/down arrows)
31
- up_disabled = "disabled" if rank == 1 else ""
32
- down_disabled = "disabled" if rank == len(candidates) else ""
33
-
34
- html += f'''\
35
- <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
36
- <div class="rank-controls">
37
- <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
38
- <div class="rank-badge">{rank}</div>
39
- <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
40
- </div>
41
- <div class="doc-content">{escaped_doc}</div>
42
- </div>
43
- '''
44
- html += '</div>'
45
-
46
- # Also return the computed order for proper initialization
47
- return html, order
48
- except Exception as e:
49
- print(f"Error in generate_sortable_html: {str(e)}")
50
- return f'<div class="error">Error generating ranking interface: {str(e)}</div>', []
51
 
52
- def save_ranking(order_json, sample_id):
53
- """Save the current ranking to results."""
54
- try:
55
- if not order_json or order_json == "[]":
56
- return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
57
-
58
- order = json.loads(order_json)
59
- sample = next((s for s in samples if s["id"] == sample_id), None)
60
-
61
- if not sample:
62
- return "⚠️ Sample not found.", progress_text.value
63
-
64
- num_candidates = len(sample["candidates"])
65
-
66
- if len(order) != num_candidates:
67
- return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
68
-
69
- rankings = [0] * num_candidates
70
- try:
71
- for rank_minus_1, doc_idx in enumerate(order):
72
- if doc_idx < num_candidates:
73
- rankings[doc_idx] = rank_minus_1 + 1
74
- else:
75
- raise ValueError(f"Invalid document index {doc_idx} found in order.")
76
- except Exception as e:
77
- return f"⚠️ Error processing ranking order: {str(e)}", progress_text.value
78
-
79
- if sorted(rankings) != list(range(1, num_candidates + 1)):
80
- return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
81
-
82
- annotation = {"sample_id": sample_id, "rankings": rankings}
83
-
84
- # Check if this sample was already annotated
85
- existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
86
- if existing_idx is not None:
87
- results["annotations"][existing_idx] = annotation
 
 
88
  else:
89
- results["annotations"].append(annotation)
90
-
91
- completed_samples[sample_id] = True
92
-
93
- # Save results with timestamp and better error handling
94
- try:
95
- output_path = f"{task_data['task_name']}_human_results.json"
96
- with open(output_path, "w") as f:
97
- json.dump(results, f, indent=2)
98
-
99
- # Check if all samples are complete
100
- all_completed = sum(completed_samples.values()) == len(samples)
101
- completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
102
-
103
- return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
104
- except Exception as file_error:
105
- print(f"Error saving file: {str(file_error)}")
106
- # Still mark as completed in memory even if file save fails
107
- return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
108
-
109
- except json.JSONDecodeError:
110
- return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
111
- except Exception as e:
112
- import traceback
113
- print(traceback.format_exc())
114
- return f"Error saving ranking: {str(e)}", progress_text.value
115
-
116
- def load_sample(sample_id):
117
- """Load a sample into the interface."""
118
- try:
119
- sample = next((s for s in samples if s["id"] == sample_id), None)
120
- if not sample:
121
- return gr.update(), gr.update(), "[]", gr.update(), "Sample not found"
122
-
123
- existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
124
-
125
- # Get both the HTML and the initial order
126
- new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
127
-
128
- # Convert initial order to JSON string for state
129
- initial_order_json = json.dumps(initial_order)
130
-
131
- status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
132
- progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
133
-
134
- return sample["query"], new_html, initial_order_json, progress, status
135
- except Exception as e:
136
- import traceback
137
- print(traceback.format_exc())
138
- return "Error loading sample", "<div>Error loading sample content</div>", "[]", "Error", f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- def next_sample_id(current_id):
141
- try:
142
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
143
- if current_idx == -1:
144
- return samples[0]["id"] if samples else current_id
145
- next_idx = min(current_idx + 1, len(samples) - 1)
146
- return samples[next_idx]["id"]
147
- except Exception as e:
148
- print(f"Error in next_sample_id: {str(e)}")
149
- return current_id
150
 
151
- def prev_sample_id(current_id):
152
- try:
153
- current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
154
- if current_idx == -1:
155
- return samples[0]["id"] if samples else current_id
156
- prev_idx = max(current_idx - 1, 0)
157
- return samples[prev_idx]["id"]
158
- except Exception as e:
159
- print(f"Error in prev_sample_id: {str(e)}")
160
- return current_id
161
 
162
- def save_results():
163
- output_path = f"{task_data['task_name']}_human_results.json"
164
- try:
165
- # Create backup with timestamp
166
- from datetime import datetime
167
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
168
- backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
169
-
170
- # First create a backup
171
- with open(backup_path, "w") as f:
172
- json.dump(results, f, indent=2)
173
-
174
- # Then save to the main file
175
- with open(output_path, "w") as f:
176
- json.dump(results, f, indent=2)
177
-
178
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
179
- except Exception as e:
180
- return f"⚠️ Error saving results file: {str(e)}"
181
 
182
- # Create an empty initial sample ID with proper error handling
183
- initial_sample_id = samples[0]["id"] if samples else None
184
- if not initial_sample_id:
185
- print("WARNING: No samples found in task data")
186
- return gr.HTML("No samples found in the task data. Please check your task file and try again.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
189
- gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
190
- with gr.Accordion("Instructions", open=True):
191
- gr.Markdown("""
192
- ## Task Instructions
193
-
194
- {instructions}
195
-
196
- ### How to use this interface:
197
- 1. Read the query at the top
198
- 2. Drag and drop documents to reorder them based on relevance
199
- 3. Top document = Rank 1, Second = Rank 2, etc.
200
- 4. Click "Submit Rankings" when you're done with the current query
201
- 5. Use "Previous" and "Next" to navigate between queries
202
- 6. Click "Save All Results" periodically to ensure your work is saved
203
- """.format(instructions=task_data.get("instructions", "Rank the documents based on their relevance to the query.")))
204
 
205
- current_sample_id = gr.State(value=initial_sample_id)
 
206
 
207
- with gr.Row():
208
- progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
209
- status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
210
-
211
- with gr.Group():
212
- gr.Markdown("## Query:")
213
- query_text = gr.Textbox(value="Loading query...", label="", interactive=False)
214
- gr.Markdown("## Documents to Rank (Drag to Reorder):")
215
- sortable_list = gr.HTML("Loading documents...", elem_id="sortable-list-container")
216
- order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
217
- with gr.Row():
218
- prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
219
- submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
220
- next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
221
- save_btn = gr.Button("💾 Save All Results", variant="secondary")
222
-
223
- js_code = """
224
- <script src="https://cdn.jsdelivr.net/npm/sortablejs@latest/Sortable.min.js"></script>
225
- <script>
226
- // Make the functions globally available
227
- window.moveItemUp = function(currentRank) {
228
- console.log('Moving item up:', currentRank);
229
- if (currentRank <= 1) return; // Already at the top
230
-
231
- const container = document.getElementById('sortable-container');
232
- if (!container) {
233
- console.error('Container not found');
234
- return;
235
- }
236
-
237
- const items = Array.from(container.querySelectorAll('.sortable-item'));
238
- console.log('Found items:', items.length);
239
-
240
- // Find the items to swap by their data-rank attribute
241
- const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
242
- const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
243
-
244
- if (!currentItem || !aboveItem) {
245
- console.error('Items not found:', currentItem, aboveItem);
246
- return;
 
 
 
 
 
 
 
 
 
 
247
  }
248
 
249
- console.log('Swapping items:', currentItem, aboveItem);
250
-
251
- // Swap the items in the DOM
252
- if (aboveItem.previousElementSibling) {
253
- container.insertBefore(currentItem, aboveItem);
254
- } else {
255
- container.insertBefore(currentItem, container.firstChild);
256
  }
257
 
258
- // Update ranks
259
- window.updateRanksAfterMove();
260
- };
261
 
262
- window.moveItemDown = function(currentRank) {
263
- console.log('Moving item down:', currentRank);
 
 
 
 
 
 
 
 
 
 
 
264
 
 
265
  const container = document.getElementById('sortable-container');
266
  if (!container) {
267
- console.error('Container not found');
 
268
  return;
269
  }
270
 
271
- const items = Array.from(container.querySelectorAll('.sortable-item'));
272
- console.log('Found items:', items.length);
273
-
274
- if (currentRank >= items.length) return; // Already at the bottom
275
-
276
- // Find the items to swap by their data-rank attribute
277
- const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
278
- const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
279
-
280
- if (!currentItem || !belowItem) {
281
- console.error('Items not found for moving down');
282
- return;
283
- }
284
-
285
- console.log('Swapping items down:', currentItem, belowItem);
286
-
287
- // Swap the items in the DOM - insert the current item after the below item
288
- container.insertBefore(currentItem, belowItem.nextElementSibling);
289
-
290
- // Update ranks
291
- window.updateRanksAfterMove();
292
- };
293
-
294
- window.updateRanksAfterMove = function() {
295
- console.log('Updating ranks');
296
- const container = document.getElementById('sortable-container');
297
- if (!container) {
298
- console.error('Container not found for rank update');
299
  return;
300
  }
301
 
302
- const items = Array.from(container.querySelectorAll('.sortable-item'));
303
- const orderInput = document.querySelector('#current-order textarea');
304
- if (!orderInput) {
305
- console.error('Order input not found');
306
  return;
307
  }
308
 
309
- const order = [];
310
- items.forEach((item, index) => {
311
- const rank = index + 1;
312
- const docId = parseInt(item.getAttribute('data-doc-id'));
313
-
314
- // Update rank display
315
- const rankBadge = item.querySelector('.rank-badge');
316
- if (rankBadge) rankBadge.textContent = rank;
317
-
318
- // Update item classes
319
- item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
320
- item.classList.add(`rank-bg-${rank}`);
321
-
322
- // Update data attribute
323
- item.setAttribute('data-rank', rank);
324
-
325
- // Update button states
326
- const upBtn = item.querySelector('.up-btn');
327
- const downBtn = item.querySelector('.down-btn');
328
-
329
- if (upBtn) {
330
- if (rank == 1) {
331
- upBtn.setAttribute('disabled', 'disabled');
332
- } else {
333
- upBtn.removeAttribute('disabled');
334
- }
335
- }
336
-
337
- if (downBtn) {
338
- if (rank == items.length) {
339
- downBtn.setAttribute('disabled', 'disabled');
340
- } else {
341
- downBtn.removeAttribute('disabled');
342
- }
343
  }
344
-
345
- order.push(docId);
346
  });
347
-
348
- // Update hidden input with JSON
349
- console.log('New order:', order);
350
- const newOrderValue = JSON.stringify(order);
351
- orderInput.value = newOrderValue;
352
-
353
- // Trigger input event
354
- const event = new Event('input', { bubbles: true });
355
- orderInput.dispatchEvent(event);
356
- };
357
 
358
- document.addEventListener('DOMContentLoaded', function() {
359
- console.log('DOM loaded, initializing ranking interface');
360
-
361
- // Function to initialize the interface
362
- function initializeRankingInterface() {
363
- const container = document.getElementById('sortable-container');
364
- if (!container) {
365
- console.log('Container not found, retrying in 200ms');
366
- setTimeout(initializeRankingInterface, 200);
367
- return;
368
- }
369
-
370
- console.log('Sortable container found, setting up');
371
-
372
- // Add click events directly to buttons as a backup
373
- const upButtons = container.querySelectorAll('.up-btn');
374
- const downButtons = container.querySelectorAll('.down-btn');
375
-
376
- upButtons.forEach(btn => {
377
- btn.addEventListener('click', function() {
378
- const item = this.closest('.sortable-item');
379
- const rank = parseInt(item.getAttribute('data-rank'));
380
- window.moveItemUp(rank);
381
- });
382
- });
383
-
384
- downButtons.forEach(btn => {
385
- btn.addEventListener('click', function() {
386
- const item = this.closest('.sortable-item');
387
- const rank = parseInt(item.getAttribute('data-rank'));
388
- window.moveItemDown(rank);
389
- });
390
- });
391
-
392
- // Initialize drag-and-drop as fallback
393
- if (typeof Sortable !== 'undefined') {
394
- if (!container.sortableInstance) {
395
- container.sortableInstance = new Sortable(container, {
396
- animation: 150,
397
- ghostClass: "sortable-ghost",
398
- onEnd: function() {
399
- window.updateRanksAfterMove();
400
- }
401
- });
402
- }
403
- } else {
404
- console.log('Sortable library not available');
405
- }
406
-
407
- // Initialize the ranking
408
- window.updateRanksAfterMove();
409
- }
410
-
411
- // Initialize immediately
412
- initializeRankingInterface();
413
-
414
- // Also observe DOM changes to reinitialize when needed
415
- const targetNode = document.getElementById('sortable-list-container');
416
- if (targetNode) {
417
- const config = { childList: true, subtree: true };
418
- const observer = new MutationObserver(function(mutationsList) {
419
- for(const mutation of mutationsList) {
420
- if (mutation.type === 'childList') {
421
- if (document.getElementById('sortable-container')) {
422
- console.log('DOM changed, reinitializing');
423
- initializeRankingInterface();
424
- }
425
  }
426
  }
427
- });
428
- observer.observe(targetNode, config);
429
- }
430
- });
431
- </script>
432
- <style>
433
- .sortable-container {
434
- display: flex;
435
- flex-direction: column;
436
- gap: 12px;
437
- min-height: 200px;
438
- padding: 16px;
439
- background-color: #f8f9fa;
440
- border-radius: 8px;
441
- }
442
- .sortable-item {
443
- padding: 14px;
444
- background-color: #fff;
445
- border: 1px solid #e0e0e0;
446
- border-radius: 6px;
447
- display: flex;
448
- align-items: center;
449
- transition: all 0.2s ease;
450
- }
451
- .sortable-item:hover {
452
- background-color: #f8f9fa;
453
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
454
- }
455
- .rank-controls {
456
- display: flex;
457
- flex-direction: column;
458
- align-items: center;
459
- margin-right: 16px;
460
- }
461
- .rank-badge {
462
- display: flex;
463
- align-items: center;
464
- justify-content: center;
465
- width: 28px;
466
- height: 28px;
467
- border-radius: 50%;
468
- background-color: #6c757d;
469
- color: white;
470
- font-weight: bold;
471
- margin: 6px 0;
472
- flex-shrink: 0;
473
- }
474
- .rank-btn {
475
- width: 28px;
476
- height: 28px;
477
- border: none;
478
- background-color: #f0f0f0;
479
- border-radius: 4px;
480
- margin: 2px 0;
481
- cursor: pointer;
482
- display: flex;
483
- align-items: center;
484
- justify-content: center;
485
- font-size: 14px;
486
- }
487
- .rank-btn:hover:not([disabled]) {
488
- background-color: #e0e0e0;
489
- }
490
- .rank-btn:active:not([disabled]) {
491
- background-color: #d0d0d0;
492
- }
493
- .rank-btn:disabled {
494
- opacity: 0.5;
495
- cursor: not-allowed;
496
- }
497
- .doc-content {
498
- flex: 1;
499
- line-height: 1.5;
500
- word-break: break-word;
501
- }
502
- /* More professional color scheme for rank badges */
503
- .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
504
- .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
505
- .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
506
- .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
507
- .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
508
-
509
- /* Lower ranks get progressively more gray */
510
- .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
511
- background-color: #64748b;
512
- }
513
- .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
514
- background-color: #94a3b8;
515
- color: #0f172a;
516
- }
517
- .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
518
- .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
519
- .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
520
- .rank-bg-20 .rank-badge {
521
- background-color: #cbd5e1;
522
- color: #0f172a;
523
- }
524
- .error {
525
- padding: 16px;
526
- background-color: #fee2e2;
527
- border: 1px solid #f87171;
528
- color: #b91c1c;
529
- border-radius: 6px;
530
- margin: 16px 0;
531
  }
532
- </style>
533
- """
534
- gr.HTML(js_code)
535
-
536
- submit_btn.click(
537
- save_ranking,
538
- inputs=[order_state, current_sample_id],
539
- outputs=[status_box, progress_text]
540
- )
541
-
542
- next_btn.click(
543
- next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
544
- ).then(
545
- load_sample,
546
- inputs=[current_sample_id],
547
- outputs=[query_text, sortable_list, order_state, progress_text, status_box]
548
- )
549
-
550
- prev_btn.click(
551
- prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
552
- ).then(
553
- load_sample,
554
- inputs=[current_sample_id],
555
- outputs=[query_text, sortable_list, order_state, progress_text, status_box]
556
- )
557
-
558
- save_btn.click(save_results, outputs=[status_box])
559
-
560
- # Use a custom loading function with proper error handling
561
- def safe_load_initial():
562
- try:
563
- if initial_sample_id and samples:
564
- return load_sample(initial_sample_id)
565
- else:
566
- return "No query available", "<div>No documents available</div>", "[]", "No progress data", "Error: No samples found"
567
- except Exception as e:
568
- print(f"Error in initial load: {str(e)}")
569
- return "Error loading query", "<div>Error loading documents</div>", "[]", "Error", f"Error: {str(e)}"
570
-
571
- # Use the safe loading function to prevent scheduling failures
572
- demo.load(safe_load_initial,
573
- outputs=[query_text, sortable_list, order_state, progress_text, status_box])
574
-
575
- return demo
576
- except Exception as e:
577
- import traceback
578
- print(f"Error creating reranking interface: {traceback.format_exc()}")
579
- # Return a simple error interface instead of failing completely
580
- with gr.Blocks() as error_demo:
581
- gr.Markdown("# Error Creating Reranking Interface")
582
- gr.Markdown(f"An error occurred while creating the interface: **{str(e)}**")
583
- gr.Markdown("Please check your task data and try again.")
584
- return error_demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
 
586
- # Main app with file upload capability and better error handling
587
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
588
  gr.Markdown("# MTEB Human Evaluation Demo")
589
 
@@ -595,63 +443,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
595
  This interface allows you to evaluate the relevance of documents for reranking tasks.
596
  """)
597
 
598
- # Function to get the most recent task file with error handling
599
  def get_latest_task_file():
600
- try:
601
- # Check first in uploaded_tasks directory
602
- os.makedirs("uploaded_tasks", exist_ok=True)
603
- uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
604
-
605
- if uploaded_tasks:
606
- # Sort by modification time, newest first
607
- uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
608
- return os.path.join("uploaded_tasks", uploaded_tasks[0])
609
-
610
- # Fall back to default example
611
- if os.path.exists("AskUbuntuDupQuestions_human_eval.json"):
612
- return "AskUbuntuDupQuestions_human_eval.json"
613
-
614
- # If no files found
615
- return None
616
- except Exception as e:
617
- print(f"Error getting latest task file: {str(e)}")
618
- return None
619
 
620
- # Load the task file with proper error handling
621
  task_file = get_latest_task_file()
622
 
623
- task_data = None
624
  try:
625
- if task_file and os.path.exists(task_file):
626
- with open(task_file, "r") as f:
627
- task_data = json.load(f)
628
-
629
- # Show which task is currently loaded
630
- gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
631
-
632
- # Display the interface
633
- reranking_demo = create_reranking_interface(task_data)
634
- else:
635
- gr.Markdown("**No task file found**")
636
- gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
637
-
638
- # Create a dummy interface with instructions
639
- with gr.Blocks() as dummy_demo:
640
- gr.Markdown("### No Task Loaded")
641
- gr.Markdown("Please go to the 'Upload & Evaluate' tab to upload a task file.")
642
- reranking_demo = dummy_demo
643
  except Exception as e:
644
- import traceback
645
- print(f"Error loading task: {traceback.format_exc()}")
646
  gr.Markdown(f"**Error loading task: {str(e)}**")
647
  gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
648
-
649
- # Create a simple error interface
650
- with gr.Blocks() as error_demo:
651
- gr.Markdown("### Error Loading Task")
652
- gr.Markdown(f"An error occurred: **{str(e)}**")
653
- gr.Markdown("Please try uploading a different task file.")
654
- reranking_demo = error_demo
655
 
656
  with gr.TabItem("Upload & Evaluate"):
657
  gr.Markdown("""
@@ -857,19 +677,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
857
  download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
858
 
859
  if __name__ == "__main__":
860
- try:
861
- # Use options compatible with Gradio 3.42.0
862
- import os
863
- # Disable file watching to prevent restart loops
864
- os.environ['GRADIO_WATCH'] = 'no'
865
- demo.launch(show_error=True)
866
- except Exception as e:
867
- import traceback
868
- print(f"Error launching demo: {traceback.format_exc()}")
869
- print("\nTrying alternative launch method...")
870
- try:
871
- # Alternative launch method
872
- demo.launch(share=False, debug=True)
873
- except Exception as e2:
874
- print(f"Alternative launch also failed: {str(e2)}")
875
- print("\nPlease check your Gradio installation and try again.")
 
5
 
6
  def create_reranking_interface(task_data):
7
  """Create a Gradio interface for reranking evaluation using drag and drop."""
8
+ samples = task_data["samples"]
9
+ results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
10
+ completed_samples = {s["id"]: False for s in samples}
11
+
12
+ # Define helper functions before UI elements are created
13
+ def generate_sortable_html(candidates, existing_ranks=None):
14
+ """Generate the HTML for the sortable list with up/down buttons."""
15
+ if existing_ranks and len(existing_ranks) == len(candidates):
16
+ order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
17
+ else:
18
+ order = list(range(len(candidates)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ html = '<div id="sortable-container" class="sortable-container">'
21
+ for rank_minus_1, idx in enumerate(order):
22
+ if idx < len(candidates):
23
+ doc = candidates[idx]
24
+ rank = rank_minus_1 + 1
25
+ import html as html_escaper
26
+ escaped_doc = html_escaper.escape(doc)
27
+
28
+ # Add navigation buttons (up/down arrows)
29
+ up_disabled = "disabled" if rank == 1 else ""
30
+ down_disabled = "disabled" if rank == len(candidates) else ""
31
+
32
+ html += f'''\
33
+ <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
34
+ <div class="rank-controls">
35
+ <button class="rank-btn up-btn" {up_disabled} onclick="moveItemUp({rank})">▲</button>
36
+ <div class="rank-badge">{rank}</div>
37
+ <button class="rank-btn down-btn" {down_disabled} onclick="moveItemDown({rank})">▼</button>
38
+ </div>
39
+ <div class="doc-content">{escaped_doc}</div>
40
+ </div>
41
+ '''
42
+ html += '</div>'
43
+ return html
44
+
45
+ def save_ranking(order_json, sample_id):
46
+ """Save the current ranking to results."""
47
+ try:
48
+ if not order_json or order_json == "[]":
49
+ return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
50
+ order = json.loads(order_json)
51
+ num_candidates = len(next(s["candidates"] for s in samples if s["id"] == sample_id))
52
+ if len(order) != num_candidates:
53
+ return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
54
+ rankings = [0] * num_candidates
55
+ for rank_minus_1, doc_idx in enumerate(order):
56
+ if doc_idx < num_candidates:
57
+ rankings[doc_idx] = rank_minus_1 + 1
58
  else:
59
+ raise ValueError(f"Invalid document index {doc_idx} found in order.")
60
+ if sorted(rankings) != list(range(1, num_candidates + 1)):
61
+ return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
62
+ annotation = {"sample_id": sample_id, "rankings": rankings}
63
+ existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
64
+ if existing_idx is not None:
65
+ results["annotations"][existing_idx] = annotation
66
+ else:
67
+ results["annotations"].append(annotation)
68
+ completed_samples[sample_id] = True
69
+ output_path = f"{task_data['task_name']}_human_results.json"
70
+ with open(output_path, "w") as f:
71
+ json.dump(results, f, indent=2)
72
+ return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
73
+ except json.JSONDecodeError:
74
+ return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
75
+ except Exception as e:
76
+ import traceback
77
+ print(traceback.format_exc())
78
+ return f"Error saving ranking: {str(e)}", progress_text.value
79
+
80
+ def load_sample(sample_id):
81
+ """Load a sample into the interface."""
82
+ try:
83
+ sample = next((s for s in samples if s["id"] == sample_id), None)
84
+ if not sample:
85
+ return gr.update(), gr.update(value="[]"), gr.update(), gr.update()
86
+ existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
87
+ new_html = generate_sortable_html(sample["candidates"], existing_ranking)
88
+ status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
89
+ progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
90
+ return sample["query"], new_html, "[]", progress, status
91
+ except Exception as e:
92
+ return gr.update(), gr.update(value="[]"), gr.update(), gr.update(value=f"Error loading sample: {str(e)}")
93
+
94
+ def next_sample_id(current_id):
95
+ current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
96
+ if current_idx == -1:
97
+ return current_id
98
+ next_idx = min(current_idx + 1, len(samples) - 1)
99
+ return samples[next_idx]["id"]
100
+
101
+ def prev_sample_id(current_id):
102
+ current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
103
+ if current_idx == -1:
104
+ return current_id
105
+ prev_idx = max(current_idx - 1, 0)
106
+ return samples[prev_idx]["id"]
107
+
108
+ def save_results():
109
+ output_path = f"{task_data['task_name']}_human_results.json"
110
+ try:
111
+ with open(output_path, "w") as f:
112
+ json.dump(results, f, indent=2)
113
+ return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
114
+ except Exception as e:
115
+ return f"⚠️ Error saving results file: {str(e)}"
116
+
117
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
118
+ gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
119
+ with gr.Accordion("Instructions", open=True):
120
+ gr.Markdown("""
121
+ ## Task Instructions
122
+
123
+ {instructions}
124
+
125
+ ### How to use this interface:
126
+ 1. Read the query at the top
127
+ 2. Drag and drop documents to reorder them based on relevance
128
+ 3. Top document = Rank 1, Second = Rank 2, etc.
129
+ 4. Click "Submit Rankings" when you're done with the current query
130
+ 5. Use "Previous" and "Next" to navigate between queries
131
+ 6. Click "Save All Results" periodically to ensure your work is saved
132
+ """.format(instructions=task_data["instructions"]))
133
 
134
+ current_sample_id = gr.State(value=samples[0]["id"])
 
 
 
 
 
 
 
 
 
135
 
136
+ with gr.Row():
137
+ progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
138
+ status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
 
 
 
 
 
 
 
139
 
140
+ with gr.Group():
141
+ gr.Markdown("## Query:")
142
+ query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
143
+ gr.Markdown("## Documents to Rank (Drag to Reorder):")
144
+ sortable_list = gr.HTML(generate_sortable_html(samples[0]["candidates"], []), elem_id="sortable-list-container")
145
+ order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
146
+ with gr.Row():
147
+ prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
148
+ submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
149
+ next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
150
+ save_btn = gr.Button("💾 Save All Results", variant="secondary")
 
 
 
 
 
 
 
 
151
 
152
+ js_code = """
153
+ <script src="https://cdn.jsdelivr.net/npm/sortablejs@1.15.0/Sortable.min.js"></script>
154
+ <script>
155
+ // Function to move an item up in the ranking
156
+ function moveItemUp(currentRank) {
157
+ if (currentRank <= 1) return; // Already at the top
158
+
159
+ const container = document.getElementById('sortable-container');
160
+ if (!container) return;
161
+
162
+ const items = container.querySelectorAll('.sortable-item');
163
+ const itemsArray = Array.from(items);
164
+
165
+ // Find the items to swap
166
+ const currentItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank);
167
+ const aboveItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank - 1);
168
+
169
+ if (!currentItem || !aboveItem) return;
170
+
171
+ // Swap the items
172
+ aboveItem.parentNode.insertBefore(currentItem, aboveItem);
173
+
174
+ // Update ranks
175
+ updateRanksAfterMove();
176
+ }
177
 
178
+ // Function to move an item down in the ranking
179
+ function moveItemDown(currentRank) {
180
+ const container = document.getElementById('sortable-container');
181
+ if (!container) return;
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
+ const items = container.querySelectorAll('.sortable-item');
184
+ if (currentRank >= items.length) return; // Already at the bottom
185
 
186
+ const itemsArray = Array.from(items);
187
+
188
+ // Find the items to swap
189
+ const currentItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank);
190
+ const belowItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank + 1);
191
+
192
+ if (!currentItem || !belowItem) return;
193
+
194
+ // Swap the items
195
+ belowItem.parentNode.insertBefore(belowItem, currentItem);
196
+
197
+ // Update ranks
198
+ updateRanksAfterMove();
199
+ }
200
+
201
+ // Update rank numbers and classes after moving
202
+ function updateRanksAfterMove() {
203
+ const container = document.getElementById('sortable-container');
204
+ if (!container) return;
205
+
206
+ const items = container.querySelectorAll('.sortable-item');
207
+ const orderInput = document.querySelector('#current-order textarea');
208
+ if (!orderInput) return;
209
+
210
+ const order = [];
211
+ items.forEach((item, index) => {
212
+ const rank = index + 1;
213
+ const docId = parseInt(item.getAttribute('data-doc-id'));
214
+
215
+ // Update rank display
216
+ const rankBadge = item.querySelector('.rank-badge');
217
+ if (rankBadge) rankBadge.textContent = rank;
218
+
219
+ // Update item classes
220
+ item.className = item.className.replace(/rank-bg-\d+/g, '').trim();
221
+ item.classList.add(`rank-bg-${rank}`);
222
+
223
+ // Update data attribute
224
+ item.setAttribute('data-rank', rank);
225
+
226
+ // Update button states
227
+ const upBtn = item.querySelector('.up-btn');
228
+ const downBtn = item.querySelector('.down-btn');
229
+
230
+ if (upBtn) {
231
+ if (rank == 1) {
232
+ upBtn.setAttribute('disabled', 'disabled');
233
+ } else {
234
+ upBtn.removeAttribute('disabled');
235
+ }
236
  }
237
 
238
+ if (downBtn) {
239
+ if (rank == items.length) {
240
+ downBtn.setAttribute('disabled', 'disabled');
241
+ } else {
242
+ downBtn.removeAttribute('disabled');
243
+ }
 
244
  }
245
 
246
+ order.push(docId);
247
+ });
 
248
 
249
+ // Update hidden input
250
+ const newOrderValue = JSON.stringify(order);
251
+ if (orderInput.value !== newOrderValue) {
252
+ orderInput.value = newOrderValue;
253
+ const event = new Event('input', { bubbles: true });
254
+ orderInput.dispatchEvent(event);
255
+ }
256
+ }
257
+
258
+ document.addEventListener('DOMContentLoaded', function() {
259
+ function initializeSortable() {
260
+ // Initialize event handlers for buttons
261
+ updateRanksAfterMove();
262
 
263
+ // Keep drag-and-drop as a fallback
264
  const container = document.getElementById('sortable-container');
265
  if (!container) {
266
+ console.log('Container not found, retrying...');
267
+ setTimeout(initializeSortable, 200);
268
  return;
269
  }
270
 
271
+ if (typeof Sortable === 'undefined') {
272
+ console.log('Sortable not loaded, retrying...');
273
+ setTimeout(initializeSortable, 200);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  return;
275
  }
276
 
277
+ if (container.sortableInstance) {
 
 
 
278
  return;
279
  }
280
 
281
+ container.sortableInstance = new Sortable(container, {
282
+ animation: 150,
283
+ ghostClass: "sortable-ghost",
284
+ onEnd: function() {
285
+ updateRanksAfterMove();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
 
 
287
  });
288
+ }
 
 
 
 
 
 
 
 
 
289
 
290
+ // Initialize immediately and also set up a mutation observer
291
+ initializeSortable();
292
+
293
+ const targetNode = document.getElementById('sortable-list-container');
294
+ if (targetNode) {
295
+ const config = { childList: true, subtree: true };
296
+ const observer = new MutationObserver(function(mutationsList) {
297
+ for(const mutation of mutationsList) {
298
+ if (mutation.type === 'childList') {
299
+ if (document.getElementById('sortable-container')) {
300
+ initializeSortable();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  }
302
  }
303
+ }
304
+ });
305
+ observer.observe(targetNode, config);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  }
307
+ });
308
+ </script>
309
+ <style>
310
+ .sortable-container {
311
+ display: flex;
312
+ flex-direction: column;
313
+ gap: 8px;
314
+ min-height: 200px;
315
+ padding: 10px;
316
+ background-color: #f8f9fa;
317
+ border-radius: 8px;
318
+ }
319
+ .sortable-item {
320
+ padding: 12px 15px;
321
+ background-color: #fff;
322
+ border: 1px solid #e0e0e0;
323
+ border-radius: 6px;
324
+ cursor: grab;
325
+ display: flex;
326
+ align-items: center;
327
+ transition: all 0.2s ease;
328
+ user-select: none;
329
+ }
330
+ .sortable-item:hover {
331
+ background-color: #f8f9fa;
332
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
333
+ }
334
+ .sortable-ghost {
335
+ background-color: #e3f2fd !important;
336
+ border-style: dashed !important;
337
+ opacity: 0.8;
338
+ }
339
+ .sortable-chosen {
340
+ cursor: grabbing;
341
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
342
+ }
343
+ .rank-controls {
344
+ display: flex;
345
+ flex-direction: column;
346
+ align-items: center;
347
+ margin-right: 15px;
348
+ }
349
+ .rank-badge {
350
+ display: flex;
351
+ align-items: center;
352
+ justify-content: center;
353
+ width: 28px;
354
+ height: 28px;
355
+ border-radius: 50%;
356
+ background-color: #6c757d;
357
+ color: white;
358
+ font-weight: bold;
359
+ margin: 4px 0;
360
+ flex-shrink: 0;
361
+ }
362
+ .rank-btn {
363
+ border: none;
364
+ background: #f0f0f0;
365
+ border-radius: 4px;
366
+ width: 24px;
367
+ height: 24px;
368
+ font-size: 12px;
369
+ line-height: 1;
370
+ display: flex;
371
+ align-items: center;
372
+ justify-content: center;
373
+ cursor: pointer;
374
+ color: #333;
375
+ }
376
+ .rank-btn:hover:not([disabled]) {
377
+ background: #e0e0e0;
378
+ }
379
+ .rank-btn:disabled {
380
+ opacity: 0.5;
381
+ cursor: not-allowed;
382
+ }
383
+ .doc-content {
384
+ flex: 1;
385
+ line-height: 1.4;
386
+ word-break: break-word;
387
+ }
388
+ .rank-bg-1 .rank-badge { background-color: #198754; }
389
+ .rank-bg-2 .rank-badge { background-color: #20c997; }
390
+ .rank-bg-3 .rank-badge { background-color: #ffc107; color: #333; }
391
+ .rank-bg-4 .rank-badge { background-color: #fd7e14; }
392
+ .rank-bg-5 .rank-badge { background-color: #dc3545; }
393
+ .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge { background-color: #6f42c1; }
394
+ .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge { background-color: #d63384; }
395
+ .rank-bg-10 .rank-badge, .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge,
396
+ .rank-bg-13 .rank-badge, .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge,
397
+ .rank-bg-16 .rank-badge, .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge,
398
+ .rank-bg-19 .rank-badge, .rank-bg-20 .rank-badge {
399
+ background-color: #6c757d;
400
+ }
401
+ </style>
402
+ """
403
+ gr.HTML(js_code)
404
+
405
+ submit_btn.click(
406
+ save_ranking,
407
+ inputs=[order_state, current_sample_id],
408
+ outputs=[status_box, progress_text]
409
+ )
410
+
411
+ next_btn.click(
412
+ next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
413
+ ).then(
414
+ load_sample,
415
+ inputs=[current_sample_id],
416
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box]
417
+ )
418
+
419
+ prev_btn.click(
420
+ prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
421
+ ).then(
422
+ load_sample,
423
+ inputs=[current_sample_id],
424
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box]
425
+ )
426
+
427
+ save_btn.click(save_results, outputs=[status_box])
428
+
429
+ demo.load(lambda: load_sample(samples[0]['id']),
430
+ outputs=[query_text, sortable_list, order_state, progress_text, status_box])
431
+
432
+ return demo
433
 
434
+ # Main app with file upload capability
435
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
436
  gr.Markdown("# MTEB Human Evaluation Demo")
437
 
 
443
  This interface allows you to evaluate the relevance of documents for reranking tasks.
444
  """)
445
 
446
+ # Function to get the most recent task file
447
  def get_latest_task_file():
448
+ # Check first in uploaded_tasks directory
449
+ os.makedirs("uploaded_tasks", exist_ok=True)
450
+ uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
451
+
452
+ if uploaded_tasks:
453
+ # Sort by modification time, newest first
454
+ uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
455
+ return os.path.join("uploaded_tasks", uploaded_tasks[0])
456
+
457
+ # Fall back to default example
458
+ return "AskUbuntuDupQuestions_human_eval.json"
 
 
 
 
 
 
 
 
459
 
460
+ # Load the task file
461
  task_file = get_latest_task_file()
462
 
 
463
  try:
464
+ with open(task_file, "r") as f:
465
+ task_data = json.load(f)
466
+
467
+ # Show which task is currently loaded
468
+ gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
469
+
470
+ # Display the interface
471
+ reranking_demo = create_reranking_interface(task_data)
 
 
 
 
 
 
 
 
 
 
472
  except Exception as e:
 
 
473
  gr.Markdown(f"**Error loading task: {str(e)}**")
474
  gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
 
 
 
 
 
 
 
475
 
476
  with gr.TabItem("Upload & Evaluate"):
477
  gr.Markdown("""
 
677
  download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
678
 
679
  if __name__ == "__main__":
680
+ demo.launch()