AdnanElAssadi committed
Commit 00d7727 · verified · 1 Parent(s): ac98842

Update app.py

Files changed (1)
  1. app.py +511 -97
app.py CHANGED
@@ -78,9 +78,19 @@ def create_reranking_interface(task_data):
             return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"

     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
+        # Header section with title and progress indicators
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=3):
+                gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
+            with gr.Column(scale=1):
+                progress_text = gr.Textbox(
+                    label="Progress",
+                    value=f"Progress: 0/{len(samples)}",
+                    interactive=False
+                )

-        with gr.Accordion("Instructions", open=True):
+        # Instructions in a collapsible section
+        with gr.Accordion("📋 Task Instructions", open=False):
             gr.Markdown("""
             ## Task Instructions

@@ -96,77 +106,180 @@ def create_reranking_interface(task_data):
             7. Your rankings are automatically saved when you submit or navigate
             """.format(instructions=task_data.get("instructions", "Rank documents by their relevance to the query.")))

+        # Hidden state variables
         current_sample_id = gr.State(value=samples[0]["id"])
         auto_save_enabled = gr.State(value=True)

-        with gr.Row():
-            progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
-            status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
-            auto_save_toggle = gr.Checkbox(label="Auto-save when navigating", value=True)
+        # Status and control section
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=3):
+                status_box = gr.Textbox(
+                    label="Status",
+                    value="Ready to start evaluation",
+                    interactive=False
+                )
+            with gr.Column(scale=1):
+                auto_save_toggle = gr.Checkbox(
+                    label="Auto-save when navigating",
+                    value=True
+                )

+        # Main content area
         with gr.Group():
-            gr.Markdown("## Query:")
-            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
+            # Query section with clear visual distinction
+            with gr.Box():
+                gr.Markdown("## 📝 Query")
+                query_text = gr.Textbox(
+                    value=samples[0]["query"],
+                    label="",
+                    interactive=False,
+                    elem_classes=["query-text"]
+                )
+
+            # Quick ranking tools in a nicely formatted bar
+            with gr.Row(equal_height=True):
+                gr.Markdown("### 🔄 Quick Ranking Tools:", elem_classes=["tool-heading"])
+                sequential_btn = gr.Button("Rank in Order (1,2,3...)", elem_classes=["tool-button"])
+                reverse_btn = gr.Button("Reverse Order (n,n-1,...)", elem_classes=["tool-button"])
+                clear_btn = gr.Button("Clear All Rankings", elem_classes=["tool-button"])

-            gr.Markdown("## Documents to Rank:")
+            # Documents section with improved layout
+            gr.Markdown("## 📄 Documents to Rank")

-            # Create document displays and ranking inputs in synchronized pairs
+            # Container for documents and rankings
             doc_containers = []
             ranking_inputs = []
             validation_indicators = []

-            with gr.Column():
-                # Quick ranking tools
-                with gr.Row():
-                    gr.Markdown("### Quick Ranking Options:")
-                    sequential_btn = gr.Button("Rank in Order (1,2,3...)")
-                    reverse_btn = gr.Button("Reverse Order (n,n-1,...)")
-                    clear_btn = gr.Button("Clear All Rankings")
-
-                # Document display with better UI for ranking
+            # Create a better visual layout for documents
+            with gr.Box():
                 for i, doc in enumerate(samples[0]["candidates"]):
-                    with gr.Row():
-                        with gr.Column(scale=4):
+                    row_class = "document-row-even" if i % 2 == 0 else "document-row-odd"
+                    with gr.Row(equal_height=True, elem_classes=["document-row", row_class]):
+                        with gr.Column(scale=1, min_width=50):
+                            gr.HTML(f"<div class='doc-number'>{i+1}</div>")
+
+                        with gr.Column(scale=6):
                             doc_box = gr.Textbox(
                                 value=doc,
                                 label=f"Document {i+1}",
-                                interactive=False
+                                interactive=False,
+                                elem_classes=["document-text"]
                             )
                             doc_containers.append(doc_box)

-                        with gr.Column(scale=1):
-                            # Use Dropdown instead of Radio for compatibility with Gradio 3.x
+                        with gr.Column(scale=2):
+                            # Dropdown for ranking
                             rank_input = gr.Dropdown(
                                 choices=[str(j) for j in range(1, len(samples[0]["candidates"])+1)],
                                 label=f"Rank",
-                                value=""
+                                value="",
+                                elem_classes=["rank-dropdown"]
                             )
                             ranking_inputs.append(rank_input)

-                        # Add validation indicator
-                        with gr.Column(scale=1, min_width=50):
+                        with gr.Column(scale=2):
+                            # Validation indicator
                             validation = gr.HTML(value="")
                             validation_indicators.append(validation)

-        with gr.Row():
+        # Navigation and submission controls
+        with gr.Row(equal_height=True):
             prev_btn = gr.Button("← Previous Query", size="sm")
             submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
             next_btn = gr.Button("Next Query →", size="sm")

+        # Save results button
         with gr.Row():
-            save_btn = gr.Button("💾 Save All Results", variant="secondary")
+            save_btn = gr.Button("💾 Save All Results", variant="secondary", size="sm")
             results_info = gr.HTML(value=f"<p>Results will be saved to <code>{task_data['task_name']}_human_results.json</code></p>")

+        # CSS for styling
+        gr.HTML("""
+        <style>
+        .query-text textarea {
+            font-size: 16px !important;
+            font-weight: bold !important;
+            background-color: #f8f9fa !important;
+            border-left: 4px solid #2c7be5 !important;
+            padding-left: 10px !important;
+        }
+
+        .document-row {
+            border-bottom: 1px solid #e0e0e0;
+            padding: 10px 0;
+            margin-bottom: 5px !important;
+        }
+
+        .document-text textarea {
+            font-size: 14px !important;
+            line-height: 1.5 !important;
+        }
+
+        .rank-dropdown select {
+            font-weight: bold !important;
+            text-align: center !important;
+        }
+
+        .tool-button button {
+            min-width: 120px !important;
+        }
+
+        .tool-heading {
+            padding-top: 8px !important;
+        }
+
+        .document-row-even {
+            background-color: #f8f9fa;
+        }
+
+        .document-row-odd {
+            background-color: #ffffff;
+        }
+
+        .document-row:hover {
+            background-color: #e9ecef;
+        }
+
+        .doc-number {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            width: 30px;
+            height: 30px;
+            border-radius: 50%;
+            background-color: #2c7be5;
+            color: white;
+            font-weight: bold;
+            margin: 0 auto;
+        }
+
+        .rank-dropdown select {
+            font-weight: bold !important;
+            font-size: 16px !important;
+            text-align: center !important;
+            padding: 8px !important;
+            border-radius: 5px !important;
+            border: 2px solid #2c7be5 !important;
+        }
+
+        .rank-dropdown select:focus {
+            border-color: #007bff !important;
+            box-shadow: 0 0 0 0.2rem rgba(0, 123, 255, 0.25) !important;
+        }
+        </style>
+        """)
+
         def validate_rankings(*rankings):
-            """Validate rankings and update indicators."""
+            """Validate rankings and update indicators with visual cues."""
             results = []
             all_valid = True
             for rank in rankings:
                 if rank is None or rank == "":
-                    results.append("⚠️")
+                    results.append('<span style="color: #dc3545; font-weight: bold;">⚠️ Missing</span>')
                     all_valid = False
                 else:
-                    results.append("")
+                    results.append('<span style="color: #28a745; font-weight: bold;">✓ Rank ' + str(rank) + '</span>')

             return results + [all_valid] # Return validation indicators and validity flag

@@ -284,6 +397,7 @@ def create_reranking_interface(task_data):

         # Define a function that collects all ranking values and validates them
         def submit_rankings(*args):
+            """Submit rankings with improved validation and user feedback."""
             # Get the last argument (sample_id) and the rankings
             if len(args) < 1:
                 return "Error: No arguments provided", progress_text.value
@@ -305,14 +419,84 @@ def create_reranking_interface(task_data):
                 if i < len(validation_indicators):
                     validation_indicators[i].update(value=result)

+            # Check for duplicate rankings
+            if all_valid:
+                try:
+                    processed_rankings = [int(r) for r in rankings]
+                    if len(set(processed_rankings)) != len(processed_rankings):
+                        dup_ranks = {}
+                        for i, r in enumerate(processed_rankings):
+                            if r in dup_ranks:
+                                dup_ranks[r].append(i)
+                            else:
+                                dup_ranks[r] = [i]
+
+                        # Highlight duplicates with error styling
+                        for rank, indices in dup_ranks.items():
+                            if len(indices) > 1:
+                                for idx in indices:
+                                    if idx < len(validation_indicators):
+                                        validation_indicators[idx].update(
+                                            value=f'<span style="color: #dc3545; font-weight: bold;">⚠️ Duplicate rank {rank}</span>'
+                                        )
+
+                        return "⚠️ Each document must have a unique rank. Please fix duplicate rankings.", progress_text.value
+                except:
+                    pass
+
             # If not all valid, return error message
             if not all_valid:
                 return "⚠️ Please assign a rank to all documents before submitting", progress_text.value

             # Save the validated rankings
             status, progress = save_ranking(rankings, sample_id)
+
+            # Provide clear success feedback
+            if "✅" in status:
+                for i in range(len(validation_indicators)):
+                    validation_indicators[i].update(
+                        value=f'<span style="color: #28a745; font-weight: bold;">✓ Saved</span>'
+                    )
+
             return status, progress

+        # Update ranking input's event handling for immediate validation
+        def on_ranking_change(*rankings):
+            """Validate rankings whenever any ranking dropdown changes."""
+            validation_results = validate_rankings(*rankings)
+            return validation_results[:-1] # Return only the validation indicators
+
+        # Check for overlapping ranks and duplicate assignments
+        def check_for_duplicates(*rankings):
+            """Highlight duplicate rankings with visual feedback."""
+            clean_rankings = []
+            for r in rankings:
+                if r is not None and r != "":
+                    clean_rankings.append(int(r))
+
+            if len(clean_rankings) != len(set(clean_rankings)):
+                used_ranks = {}
+                for i, r in enumerate(rankings):
+                    if r is not None and r != "":
+                        rank = int(r)
+                        if rank in used_ranks:
+                            used_ranks[rank].append(i)
+                        else:
+                            used_ranks[rank] = [i]
+
+                results = []
+                for i, r in enumerate(rankings):
+                    if r is not None and r != "":
+                        rank = int(r)
+                        if len(used_ranks[rank]) > 1:
+                            results.append(f'<span style="color: #dc3545; font-weight: bold;">⚠️ Duplicate rank {rank}</span>')
+                        else:
+                            results.append(f'<span style="color: #28a745; font-weight: bold;">✓ Rank {rank}</span>')
+                    else:
+                        results.append('<span style="color: #dc3545; font-weight: bold;">⚠️ Missing</span>')
+
+                return results
+
         # Wire up events (Gradio 3.x syntax)
         submit_btn.click(
             fn=submit_rankings,
@@ -389,6 +573,190 @@ def create_reranking_interface(task_data):
             inputs=[auto_save_toggle],
             outputs=[auto_save_enabled]
         )
+
+        # Connect validation to ranking inputs for real-time feedback
+        for i, ranking in enumerate(ranking_inputs):
+            ranking.change(
+                fn=on_ranking_change,
+                inputs=ranking_inputs,
+                outputs=validation_indicators
+            )
+
+        # Add a real-time validation for the entire set to check for duplicates
+        def validate_all_inputs(*rankings):
+            """Check all inputs for duplicate ranks and provide feedback."""
+            validation_results = validate_rankings(*rankings)
+            all_valid = validation_results[-1]
+            validation_indicators_values = validation_results[:-1]
+
+            # Show clear button status based on validation
+            submit_status = "Ready to submit" if all_valid else "Please assign unique ranks to all documents"
+
+            return validation_indicators_values + [submit_status]
+
+        # Connect this validation to all ranking inputs
+        for ranking in ranking_inputs:
+            ranking.change(
+                fn=validate_all_inputs,
+                inputs=ranking_inputs,
+                outputs=validation_indicators + [status_box]
+            )
+
+        # Helper function for ranking - sort documents by rankings
+        def rank_by_relevance(*args):
+            """Sorts the documents by their current rankings for a clearer view."""
+            # Last argument is sample_id
+            sample_id = args[-1]
+            rankings = args[:-1]
+
+            # Check if we have valid rankings
+            valid_rankings = []
+            for i, r in enumerate(rankings):
+                if r is not None and r != "":
+                    try:
+                        valid_rankings.append((i, int(r)))
+                    except:
+                        pass
+
+            # If we don't have enough valid rankings, do nothing
+            if len(valid_rankings) < 2:
+                return [status_box.value]
+
+            # Sort by rank
+            valid_rankings.sort(key=lambda x: x[1])
+
+            # Generate message showing the ranking order
+            result = "<p><strong>Current ranking order:</strong></p><ol>"
+            for idx, _ in valid_rankings:
+                doc_text = doc_containers[idx].value
+                # Truncate if too long
+                if len(doc_text) > 100:
+                    doc_text = doc_text[:97] + "..."
+                result += f"<li>Doc {idx+1}: {doc_text}</li>"
+            result += "</ol>"
+
+            return [result]
+
+        # Add a "Show Current Ranking" button
+        with gr.Row():
+            show_ranking_btn = gr.Button("👁️ Show Current Ranking Order", variant="secondary")
+            ranking_display = gr.HTML("")
+
+        # Connect the show ranking button
+        show_ranking_btn.click(
+            fn=rank_by_relevance,
+            inputs=ranking_inputs + [current_sample_id],
+            outputs=[ranking_display]
+        )
+
+        # Add a ranking preview section that shows documents in their ranked order
+        def generate_ranking_preview(*rankings):
+            """Creates a visual preview of current rankings."""
+            # Create list of (index, rank) pairs for valid rankings
+            ranked_docs = []
+            for i, rank in enumerate(rankings):
+                if rank and rank.strip():
+                    try:
+                        ranked_docs.append((i, int(rank)))
+                    except:
+                        continue
+
+            # Sort by rank
+            ranked_docs.sort(key=lambda x: x[1])
+
+            # Generate HTML for the preview
+            if not ranked_docs:
+                return "<p><i>No rankings assigned yet. Assign ranks to see a preview.</i></p>"
+
+            html = "<div class='ranking-preview'>"
+            html += "<h3>Current Ranking Preview</h3>"
+            html += "<ol class='ranked-docs'>"
+
+            for doc_idx, rank in ranked_docs:
+                if doc_idx < len(doc_containers):
+                    doc_text = doc_containers[doc_idx].value
+                    # Truncate if too long
+                    if len(doc_text) > 100:
+                        doc_text = doc_text[:97] + "..."
+
+                    html += f"""
+                    <li class='ranked-doc'>
+                        <div class='rank-badge'>#{rank}</div>
+                        <div class='doc-index'>Document {doc_idx+1}</div>
+                        <div class='doc-content'>{doc_text}</div>
+                    </li>
+                    """
+
+            html += "</ol></div>"
+
+            # Add CSS for the preview
+            html += """
+            <style>
+            .ranking-preview {
+                margin-top: 20px;
+                padding: 10px;
+                border: 1px solid #e0e0e0;
+                border-radius: 5px;
+                background-color: #f8f9fa;
+            }
+
+            .ranked-docs {
+                list-style-type: none;
+                padding: 0;
+            }
+
+            .ranked-doc {
+                display: flex;
+                align-items: center;
+                padding: 10px;
+                margin-bottom: 5px;
+                border: 1px solid #ddd;
+                border-radius: 5px;
+                background-color: white;
+            }
+
+            .rank-badge {
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                width: 40px;
+                height: 40px;
+                border-radius: 50%;
+                background-color: #2c7be5;
+                color: white;
+                font-weight: bold;
+                margin-right: 10px;
+            }
+
+            .doc-index {
+                font-weight: bold;
+                width: 120px;
+            }
+
+            .doc-content {
+                flex-grow: 1;
+                overflow: hidden;
+                text-overflow: ellipsis;
+            }
+            </style>
+            """
+
+            return html
+
+        # Add ranking preview
+        ranking_preview = gr.HTML("<p><i>No rankings assigned yet. Assign ranks to see a preview.</i></p>")
+
+        # Update the ranking preview whenever a ranking changes
+        for ranking in ranking_inputs:
+            ranking.change(
+                fn=generate_ranking_preview,
+                inputs=ranking_inputs,
+                outputs=[ranking_preview]
+            )
+
+        # Show preview section
+        with gr.Accordion("📊 Ranking Preview", open=True):
+            ranking_preview

     return demo

@@ -400,6 +768,45 @@ def create_main_app():
         task_container = gr.HTML()
         loaded_task_info = gr.JSON(label="Loaded Task Information", visible=False)

+        # CSS for consistent styling throughout the app
+        gr.HTML("""
+        <style>
+        /* Main App Styling */
+        .tab-content {
+            padding: 15px !important;
+        }
+
+        .btn-primary {
+            background-color: #2c7be5 !important;
+        }
+
+        .btn-secondary {
+            background-color: #6c757d !important;
+        }
+
+        /* Status messages */
+        .status-message {
+            font-weight: bold !important;
+        }
+
+        /* Box styling */
+        .content-box {
+            border: 1px solid #e0e0e0;
+            border-radius: 5px;
+            padding: 15px;
+            margin-bottom: 15px;
+            background-color: #f8f9fa;
+        }
+
+        /* Section headers */
+        .section-header {
+            border-bottom: 2px solid #2c7be5;
+            padding-bottom: 5px;
+            margin-bottom: 15px;
+        }
+        </style>
+        """)
+
         tabs = gr.Tabs()

         with tabs:
@@ -408,7 +815,7 @@ def create_main_app():
                 ## MTEB Human Evaluation Interface

                 This interface allows you to evaluate the relevance of documents for reranking tasks.
-                """)
+                """, elem_classes=["section-header"])

                 # Function to get the most recent task file
                 def get_latest_task_file():
@@ -447,76 +854,80 @@ def create_main_app():
                 # Load the task file
                 task_file = get_latest_task_file()

-                if task_file:
-                    try:
-                        with open(task_file, "r") as f:
-                            task_data = json.load(f)
-
-                        # Show which task is currently loaded
-                        gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
-
-                        # Display the interface
-                        demo = create_reranking_interface(task_data)
-                        task_container.update(value=f"<p>Task loaded: {task_file}</p>")
-                    except Exception as e:
-                        gr.Markdown(f"**Error loading task: {str(e)}**")
+                with gr.Box(elem_classes=["content-box"]):
+                    if task_file:
+                        try:
+                            with open(task_file, "r") as f:
+                                task_data = json.load(f)
+
+                            # Show which task is currently loaded
+                            gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
+
+                            # Display the interface
+                            demo = create_reranking_interface(task_data)
+                            task_container.update(value=f"<p>Task loaded: {task_file}</p>")
+                        except Exception as e:
+                            gr.Markdown(f"**Error loading task: {str(e)}**", elem_classes=["status-message"])
+                            gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
+                    else:
+                        gr.Markdown("**No task file found**", elem_classes=["status-message"])
                         gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
-                else:
-                    gr.Markdown("**No task file found**")
-                    gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")

             with gr.TabItem("Upload & Evaluate"):
                 gr.Markdown("""
                 ## Upload Your Own Task File

                 If you have a prepared task file, you can upload it here to create an evaluation interface.
-                """)
+                """, elem_classes=["section-header"])

                 with gr.Row():
                     with gr.Column(scale=1):
-                        file_input = gr.File(label="Upload a task file (JSON)")
-                        load_btn = gr.Button("Load Task")
-                        message = gr.Textbox(label="Status", interactive=False)
+                        with gr.Box(elem_classes=["content-box"]):
+                            file_input = gr.File(label="Upload a task file (JSON)")
+                            load_btn = gr.Button("Load Task", variant="primary")
+                            message = gr.Textbox(label="Status", interactive=False, elem_classes=["status-message"])

                         # Add task list for previously uploaded tasks
-                        gr.Markdown("### Previous Uploads")
-
-                        # Function to list existing task files in the tasks directory
-                        def list_task_files():
-                            os.makedirs("uploaded_tasks", exist_ok=True)
-                            tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
-                            if not tasks:
-                                return "No task files uploaded yet."
-                            return "\n".join([f"- {t}" for t in tasks])
-
-                        task_list = gr.Markdown(list_task_files())
-                        refresh_btn = gr.Button("Refresh List")
+                        with gr.Box(elem_classes=["content-box"]):
+                            gr.Markdown("### Previous Uploads", elem_classes=["section-header"])
+
+                            # Function to list existing task files in the tasks directory
+                            def list_task_files():
+                                os.makedirs("uploaded_tasks", exist_ok=True)
+                                tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
+                                if not tasks:
+                                    return "No task files uploaded yet."
+                                return "\n".join([f"- {t}" for t in tasks])
+
+                            task_list = gr.Markdown(list_task_files())
+                            refresh_btn = gr.Button("Refresh List")

                         # Add results management section
-                        gr.Markdown("### Results Management")
-
-                        # Function to list existing result files
-                        def list_result_files():
-                            results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
-                            if not results:
-                                return "No result files available yet."
+                        with gr.Box(elem_classes=["content-box"]):
+                            gr.Markdown("### Results Management", elem_classes=["section-header"])

-                            result_links = []
-                            for r in results:
-                                # Calculate completion stats
-                                try:
-                                    with open(r, "r") as f:
-                                        result_data = json.load(f)
-                                    annotation_count = len(result_data.get("annotations", []))
-                                    task_name = result_data.get("task_name", "Unknown")
-                                    result_links.append(f"- {r} ({annotation_count} annotations for {task_name})")
-                                except:
-                                    result_links.append(f"- {r}")
+                            # Function to list existing result files
+                            def list_result_files():
+                                results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
+                                if not results:
+                                    return "No result files available yet."
+
+                                result_links = []
+                                for r in results:
+                                    # Calculate completion stats
+                                    try:
+                                        with open(r, "r") as f:
+                                            result_data = json.load(f)
+                                        annotation_count = len(result_data.get("annotations", []))
+                                        task_name = result_data.get("task_name", "Unknown")
+                                        result_links.append(f"- {r} ({annotation_count} annotations for {task_name})")
+                                    except:
+                                        result_links.append(f"- {r}")
+
+                                return "\n".join(result_links)

-                            return "\n".join(result_links)
-
-                        results_list = gr.Markdown(list_result_files())
-                        download_results_btn = gr.Button("Download Results")
+                            results_list = gr.Markdown(list_result_files())
+                            download_results_btn = gr.Button("Download Results")

                 # Handle file upload and storage
                 def handle_upload(file):
@@ -540,8 +951,8 @@ def create_main_app():
                         with open(task_filename, "w") as f:
                             json.dump(task_data, f, indent=2)

-                        return f"Task '{task_data['task_name']}' uploaded successfully with {len(task_data['samples'])} samples. Please refresh the app and use the Demo tab to evaluate it.", list_task_files(), f"""
-                        <div style="padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
+                        return f"Task '{task_data['task_name']}' uploaded successfully with {len(task_data['samples'])} samples. Please refresh the app and use the Demo tab to evaluate it.", list_task_files(), f"""
+                        <div class="content-box">
                         <h3>Task uploaded successfully!</h3>
                         <p>Task Name: {task_data['task_name']}</p>
                         <p>Samples: {len(task_data['samples'])}</p>
@@ -555,7 +966,7 @@ def create_main_app():
                         </div>
                         """
                     except Exception as e:
-                        return f"Error processing task file: {str(e)}", task_list.value, ""
+                        return f"⚠️ Error processing task file: {str(e)}", task_list.value, ""

                 # Function to prepare results for download
                 def prepare_results_for_download():
@@ -596,7 +1007,7 @@ def create_main_app():
                 ## Manage Evaluation Results

                 View, download, and analyze your evaluation results.
-                """)
+                """, elem_classes=["section-header"])

                 # Function to load and display result stats
                 def get_result_stats():
@@ -642,14 +1053,17 @@ def create_main_app():

                     return "\n\n".join(stats)

-                result_stats = gr.Markdown(get_result_stats())
-                refresh_results_btn = gr.Button("Refresh Results")
+                with gr.Box(elem_classes=["content-box"]):
+                    result_stats = gr.Markdown(get_result_stats())
+                    refresh_results_btn = gr.Button("Refresh Results", variant="secondary")

                 # Add download options
-                with gr.Row():
-                    download_all_btn = gr.Button("Download All Results (ZIP)")
-                    result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
-                    download_selected_btn = gr.Button("Download Selected")
+                with gr.Box(elem_classes=["content-box"]):
+                    gr.Markdown("### Download Options", elem_classes=["section-header"])
+                    with gr.Row():
+                        download_all_btn = gr.Button("Download All Results (ZIP)", variant="primary")
+                        result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
+                        download_selected_btn = gr.Button("Download Selected", variant="secondary")

                 # Function to prepare all results for download as ZIP
                 def prepare_all_results():
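
Aside: the core of the duplicate detection this commit adds to submit_rankings is the dup_ranks mapping from each rank to the document indices that claimed it. A minimal standalone sketch of that check follows; the sample input is hypothetical, while in the app the values are the dropdown strings "1".."n", or "" when unset:

    # Minimal sketch of the duplicate-rank check introduced in submit_rankings.
    # The sample list below is hypothetical; the app passes the dropdown values.
    def find_duplicate_ranks(rankings):
        positions = {}
        for i, r in enumerate(rankings):
            if r is None or r == "":
                continue  # unranked slots are flagged as "Missing" by validate_rankings
            positions.setdefault(int(r), []).append(i)
        # Keep only ranks claimed by more than one document.
        return {rank: idxs for rank, idxs in positions.items() if len(idxs) > 1}

    print(find_duplicate_ranks(["1", "2", "2", "4"]))  # -> {2: [1, 2]}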