Spaces:

AdnanElAssadi
/

MTEB-Human-Eval-Demo

Sleeping

App Files Files Community

AdnanElAssadi committed on Apr 7

Commit

43b6806

verified ·

1 Parent(s): 87ef596

Update app.py

Browse files

Files changed (1) hide show

app.py +598 -511

app.py CHANGED Viewed

@@ -5,537 +5,585 @@ from pathlib import Path
 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation using drag and drop."""
-    samples = task_data["samples"]
-    results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
-    completed_samples = {s["id"]: False for s in samples}
-    # Define helper functions before UI elements are created
-    def generate_sortable_html(candidates, existing_ranks=None):
-        """Generate the HTML for the sortable list with up/down buttons."""
-        if existing_ranks and len(existing_ranks) == len(candidates):
-            order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
-        else:
-            order = list(range(len(candidates)))
-        html = '<div id="sortable-container" class="sortable-container">'
-        for rank_minus_1, idx in enumerate(order):
-            if idx < len(candidates):
-                doc = candidates[idx]
-                rank = rank_minus_1 + 1
-                import html as html_escaper
-                escaped_doc = html_escaper.escape(doc)
-                # Add navigation buttons (up/down arrows)
-                up_disabled = "disabled" if rank == 1 else ""
-                down_disabled = "disabled" if rank == len(candidates) else ""
-                html += f'''\
-                <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
-                    <div class="rank-controls">
-                        <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
-                        <div class="rank-badge">{rank}</div>
-                        <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
-                    </div>
-                    <div class="doc-content">{escaped_doc}</div>
-                </div>
-                '''
-        html += '</div>'
-        # Also return the computed order for proper initialization
-        return html, order
-    def save_ranking(order_json, sample_id):
-        """Save the current ranking to results."""
-        try:
-            if not order_json or order_json == "[]":
-                return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
-            order = json.loads(order_json)
-            sample = next((s for s in samples if s["id"] == sample_id), None)
-            if not sample:
-                return "⚠️ Sample not found.", progress_text.value
-            num_candidates = len(sample["candidates"])
-            if len(order) != num_candidates:
-                return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
-            rankings = [0] * num_candidates
             try:
-                for rank_minus_1, doc_idx in enumerate(order):
-                    if doc_idx < num_candidates:
-                        rankings[doc_idx] = rank_minus_1 + 1
-                    else:
-                        raise ValueError(f"Invalid document index {doc_idx} found in order.")
             except Exception as e:
-                return f"⚠️ Error processing ranking order: {str(e)}", progress_text.value
-            if sorted(rankings) != list(range(1, num_candidates + 1)):
-                return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
-            annotation = {"sample_id": sample_id, "rankings": rankings}
-            # Check if this sample was already annotated
-            existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
-            if existing_idx is not None:
-                results["annotations"][existing_idx] = annotation
-            else:
-                results["annotations"].append(annotation)
-            completed_samples[sample_id] = True
-            # Save results with timestamp and better error handling
-            try:
-                output_path = f"{task_data['task_name']}_human_results.json"
-                with open(output_path, "w") as f:
-                    json.dump(results, f, indent=2)
-                # Check if all samples are complete
-                all_completed = sum(completed_samples.values()) == len(samples)
-                completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
-                return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            except Exception as file_error:
-                print(f"Error saving file: {str(file_error)}")
-                # Still mark as completed in memory even if file save fails
-                return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-        except json.JSONDecodeError:
-            return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
-        except Exception as e:
-            import traceback
-            print(traceback.format_exc())
-            return f"Error saving ranking: {str(e)}", progress_text.value
-    def load_sample(sample_id):
-        """Load a sample into the interface."""
-        try:
-            sample = next((s for s in samples if s["id"] == sample_id), None)
-            if not sample:
-                return gr.update(), gr.update(value="[]"), gr.update(), gr.update()
-            existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
-            # Get both the HTML and the initial order
-            new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
-            # Convert initial order to JSON string for state
-            initial_order_json = json.dumps(initial_order)
-            status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
-            progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            return sample["query"], new_html, initial_order_json, progress, status
-        except Exception as e:
-            import traceback
-            print(traceback.format_exc())
-            return gr.update(), gr.update(), "[]", gr.update(), f"Error loading sample: {str(e)}"
-    def next_sample_id(current_id):
-        current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-        if current_idx == -1:
-            return current_id
-        next_idx = min(current_idx + 1, len(samples) - 1)
-        return samples[next_idx]["id"]
-    def prev_sample_id(current_id):
-        current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-        if current_idx == -1:
-            return current_id
-        prev_idx = max(current_idx - 1, 0)
-        return samples[prev_idx]["id"]
-    def save_results():
-        output_path = f"{task_data['task_name']}_human_results.json"
-        try:
-            # Create backup with timestamp
-            from datetime import datetime
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
-            # First create a backup
-            with open(backup_path, "w") as f:
-                json.dump(results, f, indent=2)
-            # Then save to the main file
-            with open(output_path, "w") as f:
-                json.dump(results, f, indent=2)
-            return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
-        except Exception as e:
-            return f"⚠️ Error saving results file: {str(e)}"
-    with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
-        with gr.Accordion("Instructions", open=True):
-            gr.Markdown("""
-            ## Task Instructions
-            {instructions}
-            ### How to use this interface:
-            1. Read the query at the top
-            2. Drag and drop documents to reorder them based on relevance
-            3. Top document = Rank 1, Second = Rank 2, etc.
-            4. Click "Submit Rankings" when you're done with the current query
-            5. Use "Previous" and "Next" to navigate between queries
-            6. Click "Save All Results" periodically to ensure your work is saved
-            """.format(instructions=task_data["instructions"]))
-        current_sample_id = gr.State(value=samples[0]["id"])
-        with gr.Row():
-            progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
-            status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
-        with gr.Group():
-            gr.Markdown("## Query:")
-            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
-            gr.Markdown("## Documents to Rank (Drag to Reorder):")
-            sortable_list = gr.HTML(generate_sortable_html(samples[0]["candidates"], []), elem_id="sortable-list-container")
-            order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
-            with gr.Row():
-                prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
-                submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
-                next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
-            save_btn = gr.Button("💾 Save All Results", variant="secondary")
-        js_code = """
-        <script src="https://cdn.jsdelivr.net/npm/sortablejs@1/Sortable.min.js"></script>
-        <script>
-        // Make the functions globally available
-        window.moveItemUp = function(currentRank) {
-            console.log('Moving item up:', currentRank);
-            if (currentRank <= 1) return; // Already at the top
-            const container = document.getElementById('sortable-container');
-            if (!container) {
-                console.error('Container not found');
-                return;
-            }
-            const items = Array.from(container.querySelectorAll('.sortable-item'));
-            console.log('Found items:', items.length);
-            // Find the items to swap by their data-rank attribute
-            const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
-            const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
-            if (!currentItem || !aboveItem) {
-                console.error('Items not found:', currentItem, aboveItem);
-                return;
-            }
-            console.log('Swapping items:', currentItem, aboveItem);
-            // Swap the items in the DOM
-            if (aboveItem.previousElementSibling) {
-                container.insertBefore(currentItem, aboveItem);
-            } else {
-                container.insertBefore(currentItem, container.firstChild);
-            }
-            // Update ranks
-            window.updateRanksAfterMove();
-        };
-        window.moveItemDown = function(currentRank) {
-            console.log('Moving item down:', currentRank);
-            const container = document.getElementById('sortable-container');
-            if (!container) {
-                console.error('Container not found');
-                return;
-            }
-            const items = Array.from(container.querySelectorAll('.sortable-item'));
-            console.log('Found items:', items.length);
-            if (currentRank >= items.length) return; // Already at the bottom
-            // Find the items to swap by their data-rank attribute
-            const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
-            const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
-            if (!currentItem || !belowItem) {
-                console.error('Items not found for moving down');
-                return;
-            }
-            console.log('Swapping items down:', currentItem, belowItem);
-            // Swap the items in the DOM - insert the current item after the below item
-            container.insertBefore(currentItem, belowItem.nextElementSibling);
-            // Update ranks
-            window.updateRanksAfterMove();
-        };
-        window.updateRanksAfterMove = function() {
-            console.log('Updating ranks');
-            const container = document.getElementById('sortable-container');
-            if (!container) {
-                console.error('Container not found for rank update');
-                return;
-            }
-            const items = Array.from(container.querySelectorAll('.sortable-item'));
-            const orderInput = document.querySelector('#current-order textarea');
-            if (!orderInput) {
-                console.error('Order input not found');
-                return;
-            }
-            const order = [];
-            items.forEach((item, index) => {
-                const rank = index + 1;
-                const docId = parseInt(item.getAttribute('data-doc-id'));
-                // Update rank display
-                const rankBadge = item.querySelector('.rank-badge');
-                if (rankBadge) rankBadge.textContent = rank;
-                // Update item classes
-                item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
-                item.classList.add(`rank-bg-${rank}`);
-                // Update data attribute
-                item.setAttribute('data-rank', rank);
-                // Update button states
-                const upBtn = item.querySelector('.up-btn');
-                const downBtn = item.querySelector('.down-btn');
-                if (upBtn) {
-                    if (rank == 1) {
-                        upBtn.setAttribute('disabled', 'disabled');
-                    } else {
-                        upBtn.removeAttribute('disabled');
-                    }
                 }
-                if (downBtn) {
-                    if (rank == items.length) {
-                        downBtn.setAttribute('disabled', 'disabled');
-                    } else {
-                        downBtn.removeAttribute('disabled');
-                    }
                 }
-                order.push(docId);
-            });
-            // Update hidden input with JSON
-            console.log('New order:', order);
-            const newOrderValue = JSON.stringify(order);
-            orderInput.value = newOrderValue;
-            // Trigger input event
-            const event = new Event('input', { bubbles: true });
-            orderInput.dispatchEvent(event);
-        };
-        document.addEventListener('DOMContentLoaded', function() {
-            console.log('DOM loaded, initializing ranking interface');
-            // Function to initialize the interface
-            function initializeRankingInterface() {
                 const container = document.getElementById('sortable-container');
                 if (!container) {
-                    console.log('Container not found, retrying in 200ms');
-                    setTimeout(initializeRankingInterface, 200);
                     return;
                 }
-                console.log('Sortable container found, setting up');
-                // Add click events directly to buttons as a backup
-                const upButtons = container.querySelectorAll('.up-btn');
-                const downButtons = container.querySelectorAll('.down-btn');
-                upButtons.forEach(btn => {
-                    btn.addEventListener('click', function() {
-                        const item = this.closest('.sortable-item');
-                        const rank = parseInt(item.getAttribute('data-rank'));
-                        window.moveItemUp(rank);
-                    });
-                });
-                downButtons.forEach(btn => {
-                    btn.addEventListener('click', function() {
-                        const item = this.closest('.sortable-item');
-                        const rank = parseInt(item.getAttribute('data-rank'));
-                        window.moveItemDown(rank);
-                    });
                 });
-                // Initialize drag-and-drop as fallback
-                if (typeof Sortable !== 'undefined') {
-                    if (!container.sortableInstance) {
-                        container.sortableInstance = new Sortable(container, {
-                            animation: 150,
-                            ghostClass: "sortable-ghost",
-                            onEnd: function() {
-                                window.updateRanksAfterMove();
-                            }
                         });
                     }
-                } else {
-                    console.log('Sortable library not available');
                 }
-                // Initialize the ranking
-                window.updateRanksAfterMove();
-            }
-            // Initialize immediately
-            initializeRankingInterface();
-            // Also observe DOM changes to reinitialize when needed
-            const targetNode = document.getElementById('sortable-list-container');
-            if (targetNode) {
-                const config = { childList: true, subtree: true };
-                const observer = new MutationObserver(function(mutationsList) {
-                    for(const mutation of mutationsList) {
-                        if (mutation.type === 'childList') {
-                            if (document.getElementById('sortable-container')) {
-                                console.log('DOM changed, reinitializing');
-                                initializeRankingInterface();
                             }
                         }
-                    }
-                });
-                observer.observe(targetNode, config);
             }
-        });
-        </script>
-        <style>
-        .sortable-container {
-            display: flex;
-            flex-direction: column;
-            gap: 12px;
-            min-height: 200px;
-            padding: 16px;
-            background-color: #f8f9fa;
-            border-radius: 8px;
-        }
-        .sortable-item {
-            padding: 14px;
-            background-color: #fff;
-            border: 1px solid #e0e0e0;
-            border-radius: 6px;
-            display: flex;
-            align-items: center;
-            transition: all 0.2s ease;
-        }
-        .sortable-item:hover {
-            background-color: #f8f9fa;
-            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-        }
-        .rank-controls {
-            display: flex;
-            flex-direction: column;
-            align-items: center;
-            margin-right: 16px;
-        }
-        .rank-badge {
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            width: 28px;
-            height: 28px;
-            border-radius: 50%;
-            background-color: #6c757d;
-            color: white;
-            font-weight: bold;
-            margin: 6px 0;
-            flex-shrink: 0;
-        }
-        .rank-btn {
-            width: 28px;
-            height: 28px;
-            border: none;
-            background-color: #f0f0f0;
-            border-radius: 4px;
-            margin: 2px 0;
-            cursor: pointer;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            font-size: 14px;
-        }
-        .rank-btn:hover:not([disabled]) {
-            background-color: #e0e0e0;
-        }
-        .rank-btn:active:not([disabled]) {
-            background-color: #d0d0d0;
-        }
-        .rank-btn:disabled {
-            opacity: 0.5;
-            cursor: not-allowed;
-        }
-        .doc-content {
-            flex: 1;
-            line-height: 1.5;
-            word-break: break-word;
-        }
-        /* More professional color scheme for rank badges */
-        .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
-        .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
-        .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
-        .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
-        .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
-        /* Lower ranks get progressively more gray */
-        .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
-            background-color: #64748b;
-        }
-        .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
-            background-color: #94a3b8;
-            color: #0f172a;
-        }
-        .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
-        .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
-        .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
-        .rank-bg-20 .rank-badge {
-            background-color: #cbd5e1;
-            color: #0f172a;
-        }
-        </style>
-        """
-        gr.HTML(js_code)
-        submit_btn.click(
-            save_ranking,
-            inputs=[order_state, current_sample_id],
-            outputs=[status_box, progress_text]
-        )
-        next_btn.click(
-            next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
-        ).then(
-            load_sample,
-            inputs=[current_sample_id],
-            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
-        )
-        prev_btn.click(
-            prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
-        ).then(
-            load_sample,
-            inputs=[current_sample_id],
-            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
-        )
-        save_btn.click(save_results, outputs=[status_box])
-        demo.load(lambda: load_sample(samples[0]['id']),
-                  outputs=[query_text, sortable_list, order_state, progress_text, status_box])
-    return demo
-# Main app with file upload capability
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# MTEB Human Evaluation Demo")
@@ -547,35 +595,63 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             This interface allows you to evaluate the relevance of documents for reranking tasks.
             """)
-            # Function to get the most recent task file
             def get_latest_task_file():
-                # Check first in uploaded_tasks directory
-                os.makedirs("uploaded_tasks", exist_ok=True)
-                uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
-                if uploaded_tasks:
-                    # Sort by modification time, newest first
-                    uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
-                    return os.path.join("uploaded_tasks", uploaded_tasks[0])
-                # Fall back to default example
-                return "AskUbuntuDupQuestions_human_eval.json"
-            # Load the task file
             task_file = get_latest_task_file()
             try:
-                with open(task_file, "r") as f:
-                    task_data = json.load(f)
-                # Show which task is currently loaded
-                gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
-                # Display the interface
-                reranking_demo = create_reranking_interface(task_data)
             except Exception as e:
                 gr.Markdown(f"**Error loading task: {str(e)}**")
                 gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
         with gr.TabItem("Upload & Evaluate"):
             gr.Markdown("""
@@ -781,4 +857,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
 if __name__ == "__main__":
-    demo.launch()

 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation using drag and drop."""
+    try:
+        samples = task_data["samples"]
+        results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
+        completed_samples = {s["id"]: False for s in samples}
+        # Define helper functions before UI elements are created
+        def generate_sortable_html(candidates, existing_ranks=None):
+            """Generate the HTML for the sortable list with up/down buttons.
+
+            Args:
+                candidates: Documents to display; each one is passed through
+                    ``html.escape`` before being embedded in the markup.
+                existing_ranks: Optional ranks where ``existing_ranks[i]`` is the
+                    rank of ``candidates[i]``. Only used when its length equals
+                    ``len(candidates)``.
+
+            Returns:
+                A ``(html, order)`` tuple, where ``order`` is the list of
+                candidate indices in display order. If anything raises, an
+                error ``<div>`` and an empty list are returned instead.
+            """
             try:
+                # Display order: sort by the supplied ranks when there is one per
+                # candidate, otherwise keep the candidates' original order.
+                if existing_ranks and len(existing_ranks) == len(candidates):
+                    order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
+                else:
+                    order = list(range(len(candidates)))
+                html = '<div id="sortable-container" class="sortable-container">'
+                for rank_minus_1, idx in enumerate(order):
+                    if idx < len(candidates):
+                        doc = candidates[idx]
+                        rank = rank_minus_1 + 1
+                        # Imported under an alias because the local name ``html``
+                        # is the output string being built.
+                        import html as html_escaper
+                        escaped_doc = html_escaper.escape(doc)
+                        # Add navigation buttons (up/down arrows)
+                        up_disabled = "disabled" if rank == 1 else ""
+                        down_disabled = "disabled" if rank == len(candidates) else ""
+                        # NOTE(review): the onclick handlers below embed the rank
+                        # computed at render time; confirm the client-side script
+                        # keeps them in sync after items are reordered.
+                        html += f'''\
+                        <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
+                            <div class="rank-controls">
+                                <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
+                                <div class="rank-badge">{rank}</div>
+                                <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
+                            </div>
+                            <div class="doc-content">{escaped_doc}</div>
+                        </div>
+                        '''
+                html += '</div>'
+                # Also return the computed order for proper initialization
+                return html, order
             except Exception as e:
+                # Best effort: log the failure and show it in place of the list.
+                print(f"Error in generate_sortable_html: {str(e)}")
+                return f'<div class="error">Error generating ranking interface: {str(e)}</div>', []
+        def save_ranking(order_json, sample_id):
+            """Validate a submitted ranking, record it, and persist all results.
+
+            Args:
+                order_json: JSON array of candidate indices in ranked order
+                    (position 0 is rank 1), as held by the hidden order textbox.
+                sample_id: ID of the sample the ranking belongs to.
+
+            Returns:
+                A ``(status_message, progress_message)`` tuple for the status
+                and progress textboxes. Failures are reported through the
+                status message; nothing is raised to the caller.
+            """
+            def current_progress():
+                # Derive progress from the live completion map. The component's
+                # ``.value`` attribute only holds the value it was constructed
+                # with, so returning it would reset the display on any warning.
+                return f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+
+            try:
+                if not order_json or order_json == "[]":
+                    return "⚠️ Drag documents to set the ranking before submitting.", current_progress()
+                order = json.loads(order_json)
+                if not isinstance(order, list):
+                    # Valid JSON but not an array of indices.
+                    return "⚠️ Error decoding ranking order. Please try again.", current_progress()
+                sample = next((s for s in samples if s["id"] == sample_id), None)
+                if not sample:
+                    return "⚠️ Sample not found.", current_progress()
+                num_candidates = len(sample["candidates"])
+                if len(order) != num_candidates:
+                    return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", current_progress()
+                # rankings[doc_idx] is the 1-based rank assigned to that document.
+                rankings = [0] * num_candidates
+                for rank, doc_idx in enumerate(order, start=1):
+                    # Require a real, in-range integer: a negative index would
+                    # silently wrap around and a bool would index as 0 or 1.
+                    is_index = isinstance(doc_idx, int) and not isinstance(doc_idx, bool)
+                    if not is_index or not 0 <= doc_idx < num_candidates:
+                        return f"⚠️ Error processing ranking order: Invalid document index {doc_idx} found in order.", current_progress()
+                    rankings[doc_idx] = rank
+                # A duplicated index leaves some slot at 0, which this catches.
+                if sorted(rankings) != list(range(1, num_candidates + 1)):
+                    return "⚠️ Ranking validation failed. Ranks are not 1 to N.", current_progress()
+                annotation = {"sample_id": sample_id, "rankings": rankings}
+                # Check if this sample was already annotated
+                existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
+                if existing_idx is not None:
+                    results["annotations"][existing_idx] = annotation
+                else:
+                    results["annotations"].append(annotation)
+                completed_samples[sample_id] = True
+                # Persist after every submission; a failed write is reported but
+                # the annotation stays recorded in memory.
+                try:
+                    output_path = f"{task_data['task_name']}_human_results.json"
+                    with open(output_path, "w") as f:
+                        json.dump(results, f, indent=2)
+                    # Check if all samples are complete
+                    all_completed = sum(completed_samples.values()) == len(samples)
+                    completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
+                    return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", current_progress()
+                except Exception as file_error:
+                    print(f"Error saving file: {str(file_error)}")
+                    # Still mark as completed in memory even if file save fails
+                    return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", current_progress()
+            except json.JSONDecodeError:
+                return "⚠️ Error decoding ranking order. Please try again.", current_progress()
+            except Exception as e:
+                import traceback
+                print(traceback.format_exc())
+                return f"Error saving ranking: {str(e)}", current_progress()
+        def load_sample(sample_id):
+            """Load a sample into the interface.
+
+            Args:
+                sample_id: ID of the sample to display.
+
+            Returns:
+                A 5-tuple ``(query, list_html, order_json, progress, status)``.
+                When the ID is unknown, the query, list and progress slots are
+                ``gr.update()`` placeholders; on an exception, fixed error
+                strings are returned instead.
+            """
+            try:
+                sample = next((s for s in samples if s["id"] == sample_id), None)
+                if not sample:
+                    return gr.update(), gr.update(), "[]", gr.update(), "Sample not found"
+                # Reuse a previously submitted ranking so the list is shown in
+                # the order the annotator last saved.
+                existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
+                # Get both the HTML and the initial order
+                new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
+                # Convert initial order to JSON string for state
+                initial_order_json = json.dumps(initial_order)
+                status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
+                progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+                return sample["query"], new_html, initial_order_json, progress, status
+            except Exception as e:
+                # Log the traceback and surface the error in the status slot.
+                import traceback
+                print(traceback.format_exc())
+                return "Error loading sample", "<div>Error loading sample content</div>", "[]", "Error", f"Error: {str(e)}"
+        def next_sample_id(current_id):
+            """Return the ID of the sample after ``current_id``.
+
+            Stays on the last sample when already at the end. An unknown
+            ``current_id`` maps to the first sample (or is returned unchanged
+            when there are no samples). On an exception, ``current_id`` is
+            returned unchanged.
+            """
+            try:
+                # -1 marks "current_id not found in samples".
+                current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
+                if current_idx == -1:
+                    return samples[0]["id"] if samples else current_id
+                # Clamp to the last valid index.
+                next_idx = min(current_idx + 1, len(samples) - 1)
+                return samples[next_idx]["id"]
+            except Exception as e:
+                print(f"Error in next_sample_id: {str(e)}")
+                return current_id
+        def prev_sample_id(current_id):
+            """Return the ID of the sample before ``current_id``.
+
+            Stays on the first sample when already at the start. An unknown
+            ``current_id`` maps to the first sample (or is returned unchanged
+            when there are no samples). On an exception, ``current_id`` is
+            returned unchanged.
+            """
+            try:
+                # -1 marks "current_id not found in samples".
+                current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
+                if current_idx == -1:
+                    return samples[0]["id"] if samples else current_id
+                # Clamp to the first valid index.
+                prev_idx = max(current_idx - 1, 0)
+                return samples[prev_idx]["id"]
+            except Exception as e:
+                print(f"Error in prev_sample_id: {str(e)}")
+                return current_id
+        def save_results():
+            """Write all annotations to disk and return a status message.
+
+            Writes the in-memory ``results`` twice: once to a timestamped file
+            and once to ``<task_name>_human_results.json``. Returns a success
+            message naming both paths, or a warning message if a write fails.
+            """
+            output_path = f"{task_data['task_name']}_human_results.json"
+            try:
+                # Create backup with timestamp
+                from datetime import datetime
+                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
+                # First create a backup
+                # (this is a second copy of the current results, not a copy of
+                # the file previously on disk)
+                with open(backup_path, "w") as f:
+                    json.dump(results, f, indent=2)
+                # Then save to the main file
+                with open(output_path, "w") as f:
+                    json.dump(results, f, indent=2)
+                return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
+            except Exception as e:
+                return f"⚠️ Error saving results file: {str(e)}"
+        # Create an empty initial sample ID with proper error handling
+        initial_sample_id = samples[0]["id"] if samples else None
+        if not initial_sample_id:
+            print("WARNING: No samples found in task data")
+            return gr.HTML("No samples found in the task data. Please check your task file and try again.")
+        with gr.Blocks(theme=gr.themes.Soft()) as demo:
+            gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
+            with gr.Accordion("Instructions", open=True):
+                gr.Markdown("""
+                ## Task Instructions
+                {instructions}
+                ### How to use this interface:
+                1. Read the query at the top
+                2. Drag and drop documents to reorder them based on relevance
+                3. Top document = Rank 1, Second = Rank 2, etc.
+                4. Click "Submit Rankings" when you're done with the current query
+                5. Use "Previous" and "Next" to navigate between queries
+                6. Click "Save All Results" periodically to ensure your work is saved
+                """.format(instructions=task_data.get("instructions", "Rank the documents based on their relevance to the query.")))
+            current_sample_id = gr.State(value=initial_sample_id)
+            with gr.Row():
+                progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
+                status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
+            with gr.Group():
+                gr.Markdown("## Query:")
+                query_text = gr.Textbox(value="Loading query...", label="", interactive=False)
+                gr.Markdown("## Documents to Rank (Drag to Reorder):")
+                sortable_list = gr.HTML("Loading documents...", elem_id="sortable-list-container")
+                order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
+                with gr.Row():
+                    prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
+                    submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
+                    next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
+                save_btn = gr.Button("💾 Save All Results", variant="secondary")
+            js_code = """
+            <script src="https://cdn.jsdelivr.net/npm/sortablejs@1.15.0/Sortable.min.js"></script>
+            <script>
+            // Make the functions globally available
+            window.moveItemUp = function(currentRank) {
+                console.log('Moving item up:', currentRank);
+                if (currentRank <= 1) return; // Already at the top
+                const container = document.getElementById('sortable-container');
+                if (!container) {
+                    console.error('Container not found');
+                    return;
+                }
+                const items = Array.from(container.querySelectorAll('.sortable-item'));
+                console.log('Found items:', items.length);
+                // Find the items to swap by their data-rank attribute
+                const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
+                const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
+                if (!currentItem || !aboveItem) {
+                    console.error('Items not found:', currentItem, aboveItem);
+                    return;
                 }
+                console.log('Swapping items:', currentItem, aboveItem);
+                // Swap the items in the DOM
+                if (aboveItem.previousElementSibling) {
+                    container.insertBefore(currentItem, aboveItem);
+                } else {
+                    container.insertBefore(currentItem, container.firstChild);
                 }
+                // Update ranks
+                window.updateRanksAfterMove();
+            };
+            window.moveItemDown = function(currentRank) {
+                console.log('Moving item down:', currentRank);
                 const container = document.getElementById('sortable-container');
                 if (!container) {
+                    console.error('Container not found');
                     return;
                 }
+                const items = Array.from(container.querySelectorAll('.sortable-item'));
+                console.log('Found items:', items.length);
+                if (currentRank >= items.length) return; // Already at the bottom
+                // Find the items to swap by their data-rank attribute
+                const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
+                const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
+                if (!currentItem || !belowItem) {
+                    console.error('Items not found for moving down');
+                    return;
+                }
+                console.log('Swapping items down:', currentItem, belowItem);
+                // Swap the items in the DOM - insert the current item after the below item
+                container.insertBefore(currentItem, belowItem.nextElementSibling);
+                // Update ranks
+                window.updateRanksAfterMove();
+            };
+            window.updateRanksAfterMove = function() {
+                console.log('Updating ranks');
+                const container = document.getElementById('sortable-container');
+                if (!container) {
+                    console.error('Container not found for rank update');
+                    return;
+                }
+                const items = Array.from(container.querySelectorAll('.sortable-item'));
+                const orderInput = document.querySelector('#current-order textarea');
+                if (!orderInput) {
+                    console.error('Order input not found');
+                    return;
+                }
+                const order = [];
+                items.forEach((item, index) => {
+                    const rank = index + 1;
+                    const docId = parseInt(item.getAttribute('data-doc-id'));
+                    // Update rank display
+                    const rankBadge = item.querySelector('.rank-badge');
+                    if (rankBadge) rankBadge.textContent = rank;
+                    // Update item classes
+                    item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
+                    item.classList.add(`rank-bg-${rank}`);
+                    // Update data attribute
+                    item.setAttribute('data-rank', rank);
+                    // Update button states
+                    const upBtn = item.querySelector('.up-btn');
+                    const downBtn = item.querySelector('.down-btn');
+                    if (upBtn) {
+                        if (rank == 1) {
+                            upBtn.setAttribute('disabled', 'disabled');
+                        } else {
+                            upBtn.removeAttribute('disabled');
+                        }
+                    }
+                    if (downBtn) {
+                        if (rank == items.length) {
+                            downBtn.setAttribute('disabled', 'disabled');
+                        } else {
+                            downBtn.removeAttribute('disabled');
+                        }
+                    }
+                    order.push(docId);
                 });
+                // Update hidden input with JSON
+                console.log('New order:', order);
+                const newOrderValue = JSON.stringify(order);
+                orderInput.value = newOrderValue;
+                // Trigger input event
+                const event = new Event('input', { bubbles: true });
+                orderInput.dispatchEvent(event);
+            };
+            document.addEventListener('DOMContentLoaded', function() {
+                console.log('DOM loaded, initializing ranking interface');
+                // Function to initialize the interface
+                function initializeRankingInterface() {
+                    const container = document.getElementById('sortable-container');
+                    if (!container) {
+                        console.log('Container not found, retrying in 200ms');
+                        setTimeout(initializeRankingInterface, 200);
+                        return;
+                    }
+                    console.log('Sortable container found, setting up');
+                    // Add click events directly to buttons as a backup
+                    const upButtons = container.querySelectorAll('.up-btn');
+                    const downButtons = container.querySelectorAll('.down-btn');
+                    upButtons.forEach(btn => {
+                        btn.addEventListener('click', function() {
+                            const item = this.closest('.sortable-item');
+                            const rank = parseInt(item.getAttribute('data-rank'));
+                            window.moveItemUp(rank);
+                        });
+                    });
+                    downButtons.forEach(btn => {
+                        btn.addEventListener('click', function() {
+                            const item = this.closest('.sortable-item');
+                            const rank = parseInt(item.getAttribute('data-rank'));
+                            window.moveItemDown(rank);
                         });
+                    });
+                    // Initialize drag-and-drop as fallback
+                    if (typeof Sortable !== 'undefined') {
+                        if (!container.sortableInstance) {
+                            container.sortableInstance = new Sortable(container, {
+                                animation: 150,
+                                ghostClass: "sortable-ghost",
+                                onEnd: function() {
+                                    window.updateRanksAfterMove();
+                                }
+                            });
+                        }
+                    } else {
+                        console.log('Sortable library not available');
                     }
+                    // Initialize the ranking
+                    window.updateRanksAfterMove();
                 }
+                // Initialize immediately
+                initializeRankingInterface();
+                // Also observe DOM changes to reinitialize when needed
+                const targetNode = document.getElementById('sortable-list-container');
+                if (targetNode) {
+                    const config = { childList: true, subtree: true };
+                    const observer = new MutationObserver(function(mutationsList) {
+                        for(const mutation of mutationsList) {
+                            if (mutation.type === 'childList') {
+                                if (document.getElementById('sortable-container')) {
+                                    console.log('DOM changed, reinitializing');
+                                    initializeRankingInterface();
+                                }
                             }
                         }
+                    });
+                    observer.observe(targetNode, config);
+                }
+            });
+            </script>
+            <style>
+            .sortable-container {
+                display: flex;
+                flex-direction: column;
+                gap: 12px;
+                min-height: 200px;
+                padding: 16px;
+                background-color: #f8f9fa;
+                border-radius: 8px;
             }
+            .sortable-item {
+                padding: 14px;
+                background-color: #fff;
+                border: 1px solid #e0e0e0;
+                border-radius: 6px;
+                display: flex;
+                align-items: center;
+                transition: all 0.2s ease;
+            }
+            .sortable-item:hover {
+                background-color: #f8f9fa;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            }
+            .rank-controls {
+                display: flex;
+                flex-direction: column;
+                align-items: center;
+                margin-right: 16px;
+            }
+            .rank-badge {
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                width: 28px;
+                height: 28px;
+                border-radius: 50%;
+                background-color: #6c757d;
+                color: white;
+                font-weight: bold;
+                margin: 6px 0;
+                flex-shrink: 0;
+            }
+            .rank-btn {
+                width: 28px;
+                height: 28px;
+                border: none;
+                background-color: #f0f0f0;
+                border-radius: 4px;
+                margin: 2px 0;
+                cursor: pointer;
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                font-size: 14px;
+            }
+            .rank-btn:hover:not([disabled]) {
+                background-color: #e0e0e0;
+            }
+            .rank-btn:active:not([disabled]) {
+                background-color: #d0d0d0;
+            }
+            .rank-btn:disabled {
+                opacity: 0.5;
+                cursor: not-allowed;
+            }
+            .doc-content {
+                flex: 1;
+                line-height: 1.5;
+                word-break: break-word;
+            }
+            /* More professional color scheme for rank badges */
+            .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
+            .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
+            .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
+            .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
+            .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
+            /* Lower ranks get progressively more gray */
+            .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
+                background-color: #64748b;
+            }
+            .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
+                background-color: #94a3b8;
+                color: #0f172a;
+            }
+            .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
+            .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
+            .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
+            .rank-bg-20 .rank-badge {
+                background-color: #cbd5e1;
+                color: #0f172a;
+            }
+            .error {
+                padding: 16px;
+                background-color: #fee2e2;
+                border: 1px solid #f87171;
+                color: #b91c1c;
+                border-radius: 6px;
+                margin: 16px 0;
+            }
+            </style>
+            """
+            gr.HTML(js_code)
+            submit_btn.click(
+                save_ranking,
+                inputs=[order_state, current_sample_id],
+                outputs=[status_box, progress_text]
+            )
+            next_btn.click(
+                next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
+            ).then(
+                load_sample,
+                inputs=[current_sample_id],
+                outputs=[query_text, sortable_list, order_state, progress_text, status_box]
+            )
+            prev_btn.click(
+                prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
+            ).then(
+                load_sample,
+                inputs=[current_sample_id],
+                outputs=[query_text, sortable_list, order_state, progress_text, status_box]
+            )
+            save_btn.click(save_results, outputs=[status_box])
+            # Use a custom loading function with proper error handling
+            def safe_load_initial():
+                """Load the first sample for the page-load event.
+
+                Returns the same 5-tuple as ``load_sample`` (query, list HTML,
+                order JSON, progress, status), substituting fixed placeholder
+                strings when there are no samples or when loading raises.
+                """
+                try:
+                    if initial_sample_id and samples:
+                        return load_sample(initial_sample_id)
+                    else:
+                        return "No query available", "<div>No documents available</div>", "[]", "No progress data", "Error: No samples found"
+                except Exception as e:
+                    print(f"Error in initial load: {str(e)}")
+                    return "Error loading query", "<div>Error loading documents</div>", "[]", "Error", f"Error: {str(e)}"
+            # Use the safe loading function to prevent scheduling failures
+            demo.load(safe_load_initial,
+                     outputs=[query_text, sortable_list, order_state, progress_text, status_box])
+        return demo
+    except Exception as e:
+        import traceback
+        print(f"Error creating reranking interface: {traceback.format_exc()}")
+        # Return a simple error interface instead of failing completely
+        with gr.Blocks() as error_demo:
+            gr.Markdown("# Error Creating Reranking Interface")
+            gr.Markdown(f"An error occurred while creating the interface: **{str(e)}**")
+            gr.Markdown("Please check your task data and try again.")
+        return error_demo
+# Main app with file upload capability and better error handling
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# MTEB Human Evaluation Demo")
             This interface allows you to evaluate the relevance of documents for reranking tasks.
             """)
+            # Function to get the most recent task file with error handling
             def get_latest_task_file():
+                """Return the path of the task file to load, or ``None``.
+
+                Prefers the most recently modified ``.json`` file in
+                ``uploaded_tasks/`` (creating that directory if needed), then
+                falls back to the bundled example file if it exists. Returns
+                ``None`` when neither is available or when an error occurs.
+                """
+                try:
+                    # Check first in uploaded_tasks directory
+                    os.makedirs("uploaded_tasks", exist_ok=True)
+                    uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
+                    if uploaded_tasks:
+                        # Sort by modification time, newest first
+                        uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
+                        return os.path.join("uploaded_tasks", uploaded_tasks[0])
+                    # Fall back to default example
+                    if os.path.exists("AskUbuntuDupQuestions_human_eval.json"):
+                        return "AskUbuntuDupQuestions_human_eval.json"
+                    # If no files found
+                    return None
+                except Exception as e:
+                    # Treat any filesystem error as "no task available".
+                    print(f"Error getting latest task file: {str(e)}")
+                    return None
+            # Load the task file with proper error handling
             task_file = get_latest_task_file()
+            task_data = None
             try:
+                if task_file and os.path.exists(task_file):
+                    with open(task_file, "r") as f:
+                        task_data = json.load(f)
+                    # Show which task is currently loaded
+                    gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
+                    # Display the interface
+                    reranking_demo = create_reranking_interface(task_data)
+                else:
+                    gr.Markdown("**No task file found**")
+                    gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
+                    # Create a dummy interface with instructions
+                    with gr.Blocks() as dummy_demo:
+                        gr.Markdown("### No Task Loaded")
+                        gr.Markdown("Please go to the 'Upload & Evaluate' tab to upload a task file.")
+                    reranking_demo = dummy_demo
             except Exception as e:
+                import traceback
+                print(f"Error loading task: {traceback.format_exc()}")
                 gr.Markdown(f"**Error loading task: {str(e)}**")
                 gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
+                # Create a simple error interface
+                with gr.Blocks() as error_demo:
+                    gr.Markdown("### Error Loading Task")
+                    gr.Markdown(f"An error occurred: **{str(e)}**")
+                    gr.Markdown("Please try uploading a different task file.")
+                reranking_demo = error_demo
         with gr.TabItem("Upload & Evaluate"):
             gr.Markdown("""
             download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
 if __name__ == "__main__":
+    # Launch the app and block until the server stops. Do not pass
+    # prevent_thread_lock=True here: launch() would return immediately, the
+    # script would reach its end, and the Gradio server would terminate.
+    try:
+        demo.launch()
+    except Exception:
+        import traceback
+        print(f"Error launching demo: {traceback.format_exc()}")
+        print("\nTrying alternative launch method...")
+        try:
+            # Alternative launch method
+            demo.launch(share=False, debug=True)
+        except Exception as e2:
+            print(f"Alternative launch also failed: {str(e2)}")
+            print("\nPlease check your Gradio installation and try again.")