Spaces:

AdnanElAssadi
/

MTEB-Human-Eval-Demo

Sleeping

App Files Community

AdnanElAssadi committed on Apr 7

Commit

76c554c

verified ·

1 Parent(s): d986f08

Update app.py

Browse files

Files changed (1) hide show

app.py +416 -611

app.py CHANGED Viewed

@@ -5,585 +5,433 @@ from pathlib import Path
 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation using drag and drop."""
-    try:
-        samples = task_data["samples"]
-        results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
-        completed_samples = {s["id"]: False for s in samples}
-        # Define helper functions before the UI elements are created
-        def generate_sortable_html(candidates, existing_ranks=None):
-            """Generate the HTML for the sortable list with up/down buttons."""
-            try:
-                if existing_ranks and len(existing_ranks) == len(candidates):
-                    order = sorted(range(len(candidates)), key=lambda i: existing_ranks[i])
-                else:
-                    order = list(range(len(candidates)))
-                html = '<div id="sortable-container" class="sortable-container">'
-                for rank_minus_1, idx in enumerate(order):
-                    if idx < len(candidates):
-                        doc = candidates[idx]
-                        rank = rank_minus_1 + 1
-                        import html as html_escaper
-                        escaped_doc = html_escaper.escape(doc)
-                        # Add navigation buttons (up/down arrows)
-                        up_disabled = "disabled" if rank == 1 else ""
-                        down_disabled = "disabled" if rank == len(candidates) else ""
-                        html += f'''\
-                        <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
-                            <div class="rank-controls">
-                                <button type="button" class="rank-btn up-btn" {up_disabled} onclick="window.moveItemUp({rank})">▲</button>
-                                <div class="rank-badge">{rank}</div>
-                                <button type="button" class="rank-btn down-btn" {down_disabled} onclick="window.moveItemDown({rank})">▼</button>
-                            </div>
-                            <div class="doc-content">{escaped_doc}</div>
-                        </div>
-                        '''
-                html += '</div>'
-                # Also return the computed order for proper initialization
-                return html, order
-            except Exception as e:
-                print(f"Error in generate_sortable_html: {str(e)}")
-                return f'<div class="error">Error generating ranking interface: {str(e)}</div>', []
-        def save_ranking(order_json, sample_id):
-            """Save the current ranking to results."""
-            try:
-                if not order_json or order_json == "[]":
-                    return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
-                order = json.loads(order_json)
-                sample = next((s for s in samples if s["id"] == sample_id), None)
-                if not sample:
-                    return "⚠️ Sample not found.", progress_text.value
-                num_candidates = len(sample["candidates"])
-                if len(order) != num_candidates:
-                    return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
-                rankings = [0] * num_candidates
-                try:
-                    for rank_minus_1, doc_idx in enumerate(order):
-                        if doc_idx < num_candidates:
-                            rankings[doc_idx] = rank_minus_1 + 1
-                        else:
-                            raise ValueError(f"Invalid document index {doc_idx} found in order.")
-                except Exception as e:
-                    return f"⚠️ Error processing ranking order: {str(e)}", progress_text.value
-                if sorted(rankings) != list(range(1, num_candidates + 1)):
-                    return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
-                annotation = {"sample_id": sample_id, "rankings": rankings}
-                # Check if this sample was already annotated
-                existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
-                if existing_idx is not None:
-                    results["annotations"][existing_idx] = annotation
                 else:
-                    results["annotations"].append(annotation)
-                completed_samples[sample_id] = True
-                # Save results with timestamp and better error handling
-                try:
-                    output_path = f"{task_data['task_name']}_human_results.json"
-                    with open(output_path, "w") as f:
-                        json.dump(results, f, indent=2)
-                    # Check if all samples are complete
-                    all_completed = sum(completed_samples.values()) == len(samples)
-                    completion_message = "🎉 All samples completed! You can save and submit your results." if all_completed else ""
-                    return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed) {completion_message}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-                except Exception as file_error:
-                    print(f"Error saving file: {str(file_error)}")
-                    # Still mark as completed in memory even if file save fails
-                    return f"⚠️ Rankings recorded but file save failed: {str(file_error)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            except json.JSONDecodeError:
-                return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
-            except Exception as e:
-                import traceback
-                print(traceback.format_exc())
-                return f"Error saving ranking: {str(e)}", progress_text.value
-        def load_sample(sample_id):
-            """Load a sample into the interface."""
-            try:
-                sample = next((s for s in samples if s["id"] == sample_id), None)
-                if not sample:
-                    return gr.update(), gr.update(), "[]", gr.update(), "Sample not found"
-                existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
-                # Get both the HTML and the initial order
-                new_html, initial_order = generate_sortable_html(sample["candidates"], existing_ranking)
-                # Convert initial order to JSON string for state
-                initial_order_json = json.dumps(initial_order)
-                status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
-                progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-                return sample["query"], new_html, initial_order_json, progress, status
-            except Exception as e:
-                import traceback
-                print(traceback.format_exc())
-                return "Error loading sample", "<div>Error loading sample content</div>", "[]", "Error", f"Error: {str(e)}"
-        def next_sample_id(current_id):
-            try:
-                current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-                if current_idx == -1:
-                    return samples[0]["id"] if samples else current_id
-                next_idx = min(current_idx + 1, len(samples) - 1)
-                return samples[next_idx]["id"]
-            except Exception as e:
-                print(f"Error in next_sample_id: {str(e)}")
-                return current_id
-        def prev_sample_id(current_id):
-            try:
-                current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-                if current_idx == -1:
-                    return samples[0]["id"] if samples else current_id
-                prev_idx = max(current_idx - 1, 0)
-                return samples[prev_idx]["id"]
-            except Exception as e:
-                print(f"Error in prev_sample_id: {str(e)}")
-                return current_id
-        def save_results():
-            output_path = f"{task_data['task_name']}_human_results.json"
-            try:
-                # Create backup with timestamp
-                from datetime import datetime
-                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-                backup_path = f"{task_data['task_name']}_results_{timestamp}.json"
-                # First create a backup
-                with open(backup_path, "w") as f:
-                    json.dump(results, f, indent=2)
-                # Then save to the main file
-                with open(output_path, "w") as f:
-                    json.dump(results, f, indent=2)
-                return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)\nBackup created at {backup_path}"
-            except Exception as e:
-                return f"⚠️ Error saving results file: {str(e)}"
-        # Create an empty initial sample ID with proper error handling
-        initial_sample_id = samples[0]["id"] if samples else None
-        if not initial_sample_id:
-            print("WARNING: No samples found in task data")
-            return gr.HTML("No samples found in the task data. Please check your task file and try again.")
-        with gr.Blocks(theme=gr.themes.Soft()) as demo:
-            gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
-            with gr.Accordion("Instructions", open=True):
-                gr.Markdown("""
-                ## Task Instructions
-                {instructions}
-                ### How to use this interface:
-                1. Read the query at the top
-                2. Drag and drop documents to reorder them based on relevance
-                3. Top document = Rank 1, Second = Rank 2, etc.
-                4. Click "Submit Rankings" when you're done with the current query
-                5. Use "Previous" and "Next" to navigate between queries
-                6. Click "Save All Results" periodically to ensure your work is saved
-                """.format(instructions=task_data.get("instructions", "Rank the documents based on their relevance to the query.")))
-            current_sample_id = gr.State(value=initial_sample_id)
-            with gr.Row():
-                progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
-                status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
-            with gr.Group():
-                gr.Markdown("## Query:")
-                query_text = gr.Textbox(value="Loading query...", label="", interactive=False)
-                gr.Markdown("## Documents to Rank (Drag to Reorder):")
-                sortable_list = gr.HTML("Loading documents...", elem_id="sortable-list-container")
-                order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
-                with gr.Row():
-                    prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
-                    submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
-                    next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
-                save_btn = gr.Button("💾 Save All Results", variant="secondary")
-            js_code = """
-            <script src="https://cdn.jsdelivr.net/npm/[email protected]/Sortable.min.js"></script>
-            <script>
-            // Make the functions globally available
-            window.moveItemUp = function(currentRank) {
-                console.log('Moving item up:', currentRank);
-                if (currentRank <= 1) return; // Already at the top
-                const container = document.getElementById('sortable-container');
-                if (!container) {
-                    console.error('Container not found');
-                    return;
-                }
-                const items = Array.from(container.querySelectorAll('.sortable-item'));
-                console.log('Found items:', items.length);
-                // Find the items to swap by their data-rank attribute
-                const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
-                const aboveItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank - 1);
-                if (!currentItem || !aboveItem) {
-                    console.error('Items not found:', currentItem, aboveItem);
-                    return;
                 }
-                console.log('Swapping items:', currentItem, aboveItem);
-                // Swap the items in the DOM
-                if (aboveItem.previousElementSibling) {
-                    container.insertBefore(currentItem, aboveItem);
-                } else {
-                    container.insertBefore(currentItem, container.firstChild);
                 }
-                // Update ranks
-                window.updateRanksAfterMove();
-            };
-            window.moveItemDown = function(currentRank) {
-                console.log('Moving item down:', currentRank);
                 const container = document.getElementById('sortable-container');
                 if (!container) {
-                    console.error('Container not found');
                     return;
                 }
-                const items = Array.from(container.querySelectorAll('.sortable-item'));
-                console.log('Found items:', items.length);
-                if (currentRank >= items.length) return; // Already at the bottom
-                // Find the items to swap by their data-rank attribute
-                const currentItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank);
-                const belowItem = items.find(item => parseInt(item.getAttribute('data-rank')) === currentRank + 1);
-                if (!currentItem || !belowItem) {
-                    console.error('Items not found for moving down');
-                    return;
-                }
-                console.log('Swapping items down:', currentItem, belowItem);
-                // Swap the items in the DOM - insert the current item after the below item
-                container.insertBefore(currentItem, belowItem.nextElementSibling);
-                // Update ranks
-                window.updateRanksAfterMove();
-            };
-            window.updateRanksAfterMove = function() {
-                console.log('Updating ranks');
-                const container = document.getElementById('sortable-container');
-                if (!container) {
-                    console.error('Container not found for rank update');
                     return;
                 }
-                const items = Array.from(container.querySelectorAll('.sortable-item'));
-                const orderInput = document.querySelector('#current-order textarea');
-                if (!orderInput) {
-                    console.error('Order input not found');
                     return;
                 }
-                const order = [];
-                items.forEach((item, index) => {
-                    const rank = index + 1;
-                    const docId = parseInt(item.getAttribute('data-doc-id'));
-                    // Update rank display
-                    const rankBadge = item.querySelector('.rank-badge');
-                    if (rankBadge) rankBadge.textContent = rank;
-                    // Update item classes
-                    item.className = item.className.replace(/rank-bg-\\d+/g, '').trim();
-                    item.classList.add(`rank-bg-${rank}`);
-                    // Update data attribute
-                    item.setAttribute('data-rank', rank);
-                    // Update button states
-                    const upBtn = item.querySelector('.up-btn');
-                    const downBtn = item.querySelector('.down-btn');
-                    if (upBtn) {
-                        if (rank == 1) {
-                            upBtn.setAttribute('disabled', 'disabled');
-                        } else {
-                            upBtn.removeAttribute('disabled');
-                        }
-                    }
-                    if (downBtn) {
-                        if (rank == items.length) {
-                            downBtn.setAttribute('disabled', 'disabled');
-                        } else {
-                            downBtn.removeAttribute('disabled');
-                        }
                     }
-                    order.push(docId);
                 });
-                // Update hidden input with JSON
-                console.log('New order:', order);
-                const newOrderValue = JSON.stringify(order);
-                orderInput.value = newOrderValue;
-                // Trigger input event
-                const event = new Event('input', { bubbles: true });
-                orderInput.dispatchEvent(event);
-            };
-            document.addEventListener('DOMContentLoaded', function() {
-                console.log('DOM loaded, initializing ranking interface');
-                // Function to initialize the interface
-                function initializeRankingInterface() {
-                    const container = document.getElementById('sortable-container');
-                    if (!container) {
-                        console.log('Container not found, retrying in 200ms');
-                        setTimeout(initializeRankingInterface, 200);
-                        return;
-                    }
-                    console.log('Sortable container found, setting up');
-                    // Add click events directly to buttons as a backup
-                    const upButtons = container.querySelectorAll('.up-btn');
-                    const downButtons = container.querySelectorAll('.down-btn');
-                    upButtons.forEach(btn => {
-                        btn.addEventListener('click', function() {
-                            const item = this.closest('.sortable-item');
-                            const rank = parseInt(item.getAttribute('data-rank'));
-                            window.moveItemUp(rank);
-                        });
-                    });
-                    downButtons.forEach(btn => {
-                        btn.addEventListener('click', function() {
-                            const item = this.closest('.sortable-item');
-                            const rank = parseInt(item.getAttribute('data-rank'));
-                            window.moveItemDown(rank);
-                        });
-                    });
-                    // Initialize drag-and-drop as fallback
-                    if (typeof Sortable !== 'undefined') {
-                        if (!container.sortableInstance) {
-                            container.sortableInstance = new Sortable(container, {
-                                animation: 150,
-                                ghostClass: "sortable-ghost",
-                                onEnd: function() {
-                                    window.updateRanksAfterMove();
-                                }
-                            });
-                        }
-                    } else {
-                        console.log('Sortable library not available');
-                    }
-                    // Initialize the ranking
-                    window.updateRanksAfterMove();
-                }
-                // Initialize immediately
-                initializeRankingInterface();
-                // Also observe DOM changes to reinitialize when needed
-                const targetNode = document.getElementById('sortable-list-container');
-                if (targetNode) {
-                    const config = { childList: true, subtree: true };
-                    const observer = new MutationObserver(function(mutationsList) {
-                        for(const mutation of mutationsList) {
-                            if (mutation.type === 'childList') {
-                                if (document.getElementById('sortable-container')) {
-                                    console.log('DOM changed, reinitializing');
-                                    initializeRankingInterface();
-                                }
                             }
                         }
-                    });
-                    observer.observe(targetNode, config);
-                }
-            });
-            </script>
-            <style>
-            .sortable-container {
-                display: flex;
-                flex-direction: column;
-                gap: 12px;
-                min-height: 200px;
-                padding: 16px;
-                background-color: #f8f9fa;
-                border-radius: 8px;
-            }
-            .sortable-item {
-                padding: 14px;
-                background-color: #fff;
-                border: 1px solid #e0e0e0;
-                border-radius: 6px;
-                display: flex;
-                align-items: center;
-                transition: all 0.2s ease;
-            }
-            .sortable-item:hover {
-                background-color: #f8f9fa;
-                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-            }
-            .rank-controls {
-                display: flex;
-                flex-direction: column;
-                align-items: center;
-                margin-right: 16px;
-            }
-            .rank-badge {
-                display: flex;
-                align-items: center;
-                justify-content: center;
-                width: 28px;
-                height: 28px;
-                border-radius: 50%;
-                background-color: #6c757d;
-                color: white;
-                font-weight: bold;
-                margin: 6px 0;
-                flex-shrink: 0;
-            }
-            .rank-btn {
-                width: 28px;
-                height: 28px;
-                border: none;
-                background-color: #f0f0f0;
-                border-radius: 4px;
-                margin: 2px 0;
-                cursor: pointer;
-                display: flex;
-                align-items: center;
-                justify-content: center;
-                font-size: 14px;
-            }
-            .rank-btn:hover:not([disabled]) {
-                background-color: #e0e0e0;
-            }
-            .rank-btn:active:not([disabled]) {
-                background-color: #d0d0d0;
-            }
-            .rank-btn:disabled {
-                opacity: 0.5;
-                cursor: not-allowed;
-            }
-            .doc-content {
-                flex: 1;
-                line-height: 1.5;
-                word-break: break-word;
-            }
-            /* More professional color scheme for rank badges */
-            .rank-bg-1 .rank-badge { background-color: #1e40af; } /* Deep blue for top rank */
-            .rank-bg-2 .rank-badge { background-color: #3b82f6; } /* Medium blue */
-            .rank-bg-3 .rank-badge { background-color: #60a5fa; } /* Light blue */
-            .rank-bg-4 .rank-badge { background-color: #93c5fd; color: #1e3a8a; } /* Very light blue with dark text */
-            .rank-bg-5 .rank-badge { background-color: #bfdbfe; color: #1e3a8a; } /* Lightest blue with dark text */
-            /* Lower ranks get progressively more gray */
-            .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge {
-                background-color: #64748b;
-            }
-            .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge, .rank-bg-10 .rank-badge {
-                background-color: #94a3b8;
-                color: #0f172a;
-            }
-            .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge, .rank-bg-13 .rank-badge,
-            .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge, .rank-bg-16 .rank-badge,
-            .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge, .rank-bg-19 .rank-badge,
-            .rank-bg-20 .rank-badge {
-                background-color: #cbd5e1;
-                color: #0f172a;
-            }
-            .error {
-                padding: 16px;
-                background-color: #fee2e2;
-                border: 1px solid #f87171;
-                color: #b91c1c;
-                border-radius: 6px;
-                margin: 16px 0;
             }
-            </style>
-            """
-            gr.HTML(js_code)
-            submit_btn.click(
-                save_ranking,
-                inputs=[order_state, current_sample_id],
-                outputs=[status_box, progress_text]
-            )
-            next_btn.click(
-                next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
-            ).then(
-                load_sample,
-                inputs=[current_sample_id],
-                outputs=[query_text, sortable_list, order_state, progress_text, status_box]
-            )
-            prev_btn.click(
-                prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
-            ).then(
-                load_sample,
-                inputs=[current_sample_id],
-                outputs=[query_text, sortable_list, order_state, progress_text, status_box]
-            )
-            save_btn.click(save_results, outputs=[status_box])
-            # Use a custom loading function with proper error handling
-            def safe_load_initial():
-                try:
-                    if initial_sample_id and samples:
-                        return load_sample(initial_sample_id)
-                    else:
-                        return "No query available", "<div>No documents available</div>", "[]", "No progress data", "Error: No samples found"
-                except Exception as e:
-                    print(f"Error in initial load: {str(e)}")
-                    return "Error loading query", "<div>Error loading documents</div>", "[]", "Error", f"Error: {str(e)}"
-            # Use the safe loading function to prevent scheduling failures
-            demo.load(safe_load_initial,
-                     outputs=[query_text, sortable_list, order_state, progress_text, status_box])
-        return demo
-    except Exception as e:
-        import traceback
-        print(f"Error creating reranking interface: {traceback.format_exc()}")
-        # Return a simple error interface instead of failing completely
-        with gr.Blocks() as error_demo:
-            gr.Markdown("# Error Creating Reranking Interface")
-            gr.Markdown(f"An error occurred while creating the interface: **{str(e)}**")
-            gr.Markdown("Please check your task data and try again.")
-        return error_demo
-# Main app with file upload capability and better error handling
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# MTEB Human Evaluation Demo")
@@ -595,63 +443,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             This interface allows you to evaluate the relevance of documents for reranking tasks.
             """)
-            # Function to get the most recent task file with error handling
             def get_latest_task_file():
-                try:
-                    # Check first in uploaded_tasks directory
-                    os.makedirs("uploaded_tasks", exist_ok=True)
-                    uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
-                    if uploaded_tasks:
-                        # Sort by modification time, newest first
-                        uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
-                        return os.path.join("uploaded_tasks", uploaded_tasks[0])
-                    # Fall back to default example
-                    if os.path.exists("AskUbuntuDupQuestions_human_eval.json"):
-                        return "AskUbuntuDupQuestions_human_eval.json"
-                    # If no files found
-                    return None
-                except Exception as e:
-                    print(f"Error getting latest task file: {str(e)}")
-                    return None
-            # Load the task file with proper error handling
             task_file = get_latest_task_file()
-            task_data = None
             try:
-                if task_file and os.path.exists(task_file):
-                    with open(task_file, "r") as f:
-                        task_data = json.load(f)
-                    # Show which task is currently loaded
-                    gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
-                    # Display the interface
-                    reranking_demo = create_reranking_interface(task_data)
-                else:
-                    gr.Markdown("**No task file found**")
-                    gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
-                    # Create a dummy interface with instructions
-                    with gr.Blocks() as dummy_demo:
-                        gr.Markdown("### No Task Loaded")
-                        gr.Markdown("Please go to the 'Upload & Evaluate' tab to upload a task file.")
-                    reranking_demo = dummy_demo
             except Exception as e:
-                import traceback
-                print(f"Error loading task: {traceback.format_exc()}")
                 gr.Markdown(f"**Error loading task: {str(e)}**")
                 gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
-                # Create a simple error interface
-                with gr.Blocks() as error_demo:
-                    gr.Markdown("### Error Loading Task")
-                    gr.Markdown(f"An error occurred: **{str(e)}**")
-                    gr.Markdown("Please try uploading a different task file.")
-                reranking_demo = error_demo
         with gr.TabItem("Upload & Evaluate"):
             gr.Markdown("""
@@ -857,19 +677,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
 if __name__ == "__main__":
-    try:
-        # Use options compatible with Gradio 3.42.0
-        import os
-        # Disable file watching to prevent restart loops
-        os.environ['GRADIO_WATCH'] = 'no'
-        demo.launch(show_error=True)
-    except Exception as e:
-        import traceback
-        print(f"Error launching demo: {traceback.format_exc()}")
-        print("\nTrying alternative launch method...")
-        try:
-            # Alternative launch method
-            demo.launch(share=False, debug=True)
-        except Exception as e2:
-            print(f"Alternative launch also failed: {str(e2)}")
-            print("\nPlease check your Gradio installation and try again.")

 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation using drag and drop."""
+    samples = task_data["samples"]
+    results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
+    completed_samples = {s["id"]: False for s in samples}
+    # Define helper functions before UI elements are created
+    def generate_sortable_html(candidates, existing_ranks=None):
+        """Render the candidate documents as an HTML list with rank controls.
+
+        Args:
+            candidates: Document texts for the current sample.
+            existing_ranks: Optional previously saved ranks, one per candidate.
+                Used to order the list only when its length matches
+                ``candidates``; otherwise the original order is kept.
+
+        Returns:
+            An HTML string: one ``sortable-item`` per document, wrapped in the
+            ``sortable-container`` div.
+        """
+        import html as html_escaper
+        total = len(candidates)
+        positions = list(range(total))
+        if existing_ranks and len(existing_ranks) == total:
+            # Lowest saved rank first; the sort is stable, so ties keep index order.
+            positions.sort(key=lambda i: existing_ranks[i])
+        pieces = ['<div id="sortable-container" class="sortable-container">']
+        for rank, idx in enumerate(positions, start=1):
+            # Document text is escaped before being embedded in the markup.
+            escaped_doc = html_escaper.escape(candidates[idx])
+            # The first item cannot move up and the last cannot move down.
+            up_disabled = "disabled" if rank == 1 else ""
+            down_disabled = "disabled" if rank == total else ""
+            pieces.append(f'''\
+                <div class="sortable-item rank-bg-{rank}" data-doc-id="{idx}" data-rank="{rank}">
+                    <div class="rank-controls">
+                        <button class="rank-btn up-btn" {up_disabled} onclick="moveItemUp({rank})">▲</button>
+                        <div class="rank-badge">{rank}</div>
+                        <button class="rank-btn down-btn" {down_disabled} onclick="moveItemDown({rank})">▼</button>
+                    </div>
+                    <div class="doc-content">{escaped_doc}</div>
+                </div>
+                ''')
+        pieces.append('</div>')
+        return "".join(pieces)
+    def save_ranking(order_json, sample_id):
+        """Validate a submitted ordering, record it, and write all results to disk.
+
+        Args:
+            order_json: JSON-encoded list of candidate indices, best first, as
+                stored in the hidden order textbox.
+            sample_id: ID of the sample the ordering belongs to.
+
+        Returns:
+            A ``(status_message, progress_message)`` tuple for the status and
+            progress textboxes. Problems are reported through the status
+            message; no exception is propagated to the caller.
+        """
+        def progress_message():
+            # Derive progress from live state. ``progress_text.value`` is only
+            # the value the component was constructed with, so echoing it back
+            # would reset the display to 0/N after any warning.
+            return f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+
+        try:
+            if not order_json or order_json == "[]":
+                return "⚠️ Drag documents to set the ranking before submitting.", progress_message()
+            order = json.loads(order_json)
+            sample = next((s for s in samples if s["id"] == sample_id), None)
+            if sample is None:
+                return f"⚠️ Unknown sample id {sample_id!r}.", progress_message()
+            num_candidates = len(sample["candidates"])
+            if len(order) != num_candidates:
+                return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_message()
+            rankings = [0] * num_candidates
+            for rank, doc_idx in enumerate(order, start=1):
+                # Reject negatives and non-integers explicitly: a negative index
+                # would wrap around and assign the rank to the wrong document
+                # while still passing the 1..N check below.
+                if isinstance(doc_idx, bool) or not isinstance(doc_idx, int) or not 0 <= doc_idx < num_candidates:
+                    raise ValueError(f"Invalid document index {doc_idx} found in order.")
+                rankings[doc_idx] = rank
+            # A duplicated index leaves some slot at 0, which this check catches.
+            if sorted(rankings) != list(range(1, num_candidates + 1)):
+                return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_message()
+            annotation = {"sample_id": sample_id, "rankings": rankings}
+            existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
+            if existing_idx is not None:
+                # Re-submitting a sample replaces its earlier annotation.
+                results["annotations"][existing_idx] = annotation
+            else:
+                results["annotations"].append(annotation)
+            completed_samples[sample_id] = True
+            output_path = f"{task_data['task_name']}_human_results.json"
+            with open(output_path, "w") as f:
+                json.dump(results, f, indent=2)
+            return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed)", progress_message()
+        except json.JSONDecodeError:
+            return "⚠️ Error decoding ranking order. Please try again.", progress_message()
+        except Exception as e:
+            import traceback
+            print(traceback.format_exc())
+            return f"Error saving ranking: {str(e)}", progress_message()
+    def load_sample(sample_id):
+        """Produce the component updates that display one sample.
+
+        Args:
+            sample_id: ID of the sample to show.
+
+        Returns:
+            A 5-tuple in the order the handlers list their outputs: query
+            text, sortable-list HTML, hidden order value, progress text,
+            status text. Every path returns all five values because the
+            handler is wired to five output components.
+        """
+        try:
+            sample = next((s for s in samples if s["id"] == sample_id), None)
+            if sample is None:
+                # Unknown ID: keep what is on screen and only clear the pending order.
+                return gr.update(), gr.update(), gr.update(value="[]"), gr.update(), gr.update()
+            existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
+            new_html = generate_sortable_html(sample["candidates"], existing_ranking)
+            status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
+            progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+            return sample["query"], new_html, "[]", progress, status
+        except Exception as e:
+            # Report the failure in the status box and leave the other components untouched.
+            return gr.update(), gr.update(), gr.update(value="[]"), gr.update(), gr.update(value=f"Error loading sample: {str(e)}")
+    def next_sample_id(current_id):
+        """Return the ID of the sample after ``current_id``.
+
+        The last sample maps to itself, and an ID that is not found is
+        returned unchanged.
+        """
+        for position, sample in enumerate(samples):
+            if sample["id"] == current_id:
+                break
+        else:
+            # No sample carries this ID.
+            return current_id
+        last = len(samples) - 1
+        target = position + 1 if position < last else last
+        return samples[target]["id"]
+    def prev_sample_id(current_id):
+        """Return the ID of the sample before ``current_id``.
+
+        The first sample maps to itself, and an ID that is not found is
+        returned unchanged.
+        """
+        for position, sample in enumerate(samples):
+            if sample["id"] == current_id:
+                break
+        else:
+            # No sample carries this ID.
+            return current_id
+        target = position - 1 if position > 0 else 0
+        return samples[target]["id"]
+    def save_results():
+        """Write every annotation collected so far to the task's results file.
+
+        Returns:
+            A status message naming the file and the annotation count, or a
+            warning message if writing failed.
+        """
+        output_path = f"{task_data['task_name']}_human_results.json"
+        try:
+            with open(output_path, "w") as out_file:
+                json.dump(results, out_file, indent=2)
+            annotation_count = len(results['annotations'])
+            return f"✅ Results saved to {output_path} ({annotation_count} annotations)"
+        except Exception as err:
+            return f"⚠️ Error saving results file: {str(err)}"
+    # Build the page layout; the handlers defined above are attached to these
+    # components further down in this same Blocks context.
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
+        with gr.Accordion("Instructions", open=True):
+            # The task-specific instructions are spliced into the fixed help text.
+            gr.Markdown("""
+            ## Task Instructions
+            {instructions}
+            ### How to use this interface:
+            1. Read the query at the top
+            2. Drag and drop documents to reorder them based on relevance
+            3. Top document = Rank 1, Second = Rank 2, etc.
+            4. Click "Submit Rankings" when you're done with the current query
+            5. Use "Previous" and "Next" to navigate between queries
+            6. Click "Save All Results" periodically to ensure your work is saved
+            """.format(instructions=task_data["instructions"]))
+        # ID of the sample currently on screen; starts at the first sample, so
+        # an empty sample list raises IndexError here.
+        current_sample_id = gr.State(value=samples[0]["id"])
+        with gr.Row():
+            progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
+            status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
+        with gr.Group():
+            gr.Markdown("## Query:")
+            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
+            gr.Markdown("## Documents to Rank (Drag to Reorder):")
+            # The empty rank list makes generate_sortable_html keep the original candidate order.
+            sortable_list = gr.HTML(generate_sortable_html(samples[0]["candidates"], []), elem_id="sortable-list-container")
+            # Hidden textbox that the client-side script looks up by its
+            # "current-order" element ID and fills with the JSON-encoded order.
+            order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
+            with gr.Row():
+                prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
+                submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
+                next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
+            save_btn = gr.Button("💾 Save All Results", variant="secondary")
+        js_code = """
+        <script src="https://cdn.jsdelivr.net/npm/sortablejs@1.15.0/Sortable.min.js"></script>
+        <script>
+        // Function to move an item up in the ranking
+        function moveItemUp(currentRank) {
+            if (currentRank <= 1) return; // Already at the top
+            const container = document.getElementById('sortable-container');
+            if (!container) return;
+            const items = container.querySelectorAll('.sortable-item');
+            const itemsArray = Array.from(items);
+            // Find the items to swap
+            const currentItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank);
+            const aboveItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank - 1);
+            if (!currentItem || !aboveItem) return;
+            // Swap the items
+            aboveItem.parentNode.insertBefore(currentItem, aboveItem);
+            // Update ranks
+            updateRanksAfterMove();
+        }
+        // Function to move an item down in the ranking
+        function moveItemDown(currentRank) {
+            const container = document.getElementById('sortable-container');
+            if (!container) return;
+            const items = container.querySelectorAll('.sortable-item');
+            if (currentRank >= items.length) return; // Already at the bottom
+            const itemsArray = Array.from(items);
+            // Find the items to swap
+            const currentItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank);
+            const belowItem = itemsArray.find(item => item.getAttribute('data-rank') == currentRank + 1);
+            if (!currentItem || !belowItem) return;
+            // Swap the items
+            belowItem.parentNode.insertBefore(belowItem, currentItem);
+            // Update ranks
+            updateRanksAfterMove();
+        }
+        // Update rank numbers and classes after moving
+        function updateRanksAfterMove() {
+            const container = document.getElementById('sortable-container');
+            if (!container) return;
+            const items = container.querySelectorAll('.sortable-item');
+            const orderInput = document.querySelector('#current-order textarea');
+            if (!orderInput) return;
+            const order = [];
+            items.forEach((item, index) => {
+                const rank = index + 1;
+                const docId = parseInt(item.getAttribute('data-doc-id'));
+                // Update rank display
+                const rankBadge = item.querySelector('.rank-badge');
+                if (rankBadge) rankBadge.textContent = rank;
+                // Update item classes
+                item.className = item.className.replace(/rank-bg-\d+/g, '').trim();
+                item.classList.add(`rank-bg-${rank}`);
+                // Update data attribute
+                item.setAttribute('data-rank', rank);
+                // Update button states
+                const upBtn = item.querySelector('.up-btn');
+                const downBtn = item.querySelector('.down-btn');
+                if (upBtn) {
+                    if (rank == 1) {
+                        upBtn.setAttribute('disabled', 'disabled');
+                    } else {
+                        upBtn.removeAttribute('disabled');
+                    }
                 }
+                if (downBtn) {
+                    if (rank == items.length) {
+                        downBtn.setAttribute('disabled', 'disabled');
+                    } else {
+                        downBtn.removeAttribute('disabled');
+                    }
                 }
+                order.push(docId);
+            });
+            // Update hidden input
+            const newOrderValue = JSON.stringify(order);
+            if (orderInput.value !== newOrderValue) {
+                orderInput.value = newOrderValue;
+                const event = new Event('input', { bubbles: true });
+                orderInput.dispatchEvent(event);
+            }
+        }
+        document.addEventListener('DOMContentLoaded', function() {
+            function initializeSortable() {
+                // Initialize event handlers for buttons
+                updateRanksAfterMove();
+                // Keep drag-and-drop as a fallback
                 const container = document.getElementById('sortable-container');
                 if (!container) {
+                    console.log('Container not found, retrying...');
+                    setTimeout(initializeSortable, 200);
                     return;
                 }
+                if (typeof Sortable === 'undefined') {
+                    console.log('Sortable not loaded, retrying...');
+                    setTimeout(initializeSortable, 200);
                     return;
                 }
+                if (container.sortableInstance) {
                     return;
                 }
+                container.sortableInstance = new Sortable(container, {
+                    animation: 150,
+                    ghostClass: "sortable-ghost",
+                    onEnd: function() {
+                        updateRanksAfterMove();
                     }
                 });
+            }
+            // Initialize immediately and also set up a mutation observer
+            initializeSortable();
+            const targetNode = document.getElementById('sortable-list-container');
+            if (targetNode) {
+                const config = { childList: true, subtree: true };
+                const observer = new MutationObserver(function(mutationsList) {
+                    for(const mutation of mutationsList) {
+                        if (mutation.type === 'childList') {
+                            if (document.getElementById('sortable-container')) {
+                                initializeSortable();
                             }
                         }
+                    }
+                });
+                observer.observe(targetNode, config);
             }
+        });
+        </script>
+        <style>
+        .sortable-container {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+            min-height: 200px;
+            padding: 10px;
+            background-color: #f8f9fa;
+            border-radius: 8px;
+        }
+        .sortable-item {
+            padding: 12px 15px;
+            background-color: #fff;
+            border: 1px solid #e0e0e0;
+            border-radius: 6px;
+            cursor: grab;
+            display: flex;
+            align-items: center;
+            transition: all 0.2s ease;
+            user-select: none;
+        }
+        .sortable-item:hover {
+            background-color: #f8f9fa;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        .sortable-ghost {
+            background-color: #e3f2fd !important;
+            border-style: dashed !important;
+            opacity: 0.8;
+        }
+        .sortable-chosen {
+            cursor: grabbing;
+            box-shadow: 0 4px 8px rgba(0,0,0,0.1);
+        }
+        .rank-controls {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            margin-right: 15px;
+        }
+        .rank-badge {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            width: 28px;
+            height: 28px;
+            border-radius: 50%;
+            background-color: #6c757d;
+            color: white;
+            font-weight: bold;
+            margin: 4px 0;
+            flex-shrink: 0;
+        }
+        .rank-btn {
+            border: none;
+            background: #f0f0f0;
+            border-radius: 4px;
+            width: 24px;
+            height: 24px;
+            font-size: 12px;
+            line-height: 1;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            cursor: pointer;
+            color: #333;
+        }
+        .rank-btn:hover:not([disabled]) {
+            background: #e0e0e0;
+        }
+        .rank-btn:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+        }
+        .doc-content {
+            flex: 1;
+            line-height: 1.4;
+            word-break: break-word;
+        }
+        .rank-bg-1 .rank-badge { background-color: #198754; }
+        .rank-bg-2 .rank-badge { background-color: #20c997; }
+        .rank-bg-3 .rank-badge { background-color: #ffc107; color: #333; }
+        .rank-bg-4 .rank-badge { background-color: #fd7e14; }
+        .rank-bg-5 .rank-badge { background-color: #dc3545; }
+        .rank-bg-6 .rank-badge, .rank-bg-7 .rank-badge { background-color: #6f42c1; }
+        .rank-bg-8 .rank-badge, .rank-bg-9 .rank-badge { background-color: #d63384; }
+        .rank-bg-10 .rank-badge, .rank-bg-11 .rank-badge, .rank-bg-12 .rank-badge,
+        .rank-bg-13 .rank-badge, .rank-bg-14 .rank-badge, .rank-bg-15 .rank-badge,
+        .rank-bg-16 .rank-badge, .rank-bg-17 .rank-badge, .rank-bg-18 .rank-badge,
+        .rank-bg-19 .rank-badge, .rank-bg-20 .rank-badge {
+            background-color: #6c757d;
+        }
+        </style>
+        """
+        gr.HTML(js_code)
+        # Submit: validate and store the order currently held in the hidden
+        # textbox for the sample on screen.
+        submit_btn.click(
+            save_ranking,
+            inputs=[order_state, current_sample_id],
+            outputs=[status_box, progress_text]
+        )
+        # Navigation is a two-step chain: first update the stored sample ID,
+        # then re-render the page from the updated ID.
+        next_btn.click(
+            next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
+        ).then(
+            load_sample,
+            inputs=[current_sample_id],
+            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
+        )
+        prev_btn.click(
+            prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
+        ).then(
+            load_sample,
+            inputs=[current_sample_id],
+            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
+        )
+        save_btn.click(save_results, outputs=[status_box])
+        # Render the first sample when the page loads.
+        demo.load(lambda: load_sample(samples[0]['id']),
+                  outputs=[query_text, sortable_list, order_state, progress_text, status_box])
+    return demo
+# Main app with file upload capability
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# MTEB Human Evaluation Demo")
             This interface allows you to evaluate the relevance of documents for reranking tasks.
             """)
+            # Function to get the most recent task file
             def get_latest_task_file():
+                """Return the path of the task file to load.
+
+                Creates the ``uploaded_tasks`` directory if needed and returns
+                the most recently modified ``.json`` file in it. Falls back to
+                the bundled example file name when there are no uploads.
+                """
+                upload_dir = "uploaded_tasks"
+                os.makedirs(upload_dir, exist_ok=True)
+                uploaded = [os.path.join(upload_dir, name) for name in os.listdir(upload_dir) if name.endswith(".json")]
+                if not uploaded:
+                    # Fall back to default example
+                    return "AskUbuntuDupQuestions_human_eval.json"
+                # Newest modification time wins.
+                return max(uploaded, key=os.path.getmtime)
+            # Load the task file
             task_file = get_latest_task_file()
+            # Any failure while reading or parsing the task file, or while
+            # building the interface from it, is caught below and shown as a
+            # message in the UI.
             try:
+                with open(task_file, "r") as f:
+                    task_data = json.load(f)
+                # Show which task is currently loaded
+                gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
+                # Display the interface
+                reranking_demo = create_reranking_interface(task_data)
             except Exception as e:
                 gr.Markdown(f"**Error loading task: {str(e)}**")
                 gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
         with gr.TabItem("Upload & Evaluate"):
             gr.Markdown("""
             download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
 if __name__ == "__main__":
+    demo.launch()