Spaces:

dongsheng
/

docker_test

Sleeping

App Files Files Community

3v324v23 committed on Mar 20

Commit

dd10f90

1 Parent(s): e70dce5

new1

Browse files

Files changed (6) hide show

README.md +42 -0
app.py +30 -733
requirements.txt +3 -1
src/evaluator.py +94 -0
src/queue_manager.py +194 -0
src/ui.py +212 -0

README.md CHANGED Viewed

@@ -8,3 +8,45 @@ pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# 代码评估服务
+这是一个支持多种编程语言的代码评估服务，专为Hugging Face Spaces设计。
+## 功能特点
+- 支持多种编程语言的代码评估
+- 使用消息队列处理并发请求
+- 实时显示队列状态
+- 提供API接口，便于集成
+## 系统架构
+系统采用模块化设计，主要由以下组件构成：
+- **Queue Manager**: 处理请求队列，实现请求排队和顺序处理
+- **Evaluator**: 负责代码评估逻辑，支持多种编程语言
+- **UI**: 基于Gradio的用户界面，展示队列状态和评估结果
+## 使用方法
+1. 在Gradio界面中，提交代码评估请求
+2. 系统将请求加入队列并返回请求ID
+3. 使用请求ID查询评估结果
+4. 实时查看队列状态
+## 文件结构
+```
+.
+├── app.py                  # 主应用入口
+├── src/
+│   ├── __init__.py         # 包初始化
+│   ├── containerized_eval.py # 容器化评估工具
+│   ├── evaluator.py        # 代码评估逻辑
+│   ├── queue_manager.py    # 队列管理器
+│   ├── ui.py               # 用户界面组件
+│   └── ...                 # 其他语言评估模块
+├── requirements.txt        # 依赖项
+└── Dockerfile              # 容器配置
+```

app.py CHANGED Viewed

@@ -1,21 +1,9 @@
-import gradio as gr
-import json
-import importlib
 import os
 import sys
 from pathlib import Path
-import concurrent.futures
-import multiprocessing
-import time
-import threading
-import queue
-import uuid
-import numpy as np
-from datetime import datetime
-from tqdm.auto import tqdm
-from src.containerized_eval import eval_string_script
-# Add current directory and src directory to module search path
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = os.path.join(current_dir, "src")
 if current_dir not in sys.path:
@@ -23,732 +11,41 @@ if current_dir not in sys.path:
 if src_dir not in sys.path:
     sys.path.append(src_dir)
-# Create message queue
-task_queue = queue.Queue()
-# Dictionary to store task status
-task_status = {}
-# List to store task history, max 200 tasks
-task_history = []
-# Lock for shared resources
-lock = threading.Lock()
-# Number of worker threads
-worker_threads = 1  # Process only one task at a time
-# Flag for running background threads
-running = True
-# Mapping from task type to processing time
-task_type_times = {}
-def queue_processor():
-    """Process tasks in the queue"""
-    while running:
-        try:
-            # Use get with a timeout to prevent busy waiting
-            try:
-                task_id, input_data, request_time = task_queue.get(timeout=0.5)
-            except queue.Empty:
-                continue
-            try:
-                with lock:
-                    if task_id not in task_status:
-                        # Task was cancelled or removed
-                        task_queue.task_done()
-                        continue
-                    task_status[task_id]['status'] = 'processing'
-                    task_status[task_id]['start_time'] = time.time()
-                if isinstance(input_data, list) and len(input_data) > 0:
-                    sample_task = input_data[0]
-                    language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
-                    task_size = len(input_data)
-                    task_complexity = _estimate_task_complexity(input_data)
-                    with lock:
-                        task_status[task_id]['estimated_factors'] = {
-                            'language': language,
-                            'size': task_size,
-                            'complexity': task_complexity
-                        }
-                result = evaluate(input_data)
-                end_time = time.time()
-                process_time = end_time - task_status[task_id]['start_time']
-                with lock:
-                    if task_id in task_status:  # Check if task still exists
-                        task_status[task_id]['status'] = 'completed'
-                        task_status[task_id]['result'] = result
-                        task_status[task_id]['end_time'] = end_time
-                        task_status[task_id]['process_time'] = process_time
-                        if 'estimated_factors' in task_status[task_id]:
-                            factors = task_status[task_id]['estimated_factors']
-                            key = f"{factors['language']}_{factors['complexity']}"
-                            if key not in task_type_times:
-                                task_type_times[key] = []
-                            task_type_times[key].append(process_time / factors['size'])
-                            if len(task_type_times[key]) > 10:
-                                task_type_times[key] = task_type_times[key][-10:]
-                        task_history.append({
-                            'task_id': task_id,
-                            'request_time': request_time,
-                            'process_time': process_time,
-                            'status': 'completed',
-                            'factors': task_status[task_id].get('estimated_factors', {})
-                        })
-                        while len(task_history) > 200:
-                            task_history.pop(0)
-            except Exception as e:
-                print(f"Error processing task {task_id}: {str(e)}")
-                with lock:
-                    if task_id in task_status:
-                        task_status[task_id]['status'] = 'error'
-                        task_status[task_id]['error'] = str(e)
-                        task_status[task_id]['end_time'] = time.time()
-            finally:
-                task_queue.task_done()
-        except Exception as e:
-            print(f"Critical error in queue processor: {str(e)}")
-            time.sleep(1)  # Avoid tight loop in case of persistent errors
-def _estimate_task_complexity(tasks):
-    """Estimate task complexity
-    Returns: 'simple', 'medium', or 'complex'
-    """
-    total_code_length = 0
-    count = 0
-    for task in tasks:
-        if isinstance(task, dict):
-            prompt = task.get('prompt', '')
-            tests = task.get('tests', '')
-            completions = task.get('processed_completions', [])
-            code_length = len(prompt) + len(tests)
-            if completions:
-                code_length += sum(len(comp) for comp in completions)
-            total_code_length += code_length
-            count += 1
-    if count == 0:
-        return 'medium'
-    avg_length = total_code_length / count
-    if avg_length < 1000:
-        return 'simple'
-    elif avg_length < 5000:
-        return 'medium'
-    else:
-        return 'complex'
-def evaluate(input_data):
-    """Main function for code evaluation"""
-    try:
-        if not isinstance(input_data, list):
-            return {"status": "Exception", "error": "Input must be a list"}
-        results = []
-        # Use a moderate number of workers for all language tests to ensure stability
-        # This prevents resource contention regardless of language
-        max_workers = max(1, min(multiprocessing.cpu_count() // 2, 4))
-        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-            future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
-            for future in concurrent.futures.as_completed(future_to_item):
-                item = future_to_item[future]
-                try:
-                    result = future.result()
-                    item.update(result)
-                    results.append(item)
-                except Exception as e:
-                    item.update({"status": "Exception", "error": str(e)})
-                    results.append(item)
-        return results
-    except Exception as e:
-        return {"status": "Exception", "error": str(e)}
-def evaluate_single_case(input_data):
-    """Evaluate a single code case"""
-    try:
-        if not isinstance(input_data, dict):
-            return {"status": "Exception", "error": "Input item must be a dictionary"}
-        language = input_data.get('language')
-        completions = input_data.get('processed_completions', [])
-        if not completions:
-            return {"status": "Exception", "error": "No code provided"}
-        # Use a retry mechanism for all languages for better reliability
-        max_retries = 2  # One retry for all languages
-        results = []
-        for comp in completions:
-            code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
-            # Try up to max_retries + 1 times for all test cases
-            for attempt in range(max_retries + 1):
-                result = evaluate_code(code, language)
-                # If success or last attempt, return/record the result
-                if result["status"] == "OK" or attempt == max_retries:
-                    if result["status"] == "OK":
-                        return result
-                    results.append(result)
-                    break
-                # For retries, briefly wait to allow resources to stabilize
-                time.sleep(0.3)
-        return results[0]
-    except Exception as e:
-        return {"status": "Exception", "error": str(e)}
-def evaluate_code(code, language):
-    """Evaluate code in a specific language"""
-    try:
-        result = eval_string_script(language, code)
-        return result
-    except Exception as e:
-        return {"status": "Exception", "error": str(e)}
-def synchronous_evaluate(input_data):
-    """Synchronously evaluate code, compatible with original interface"""
-    try:
-        if isinstance(input_data, list) and len(input_data) > 0:
-            sample_task = input_data[0]
-            language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
-            task_size = len(input_data)
-            task_complexity = _estimate_task_complexity(input_data)
-        else:
-            language = 'unknown'
-            task_size = 1
-            task_complexity = 'medium'
-        estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
-        estimated_total_time = estimated_time_per_task * task_size
-        queue_info = get_queue_status()
-        waiting_tasks = queue_info['waiting_tasks']
-        task_id = str(uuid.uuid4())
-        request_time = time.time()
-        with lock:
-            task_status[task_id] = {
-                'status': 'queued',
-                'queued_time': request_time,
-                'queue_position': task_queue.qsize() + 1,
-                'synchronous': True,
-                'estimated_factors': {
-                    'language': language,
-                    'size': task_size,
-                    'complexity': task_complexity
-                },
-                'estimated_time': estimated_total_time
-            }
-        task_queue.put((task_id, input_data, request_time))
-        # Set a reasonable timeout to avoid hanging
-        max_wait_time = max(300, estimated_total_time * 2)  # At least 5 minutes or double the estimated time
-        start_wait = time.time()
         while True:
-            with lock:
-                if task_id in task_status:
-                    status = task_status[task_id]['status']
-                    if status == 'completed':
-                        result = task_status[task_id]['result']
-                        task_status.pop(task_id, None)
-                        return result
-                    elif status == 'error':
-                        error = task_status[task_id].get('error', 'Unknown error')
-                        task_status.pop(task_id, None)
-                        return {"status": "Exception", "error": error}
-                else:
-                    # Task somehow disappeared
-                    return {"status": "Exception", "error": "Task was lost during processing"}
-            # Check if we've waited too long
-            if time.time() - start_wait > max_wait_time:
-                with lock:
-                    if task_id in task_status:
-                        task_status[task_id]['status'] = 'error'
-                        task_status[task_id]['error'] = f"Task timed out after {max_wait_time} seconds"
-                    return {"status": "Exception", "error": f"Evaluation timed out after {max_wait_time} seconds"}
-            time.sleep(0.1)
-    except Exception as e:
-        print(f"Error in synchronous_evaluate: {str(e)}")
-        return {"status": "Exception", "error": str(e)}
-def _get_estimated_time_for_task(language, complexity):
-    """Get estimated processing time for a specific task type"""
-    key = f"{language}_{complexity}"
-    if key in task_type_times and len(task_type_times[key]) > 0:
-        return np.median(task_type_times[key])
-    if complexity == 'simple':
-        return 1.0
-    elif complexity == 'medium':
-        return 3.0
-    else:  # complex
-        return 8.0
-def enqueue_task(input_data):
-    """Add task to queue"""
-    if isinstance(input_data, list) and len(input_data) > 0:
-        sample_task = input_data[0]
-        language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
-        task_size = len(input_data)
-        task_complexity = _estimate_task_complexity(input_data)
-    else:
-        language = 'unknown'
-        task_size = 1
-        task_complexity = 'medium'
-    estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
-    estimated_total_time = estimated_time_per_task * task_size
-    task_id = str(uuid.uuid4())
-    request_time = time.time()
-    # Get current queue size before adding new task
-    current_queue_size = task_queue.qsize()
-    with lock:
-        task_status[task_id] = {
-            'status': 'queued',
-            'queued_time': request_time,
-            'queue_position': current_queue_size + 1,
-            'estimated_factors': {
-                'language': language,
-                'size': task_size,
-                'complexity': task_complexity
-            },
-            'estimated_time': estimated_total_time
-        }
-    queue_info = get_queue_status()
-    est_wait = queue_info['estimated_wait']
-    task_queue.put((task_id, input_data, request_time))
-    return {
-        'task_id': task_id,
-        'status': 'queued',
-        'queue_position': task_status[task_id]['queue_position'],
-        'estimated_wait': est_wait,
-        'estimated_processing': estimated_total_time
-    }
-def check_status(task_id):
-    """Check task status"""
-    with lock:
-        if task_id not in task_status:
-            return {'status': 'not_found'}
-        status_info = task_status[task_id].copy()
-        if status_info['status'] in ['completed', 'error'] and time.time() - status_info.get('end_time', 0) > 3600:
-            task_status.pop(task_id, None)
-        return status_info
-def get_queue_status():
-    """Get queue status"""
-    with lock:
-        # Get all tasks that are either queued or processing
-        queued_tasks = [t for t in task_status.values() if t['status'] == 'queued']
-        processing_tasks = [t for t in task_status.values() if t['status'] == 'processing']
-        # Get the actual queue size from the queue object
-        actual_queue_size = task_queue.qsize()
-        # Use the larger of actual queue size or queued tasks count
-        queue_size = max(actual_queue_size, len(queued_tasks))
-        active_tasks = len(processing_tasks)
-        waiting_tasks = queue_size
-        # Clean up completed or error tasks that are older than 1 hour
-        current_time = time.time()
-        for task_id in list(task_status.keys()):
-            if task_status[task_id]['status'] in ['completed', 'error']:
-                if current_time - task_status[task_id].get('end_time', 0) > 3600:
-                    task_status.pop(task_id, None)
-        remaining_processing_time = 0
-        for task in processing_tasks:
-            if 'start_time' in task and 'estimated_time' in task:
-                elapsed = time.time() - task['start_time']
-                remaining = max(0, task['estimated_time'] - elapsed)
-                remaining_processing_time += remaining
-            else:
-                remaining_processing_time += 2
-        if active_tasks > 0:
-            remaining_processing_time = remaining_processing_time / min(active_tasks, worker_threads)
-        queued_processing_time = 0
-        for task in queued_tasks:
-            if 'estimated_time' in task:
-                queued_processing_time += task['estimated_time']
-            else:
-                queued_processing_time += 5
-        estimated_wait = remaining_processing_time + queued_processing_time
-        if task_history:
-            prediction_ratios = []
-            for task in task_history:
-                if 'factors' in task and 'estimated_time' in task:
-                    prediction_ratios.append(task['process_time'] / task['estimated_time'])
-            if prediction_ratios:
-                correction_factor = np.median(prediction_ratios)
-                correction_factor = max(0.5, min(2.0, correction_factor))
-                estimated_wait *= correction_factor
-        estimated_wait = max(0.1, estimated_wait)
-        if waiting_tasks == 0 and active_tasks == 0:
-            estimated_wait = 0
-        recent_tasks = task_history[-5:] if task_history else []
-        return {
-            'queue_size': queue_size,
-            'active_tasks': active_tasks,
-            'waiting_tasks': waiting_tasks,
-            'worker_threads': worker_threads,
-            'estimated_wait': estimated_wait,
-            'recent_tasks': recent_tasks
-        }
-def format_time(seconds):
-    """Format time into readable format"""
-    if seconds < 60:
-        return f"{seconds:.1f} seconds"
-    elif seconds < 3600:
-        minutes = int(seconds / 60)
-        seconds = seconds % 60
-        return f"{minutes}m {seconds:.1f}s"
-    else:
-        hours = int(seconds / 3600)
-        minutes = int((seconds % 3600) / 60)
-        return f"{hours}h {minutes}m"
-def ui_get_queue_info():
-    """Get queue info for UI"""
-    queue_info = get_queue_status()
-    tasks_html = ""
-    for task in reversed(queue_info['recent_tasks']):
-        tasks_html += f"""
-        <tr>
-            <td>{task['task_id'][:8]}...</td>
-            <td>{datetime.fromtimestamp(task['request_time']).strftime('%H:%M:%S')}</td>
-            <td>{format_time(task['process_time'])}</td>
-        </tr>
-        """
-    if not tasks_html:
-        tasks_html = """
-        <tr>
-            <td colspan="3" style="text-align: center; padding: 20px;">No historical tasks</td>
-        </tr>
-        """
-    return f"""
-    <div class="dashboard">
-        <div class="queue-info-card main-card">
-            <h3 class="card-title">Queue Status Monitor</h3>
-            <div class="queue-stats">
-                <div class="stat-item">
-                    <div class="stat-value">{queue_info['waiting_tasks']}</div>
-                    <div class="stat-label">Waiting</div>
-                </div>
-                <div class="stat-item">
-                    <div class="stat-value">{queue_info['active_tasks']}</div>
-                    <div class="stat-label">Processing</div>
-                </div>
-                <div class="stat-item">
-                    <div class="stat-value">{queue_info['worker_threads']}</div>
-                    <div class="stat-label">Worker Threads</div>
-                </div>
-            </div>
-            <div class="wait-time">
-                <p><b>Current Estimated Wait Time:</b> {format_time(queue_info['estimated_wait'])}</p>
-                <p class="last-update"><small>Last update: {datetime.now().strftime('%H:%M:%S')}</small></p>
-            </div>
-        </div>
-        <div class="queue-info-card history-card">
-            <h3 class="card-title">Recently Processed Tasks</h3>
-            <table class="recent-tasks">
-                <thead>
-                    <tr>
-                        <th>Task ID</th>
-                        <th>Request Time</th>
-                        <th>Processing Time</th>
-                    </tr>
-                </thead>
-                <tbody>
-                    {tasks_html}
-                </tbody>
-            </table>
-        </div>
-    </div>
-    """
-def launch_workers():
-    """Launch worker threads"""
-    global running
-    running = True
-    for _ in range(worker_threads):
-        worker = threading.Thread(target=queue_processor)
-        worker.daemon = True
-        worker.start()
-# Custom CSS
-custom_css = """
-.container {
-    max-width: 1200px;
-    margin: 0 auto;
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-}
-.dashboard {
-    display: flex;
-    flex-direction: column;
-    gap: 20px;
-}
-.card-title {
-    color: #333;
-    border-bottom: 2px solid #ddd;
-    padding-bottom: 10px;
-    margin-top: 0;
-}
-.status-card, .queue-info-card {
-    background: #fff;
-    border-radius: 12px;
-    padding: 20px;
-    margin: 10px 0;
-    box-shadow: 0 4px 15px rgba(0,0,0,0.08);
-}
-.main-card {
-    border-top: 5px solid #4285f4;
-}
-.history-card {
-    border-top: 5px solid #34a853;
-}
-.status-card.success {
-    background: #e7f5e7;
-    border-left: 5px solid #28a745;
-}
-.status-card.error {
-    background: #f8d7da;
-    border-left: 5px solid #dc3545;
-}
-.error-message {
-    color: #dc3545;
-    font-weight: bold;
-    padding: 10px;
-    background: #f8d7da;
-    border-radius: 5px;
-}
-.notice {
-    color: #0c5460;
-    background-color: #d1ecf1;
-    padding: 10px;
-    border-radius: 5px;
-}
-.queue-stats {
-    display: flex;
-    justify-content: space-around;
-    margin: 20px 0;
-}
-.stat-item {
-    text-align: center;
-    padding: 15px;
-    background: #f8f9fa;
-    border-radius: 10px;
-    min-width: 120px;
-    transition: transform 0.3s ease;
-}
-.stat-item:hover {
-    transform: translateY(-5px);
-    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
-}
-.stat-value {
-    font-size: 32px;
-    font-weight: bold;
-    color: #4285f4;
-    margin-bottom: 5px;
-}
-.stat-label {
-    color: #5f6368;
-    font-size: 16px;
-}
-.wait-time {
-    text-align: center;
-    margin: 20px 0;
-    padding: 15px;
-    background: #f1f3f4;
-    border-radius: 8px;
-    font-size: 18px;
-}
-.last-update {
-    color: #80868b;
-    margin-top: 10px;
-    margin-bottom: 0;
-}
-.recent-tasks {
-    width: 100%;
-    border-collapse: collapse;
-    margin-top: 15px;
-    background: white;
-    box-shadow: 0 1px 3px rgba(0,0,0,0.05);
-}
-.recent-tasks th, .recent-tasks td {
-    border: 1px solid #e0e0e0;
-    padding: 12px 15px;
-    text-align: center;
-}
-.recent-tasks th {
-    background-color: #f1f3f4;
-    color: #202124;
-    font-weight: 500;
-}
-.recent-tasks tbody tr:hover {
-    background-color: #f8f9fa;
-}
-.tabs {
-    margin-top: 20px;
-}
-button.primary {
-    background-color: #4285f4;
-    color: white;
-    padding: 10px 20px;
-    border: none;
-    border-radius: 4px;
-    cursor: pointer;
-    font-size: 16px;
-    font-weight: 500;
-    transition: background-color 0.3s;
-}
-button.primary:hover {
-    background-color: #3367d6;
-}
-"""
-# Initialize and launch worker threads
-launch_workers()
-# Create Gradio interface
-with gr.Blocks(css=custom_css) as demo:
-    gr.Markdown("# Code Evaluation Service")
-    gr.Markdown("Code evaluation service supporting multiple programming languages, using queue mechanism to process requests")
-    with gr.Row():
-        with gr.Column(scale=3):
-            # Queue status info card
-            queue_info_html = gr.HTML(ui_get_queue_info())
-            refresh_queue_btn = gr.Button("Refresh Queue Status", variant="primary")
-    # Hidden API interface components
-    with gr.Row(visible=False):
-        api_input = gr.JSON()
-        api_output = gr.JSON()
-    # Define update function
-    def update_queue_info():
-        try:
-            info = ui_get_queue_info()
-            return info
-        except Exception as e:
-            print(f"Error updating queue info: {str(e)}")
-            return f"""
-            <div class="dashboard">
-                <div class="queue-info-card main-card">
-                    <h3 class="card-title">Queue Status Monitor</h3>
-                    <div class="queue-stats">
-                        <p>Error refreshing queue status. Click refresh to try again.</p>
-                    </div>
-                </div>
-            </div>
-            """
-    # Set up periodic refresh with error handling
-    refresh_interval = 3
-    def safe_refresh():
-        try:
-            return update_queue_info()
-        except Exception as e:
-            print(f"Safe refresh error: {str(e)}")
-            return queue_info_html.value  # Keep existing value on error
-    # Update queue info periodically
-    demo.load(safe_refresh, None, queue_info_html, every=refresh_interval)
-    # Refresh button event
-    refresh_queue_btn.click(update_queue_info, None, queue_info_html)
-    # Add evaluation endpoint compatible with original interface
-    demo.queue()
-    evaluate_endpoint = demo.load(fn=synchronous_evaluate, inputs=api_input, outputs=api_output, api_name="evaluate")
 if __name__ == "__main__":
-    try:
-        # Use configuration suitable for Hugging Face Spaces
-        demo.launch(
-            server_name="0.0.0.0",  # Listen on all network interfaces
-            share=False,            # No need for sharing link in Spaces
-            show_error=True,        # Show detailed error messages
-            debug=True,             # Enable debug mode for better error reporting
-            max_threads=10,         # Limit number of threads to avoid resource contention
-            quiet=False             # Log to console for debugging
-        )
-    finally:
-        # Stop worker threads
-        running = False

 import os
 import sys
 from pathlib import Path
+import gradio as gr
+# 添加当前目录和src目录到模块搜索路径
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = os.path.join(current_dir, "src")
 if current_dir not in sys.path:
 if src_dir not in sys.path:
     sys.path.append(src_dir)
+from src.queue_manager import QueueManager
+from src.evaluator import evaluate
+from src.ui import EvaluationUI
+def main():
+    """Build the evaluation app and start its background cleanup thread.
+    The app is created but not launched here; the caller is expected to
+    call ``launch()`` on the returned object.
+    Returns:
+        The interface object produced by
+        ``EvaluationUI.create_evaluation_interface()``.
+    """
+    # Create the queue manager and attach the evaluation function.
+    queue_manager = QueueManager()
+    queue_manager.set_evaluator(evaluate)
+    # Build the UI on top of the queue manager.
+    ui = EvaluationUI(queue_manager)
+    app = ui.create_evaluation_interface()
+    # Imported locally so this function carries its own dependencies.
+    import threading
+    import time
+    def _cleanup_loop():
+        """Periodically clean up completed requests."""
         while True:
+            try:
+                # Clean up completed requests older than one hour.
+                queue_manager.clean_completed(max_age=3600)
+            except Exception as e:
+                # Keep the daemon alive; the sweep is retried on the next cycle.
+                print(f"Error in cleanup thread: {e}")
+            time.sleep(600)  # Run once every 10 minutes.
+    # Use distinct names for the loop function and the Thread object so that
+    # neither shadows the other.
+    cleaner = threading.Thread(target=_cleanup_loop, daemon=True)
+    cleaner.start()
+    return app
 if __name__ == "__main__":
+    app = main()
+    app.launch()

requirements.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	- gradio==4.44.1

+gradio==4.44.1
+typing_extensions>=4.0.0
+# dataclasses is built-in for Python 3.7+

src/evaluator.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import concurrent.futures
+import multiprocessing
+from .containerized_eval import eval_string_script
+def evaluate(input_data):
+    """Evaluate a batch of test cases concurrently.
+    Args:
+        input_data: List of test-case items. Each item is updated in place
+            with the keys returned by ``evaluate_single_case``.
+    Returns:
+        list: The input items, in completion order, merged with their results.
+        dict: ``{"status": "Exception", "error": ...}`` when ``input_data`` is
+            not a list or an unexpected error occurs.
+    """
+    try:
+        if not isinstance(input_data, list):
+            return {"status": "Exception", "error": "Input must be a list"}
+        results = []
+        # cpu_count() // 2 is 0 on a single-core host and ThreadPoolExecutor
+        # rejects max_workers <= 0, so always keep at least one worker.
+        max_workers = max(1, multiprocessing.cpu_count() // 2)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
+            for future in concurrent.futures.as_completed(future_to_item):
+                item = future_to_item[future]
+                try:
+                    result = future.result()
+                    item.update(result)
+                    results.append(item)
+                except Exception as e:
+                    # Record the failure on the item instead of aborting the batch.
+                    item.update({"status": "Exception", "error": str(e)})
+                    results.append(item)
+        return results
+    except Exception as e:
+        return {"status": "Exception", "error": str(e)}
+def evaluate_single_case(input_data):
+    """Run each completion of one test case and report the first that passes.
+    Args:
+        input_data: Dict read for the ``language``, ``processed_completions``,
+            ``prompt`` and ``tests`` keys.
+    Returns:
+        dict: ``{"status": "pass", "compiled_code": ...}`` for the first
+        completion whose evaluation status is ``"OK"``; otherwise the result
+        of the first failing completion, or an ``"Exception"`` dict on bad
+        input or an unexpected error.
+    """
+    try:
+        if not isinstance(input_data, dict):
+            return {"status": "Exception", "error": "Input item must be a dictionary"}
+        candidates = input_data.get('processed_completions', [])
+        if not candidates:
+            return {"status": "Exception", "error": "No code provided"}
+        lang = input_data.get('language')
+        failures = []
+        for candidate in candidates:
+            # Full program = prompt + completion + newline + tests.
+            program = input_data.get('prompt') + candidate + '\n' + input_data.get('tests')
+            outcome = evaluate_code(program, lang)
+            if outcome["status"] == "OK":
+                return {"status": "pass", "compiled_code": program}
+            print(f'Code failed to compile: \n{program}')
+            outcome["compiled_code"] = program
+            failures.append(outcome)
+        # No completion passed: report the first failure.
+        return failures[0]
+    except Exception as e:
+        return {"status": "Exception", "error": str(e)}
+def evaluate_code(code, language):
+    """Run ``code`` through ``eval_string_script`` and normalize the result.
+    Args:
+        code (str): The code to evaluate.
+        language (str): The programming language of ``code``.
+    Returns:
+        dict: ``{"status": "OK", "output": stdout}`` when the exit code is 0;
+        ``{"status": "Error", "error": ..., "output": stdout}`` otherwise;
+        ``{"status": "Exception", "error": ...}`` if evaluation raised.
+    """
+    try:
+        # Delegate execution to eval_string_script from containerized_eval.
+        outcome = eval_string_script(language, code)
+        if outcome["exit_code"] != 0:
+            return {
+                "status": "Error",
+                "error": outcome["stderr"] or "Unknown error",
+                "output": outcome["stdout"]
+            }
+        return {"status": "OK", "output": outcome["stdout"]}
+    except Exception as e:
+        return {"status": "Exception", "error": str(e)}

src/queue_manager.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import queue
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import Dict, List, Any, Optional
+import uuid
+@dataclass
+class QueueItem:
+    """A single evaluation request tracked by the queue."""
+    id: str  # Unique identifier
+    # Payload handed to the evaluator. Typed ``Any`` because the evaluator
+    # wired in app.py takes a list of test cases rather than a dict.
+    input_data: Any
+    status: str = "queued"  # One of: queued, processing, completed, error
+    # Evaluator output; the wired evaluator returns a list or an error dict.
+    result: Optional[Any] = None
+    created_at: float = field(default_factory=time.time)  # Creation time
+    started_at: Optional[float] = None  # Time processing started
+    completed_at: Optional[float] = None  # Time processing finished
+class QueueManager:
+    """管理代码评估请求队列"""
+    def __init__(self):
+        """Create an idle manager with an empty queue and no evaluator."""
+        # Worker state: no thread exists until processing is started.
+        self.running = False
+        self.processing_thread = None
+        # Evaluation callback; stays None until set_evaluator() is called.
+        self.evaluator = None
+        # All queue items keyed by ID, plus the queue of pending item IDs.
+        self.queue_items = {}
+        self.queue = queue.Queue()
+    def set_evaluator(self, evaluator_func):
+        """Register the callable used to evaluate queued requests.
+        Args:
+            evaluator_func: Callable that takes a request's input data and
+                returns its result.
+        """
+        self.evaluator = evaluator_func
+    def enqueue(self, input_data):
+        """Register a new evaluation request and queue it for processing.
+        Args:
+            input_data: Payload stored on the request and later passed to the
+                evaluator.
+        Returns:
+            str: The generated unique ID of the request.
+        """
+        request_id = str(uuid.uuid4())
+        self.queue_items[request_id] = QueueItem(id=request_id, input_data=input_data)
+        self.queue.put(request_id)
+        # Nothing more to do when the worker is already flagged as running.
+        if self.running:
+            return request_id
+        self.start_processing()
+        return request_id
+    def start_processing(self):
+        """启动队列处理线程"""
+        if self.processing_thread is None or not self.processing_thread.is_alive():
+            self.running = True
+            self.processing_thread = threading.Thread(target=self._process_queue)
+            self.processing_thread.daemon = True
+            self.processing_thread.start()
+    def stop_processing(self):
+        """停止队列处理"""
+        self.running = False
+        if self.processing_thread and self.processing_thread.is_alive():
+            self.processing_thread.join(timeout=1.0)
+    def _process_queue(self):
+        """处理队列中的请求（在单独的线程中运行）"""
+        while self.running:
+            try:
+                # 尝试从队列获取一个项目，如果队列为空，等待1秒后重试
+                try:
+                    item_id = self.queue.get(timeout=1.0)
+                except queue.Empty:
+                    continue
+                # 获取队列项并更新状态
+                queue_item = self.queue_items[item_id]
+                queue_item.status = "processing"
+                queue_item.started_at = time.time()
+                # 处理请求
+                try:
+                    if self.evaluator:
+                        result = self.evaluator(queue_item.input_data)
+                        queue_item.result = result
+                        queue_item.status = "completed"
+                    else:
+                        queue_item.status = "error"
+                        queue_item.result = {"status": "Error", "error": "No evaluator function set"}
+                except Exception as e:
+                    queue_item.status = "error"
+                    queue_item.result = {"status": "Exception", "error": str(e)}
+                # 更新完成时间
+                queue_item.completed_at = time.time()
+                # 标记任务为完成
+                self.queue.task_done()
+            except Exception as e:
+                print(f"Error in queue processing: {e}")
+                time.sleep(1)  # 防止过度CPU使用
+    def get_queue_status(self):
+        """获取队列状态信息
+        Returns:
+            dict: 包含队列状态的字典
+        """
+        queued = 0
+        processing = 0
+        completed = 0
+        error = 0
+        # 统计各状态的数量
+        for item in self.queue_items.values():
+            if item.status == "queued":
+                queued += 1
+            elif item.status == "processing":
+                processing += 1
+            elif item.status == "completed":
+                completed += 1
+            elif item.status == "error":
+                error += 1
+        # 获取队列中的项目，仅包括等待和处理中的
+        queue_items = []
+        for item in self.queue_items.values():
+            if item.status in ["queued", "processing"]:
+                queue_items.append({
+                    "id": item.id,
+                    "status": item.status,
+                    "created_at": item.created_at,
+                    "started_at": item.started_at,
+                })
+        # 按创建时间排序
+        queue_items.sort(key=lambda x: x["created_at"])
+        return {
+            "stats": {
+                "queued": queued,
+                "processing": processing,
+                "completed": completed,
+                "error": error,
+                "total": len(self.queue_items)
+            },
+            "queue_items": queue_items
+        }
+    def get_result(self, item_id):
+        """获取特定请求的结果
+        Args:
+            item_id: 请求的唯一ID
+        Returns:
+            dict: 请求的状态和结果
+        """
+        if item_id in self.queue_items:
+            item = self.queue_items[item_id]
+            return {
+                "id": item.id,
+                "status": item.status,
+                "result": item.result,
+                "created_at": item.created_at,
+                "started_at": item.started_at,
+                "completed_at": item.completed_at
+            }
+        return {"status": "not_found", "error": "Request ID not found"}
+    def clean_completed(self, max_age=3600):
+        """清理完成的请求
+        Args:
+            max_age: 最大保留时间（秒），默认为1小时
+        """
+        current_time = time.time()
+        to_remove = []
+        for item_id, item in self.queue_items.items():
+            if item.status in ["completed", "error"]:
+                if item.completed_at and (current_time - item.completed_at) > max_age:
+                    to_remove.append(item_id)
+        for item_id in to_remove:
+            del self.queue_items[item_id]

src/ui.py ADDED Viewed

	@@ -0,0 +1,212 @@

+import gradio as gr
+import json
+import time
+from typing import Dict, Any, List, Optional
class EvaluationUI:
    """Gradio front end for the evaluation service."""

    def __init__(self, queue_manager):
        """Initialise the UI.

        Args:
            queue_manager: Queue manager instance; this class calls its
                ``enqueue``, ``get_result`` and ``get_queue_status`` methods.
        """
        self.queue_manager = queue_manager
        self.app = None  # built lazily by create_evaluation_interface()

    def submit_evaluation(self, input_data):
        """Submit an evaluation request.

        Args:
            input_data: Request payload; a string is parsed as JSON first.

        Returns:
            dict: ``{"request_id": ..., "status": "queued"}`` on success, or
            ``{"status": "error", "message": ...}`` when parsing or
            enqueueing raised.
        """
        try:
            if isinstance(input_data, str):
                input_data = json.loads(input_data)
            request_id = self.queue_manager.enqueue(input_data)
            return {"request_id": request_id, "status": "queued"}
        except Exception as e:
            # UI boundary: report the failure to the caller instead of raising.
            return {"status": "error", "message": str(e)}

    def get_evaluation_result(self, request_id):
        """Fetch the result of a request.

        Args:
            request_id: Request ID; surrounding whitespace in a string ID is
                ignored so IDs pasted into the textbox still match.

        Returns:
            dict: Whatever the queue manager's ``get_result`` returns.
        """
        if isinstance(request_id, str):
            request_id = request_id.strip()
        return self.queue_manager.get_result(request_id)

    def get_queue_status(self):
        """Return the queue manager's status dictionary."""
        return self.queue_manager.get_queue_status()

    def render_queue_status(self):
        """Render the queue status as HTML.

        Returns:
            str: HTML with the four status counters followed by either a
            table of queued/processing requests or an "empty queue" note.
        """
        status = self.queue_manager.get_queue_status()
        stats = status["stats"]
        queue_items = status["queue_items"]

        # Counter tiles: queued / processing / completed / error.
        html = f"""
        <div style="padding: 10px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 20px;">
            <h3>队列状态</h3>
            <div style="display: flex; justify-content: space-between; margin-bottom: 15px;">
                <div style="text-align: center; padding: 5px; background-color: #e0f7fa; border-radius: 5px; flex: 1; margin-right: 5px;">
                    <div style="font-weight: bold; font-size: 24px;">{stats['queued']}</div>
                    <div>等待中</div>
                </div>
                <div style="text-align: center; padding: 5px; background-color: #fff9c4; border-radius: 5px; flex: 1; margin-right: 5px;">
                    <div style="font-weight: bold; font-size: 24px;">{stats['processing']}</div>
                    <div>处理中</div>
                </div>
                <div style="text-align: center; padding: 5px; background-color: #c8e6c9; border-radius: 5px; flex: 1; margin-right: 5px;">
                    <div style="font-weight: bold; font-size: 24px;">{stats['completed']}</div>
                    <div>已完成</div>
                </div>
                <div style="text-align: center; padding: 5px; background-color: #ffcdd2; border-radius: 5px; flex: 1;">
                    <div style="font-weight: bold; font-size: 24px;">{stats['error']}</div>
                    <div>错误</div>
                </div>
            </div>
        """
        if queue_items:
            html += """
            <h4>当前队列</h4>
            <table style="width: 100%; border-collapse: collapse;">
                <tr style="background-color: #e0e0e0;">
                    <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">ID</th>
                    <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">状态</th>
                    <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">等待时间</th>
                </tr>
            """
            current_time = time.time()
            for item in queue_items:
                is_processing = item["status"] == "processing"
                status_color = "#fff9c4" if is_processing else "#e0f7fa"
                status_label = "处理中" if is_processing else "等待中"
                short_id = item['id'][:8]
                # Elapsed time since the request was created, as minutes/seconds.
                wait_time = current_time - item["created_at"]
                wait_time_str = f"{int(wait_time // 60)}分{int(wait_time % 60)}秒"
                html += f"""
                <tr>
                    <td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{short_id}...</td>
                    <td style="padding: 8px; text-align: left; border: 1px solid #ddd; background-color: {status_color};">
                        {status_label}
                    </td>
                    <td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{wait_time_str}</td>
                </tr>
                """
            html += "</table>"
        else:
            html += "<p>队列为空</p>"
        html += "</div>"
        return html

    def create_evaluation_interface(self):
        """Build the main evaluation page and remember it in ``self.app``.

        Returns:
            gr.Blocks: The assembled Gradio app.
        """
        with gr.Blocks(title="代码评估服务") as app:
            with gr.Row():
                # Left column: JSON submission form and its response.
                with gr.Column(scale=3):
                    gr.Markdown("# 代码评估服务")
                    gr.Markdown("支持多种编程语言的代码评估服务，使用消息队列处理并发请求。")
                    json_input = gr.JSON(
                        label="输入JSON",
                        value=[{"language": "python", "processed_completions": ["def add(a, b):\n    return a + b"], "prompt": "", "tests": "assert add(1, 2) == 3"}]
                    )
                    submit_btn = gr.Button("提交评估")
                    result_output = gr.JSON(label="评估结果")
                # Right column: queue status and lookup of a single request.
                with gr.Column(scale=2):
                    queue_status = gr.HTML(label="队列状态")
                    refresh_btn = gr.Button("刷新队列状态")
                    with gr.Row():
                        request_id_input = gr.Textbox(label="请求ID")
                        get_result_btn = gr.Button("获取结果")
                    specific_result = gr.JSON(label="请求结果")
            # Event wiring.
            submit_btn.click(
                fn=self.submit_evaluation,
                inputs=[json_input],
                outputs=[result_output]
            )
            refresh_btn.click(
                fn=self.render_queue_status,
                inputs=[],
                outputs=[queue_status]
            )
            get_result_btn.click(
                fn=self.get_evaluation_result,
                inputs=[request_id_input],
                outputs=[specific_result]
            )
            # Render the queue status on page load and keep refreshing it
            # through Gradio's `every` parameter (set to 2 here).
            app.load(fn=self.render_queue_status, inputs=None, outputs=queue_status, every=2)
        self.app = app
        return app

    def create_api_interface(self):
        """Build a minimal JSON-in/JSON-out interface around submit_evaluation.

        Returns:
            gr.Interface: The Gradio interface.
        """
        return gr.Interface(
            fn=self.submit_evaluation,
            inputs=gr.JSON(),
            outputs=gr.JSON(),
            title="代码评估API",
            description="API接口用于提交代码评估请求"
        )

    def get_app(self):
        """Return the Gradio app, building it on first use.

        Returns:
            gr.Blocks: The Gradio app.
        """
        if self.app is None:
            self.create_evaluation_interface()
        return self.app