Spaces:
Sleeping
Sleeping
new1
Browse files- README.md +42 -0
- app.py +30 -733
- requirements.txt +3 -1
- src/evaluator.py +94 -0
- src/queue_manager.py +194 -0
- src/ui.py +212 -0
README.md
CHANGED
@@ -8,3 +8,45 @@ pinned: false
|
|
8 |
---
|
9 |
|
10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
---
|
9 |
|
10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
11 |
+
|
12 |
+
# 代码评估服务
|
13 |
+
|
14 |
+
这是一个支持多种编程语言的代码评估服务,专为Hugging Face Spaces设计。
|
15 |
+
|
16 |
+
## 功能特点
|
17 |
+
|
18 |
+
- 支持多种编程语言的代码评估
|
19 |
+
- 使用消息队列处理并发请求
|
20 |
+
- 实时显示队列状态
|
21 |
+
- 提供API接口,便于集成
|
22 |
+
|
23 |
+
## 系统架构
|
24 |
+
|
25 |
+
系统采用模块化设计,主要由以下组件构成:
|
26 |
+
|
27 |
+
- **Queue Manager**: 处理请求队列,实现请求排队和顺序处理
|
28 |
+
- **Evaluator**: 负责代码评估逻辑,支持多种编程语言
|
29 |
+
- **UI**: 基于Gradio的用户界面,展示队列状态和评估结果
|
30 |
+
|
31 |
+
## 使用方法
|
32 |
+
|
33 |
+
1. 在Gradio界面中,提交代码评估请求
|
34 |
+
2. 系统将请求加入队列并返回请求ID
|
35 |
+
3. 使用请求ID查询评估结果
|
36 |
+
4. 实时查看队列状态
|
37 |
+
|
38 |
+
## 文件结构
|
39 |
+
|
40 |
+
```
|
41 |
+
.
|
42 |
+
├── app.py # 主应用入口
|
43 |
+
├── src/
|
44 |
+
│ ├── __init__.py # 包初始化
|
45 |
+
│ ├── containerized_eval.py # 容器化评估工具
|
46 |
+
│ ├── evaluator.py # 代码评估逻辑
|
47 |
+
│ ├── queue_manager.py # 队列管理器
|
48 |
+
│ ├── ui.py # 用户界面组件
|
49 |
+
│ └── ... # 其他语言评估模块
|
50 |
+
├── requirements.txt # 依赖项
|
51 |
+
└── Dockerfile # 容器配置
|
52 |
+
```
|
app.py
CHANGED
@@ -1,21 +1,9 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import json
|
3 |
-
import importlib
|
4 |
import os
|
5 |
import sys
|
6 |
from pathlib import Path
|
7 |
-
import
|
8 |
-
import multiprocessing
|
9 |
-
import time
|
10 |
-
import threading
|
11 |
-
import queue
|
12 |
-
import uuid
|
13 |
-
import numpy as np
|
14 |
-
from datetime import datetime
|
15 |
-
from tqdm.auto import tqdm
|
16 |
-
from src.containerized_eval import eval_string_script
|
17 |
|
18 |
-
#
|
19 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
20 |
src_dir = os.path.join(current_dir, "src")
|
21 |
if current_dir not in sys.path:
|
@@ -23,732 +11,41 @@ if current_dir not in sys.path:
|
|
23 |
if src_dir not in sys.path:
|
24 |
sys.path.append(src_dir)
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
task_status = {}
|
30 |
-
# List to store task history, max 200 tasks
|
31 |
-
task_history = []
|
32 |
-
# Lock for shared resources
|
33 |
-
lock = threading.Lock()
|
34 |
-
# Number of worker threads
|
35 |
-
worker_threads = 1 # Process only one task at a time
|
36 |
-
# Flag for running background threads
|
37 |
-
running = True
|
38 |
-
# Mapping from task type to processing time
|
39 |
-
task_type_times = {}
|
40 |
|
41 |
-
def
|
42 |
-
"""
|
43 |
-
|
44 |
-
|
45 |
-
# Use get with a timeout to prevent busy waiting
|
46 |
-
try:
|
47 |
-
task_id, input_data, request_time = task_queue.get(timeout=0.5)
|
48 |
-
except queue.Empty:
|
49 |
-
continue
|
50 |
-
|
51 |
-
try:
|
52 |
-
with lock:
|
53 |
-
if task_id not in task_status:
|
54 |
-
# Task was cancelled or removed
|
55 |
-
task_queue.task_done()
|
56 |
-
continue
|
57 |
-
|
58 |
-
task_status[task_id]['status'] = 'processing'
|
59 |
-
task_status[task_id]['start_time'] = time.time()
|
60 |
-
|
61 |
-
if isinstance(input_data, list) and len(input_data) > 0:
|
62 |
-
sample_task = input_data[0]
|
63 |
-
language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
|
64 |
-
task_size = len(input_data)
|
65 |
-
task_complexity = _estimate_task_complexity(input_data)
|
66 |
-
|
67 |
-
with lock:
|
68 |
-
task_status[task_id]['estimated_factors'] = {
|
69 |
-
'language': language,
|
70 |
-
'size': task_size,
|
71 |
-
'complexity': task_complexity
|
72 |
-
}
|
73 |
-
|
74 |
-
result = evaluate(input_data)
|
75 |
-
|
76 |
-
end_time = time.time()
|
77 |
-
process_time = end_time - task_status[task_id]['start_time']
|
78 |
-
|
79 |
-
with lock:
|
80 |
-
if task_id in task_status: # Check if task still exists
|
81 |
-
task_status[task_id]['status'] = 'completed'
|
82 |
-
task_status[task_id]['result'] = result
|
83 |
-
task_status[task_id]['end_time'] = end_time
|
84 |
-
task_status[task_id]['process_time'] = process_time
|
85 |
-
|
86 |
-
if 'estimated_factors' in task_status[task_id]:
|
87 |
-
factors = task_status[task_id]['estimated_factors']
|
88 |
-
key = f"{factors['language']}_{factors['complexity']}"
|
89 |
-
|
90 |
-
if key not in task_type_times:
|
91 |
-
task_type_times[key] = []
|
92 |
-
|
93 |
-
task_type_times[key].append(process_time / factors['size'])
|
94 |
-
if len(task_type_times[key]) > 10:
|
95 |
-
task_type_times[key] = task_type_times[key][-10:]
|
96 |
-
|
97 |
-
task_history.append({
|
98 |
-
'task_id': task_id,
|
99 |
-
'request_time': request_time,
|
100 |
-
'process_time': process_time,
|
101 |
-
'status': 'completed',
|
102 |
-
'factors': task_status[task_id].get('estimated_factors', {})
|
103 |
-
})
|
104 |
-
while len(task_history) > 200:
|
105 |
-
task_history.pop(0)
|
106 |
-
except Exception as e:
|
107 |
-
print(f"Error processing task {task_id}: {str(e)}")
|
108 |
-
with lock:
|
109 |
-
if task_id in task_status:
|
110 |
-
task_status[task_id]['status'] = 'error'
|
111 |
-
task_status[task_id]['error'] = str(e)
|
112 |
-
task_status[task_id]['end_time'] = time.time()
|
113 |
-
finally:
|
114 |
-
task_queue.task_done()
|
115 |
-
|
116 |
-
except Exception as e:
|
117 |
-
print(f"Critical error in queue processor: {str(e)}")
|
118 |
-
time.sleep(1) # Avoid tight loop in case of persistent errors
|
119 |
-
|
120 |
-
def _estimate_task_complexity(tasks):
|
121 |
-
"""Estimate task complexity
|
122 |
-
|
123 |
-
Returns: 'simple', 'medium', or 'complex'
|
124 |
-
"""
|
125 |
-
total_code_length = 0
|
126 |
-
count = 0
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
prompt = task.get('prompt', '')
|
131 |
-
tests = task.get('tests', '')
|
132 |
-
completions = task.get('processed_completions', [])
|
133 |
-
|
134 |
-
code_length = len(prompt) + len(tests)
|
135 |
-
if completions:
|
136 |
-
code_length += sum(len(comp) for comp in completions)
|
137 |
-
|
138 |
-
total_code_length += code_length
|
139 |
-
count += 1
|
140 |
|
141 |
-
|
142 |
-
|
|
|
143 |
|
144 |
-
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
elif avg_length < 5000:
|
149 |
-
return 'medium'
|
150 |
-
else:
|
151 |
-
return 'complex'
|
152 |
-
|
153 |
-
def evaluate(input_data):
|
154 |
-
"""Main function for code evaluation"""
|
155 |
-
try:
|
156 |
-
if not isinstance(input_data, list):
|
157 |
-
return {"status": "Exception", "error": "Input must be a list"}
|
158 |
-
|
159 |
-
results = []
|
160 |
-
|
161 |
-
# Use a moderate number of workers for all language tests to ensure stability
|
162 |
-
# This prevents resource contention regardless of language
|
163 |
-
max_workers = max(1, min(multiprocessing.cpu_count() // 2, 4))
|
164 |
-
|
165 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
166 |
-
future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
|
167 |
-
for future in concurrent.futures.as_completed(future_to_item):
|
168 |
-
item = future_to_item[future]
|
169 |
-
try:
|
170 |
-
result = future.result()
|
171 |
-
item.update(result)
|
172 |
-
results.append(item)
|
173 |
-
except Exception as e:
|
174 |
-
item.update({"status": "Exception", "error": str(e)})
|
175 |
-
results.append(item)
|
176 |
-
return results
|
177 |
-
|
178 |
-
except Exception as e:
|
179 |
-
return {"status": "Exception", "error": str(e)}
|
180 |
-
|
181 |
-
def evaluate_single_case(input_data):
|
182 |
-
"""Evaluate a single code case"""
|
183 |
-
try:
|
184 |
-
if not isinstance(input_data, dict):
|
185 |
-
return {"status": "Exception", "error": "Input item must be a dictionary"}
|
186 |
-
|
187 |
-
language = input_data.get('language')
|
188 |
-
completions = input_data.get('processed_completions', [])
|
189 |
-
|
190 |
-
if not completions:
|
191 |
-
return {"status": "Exception", "error": "No code provided"}
|
192 |
-
|
193 |
-
# Use a retry mechanism for all languages for better reliability
|
194 |
-
max_retries = 2 # One retry for all languages
|
195 |
-
|
196 |
-
results = []
|
197 |
-
for comp in completions:
|
198 |
-
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
|
199 |
-
|
200 |
-
# Try up to max_retries + 1 times for all test cases
|
201 |
-
for attempt in range(max_retries + 1):
|
202 |
-
result = evaluate_code(code, language)
|
203 |
-
|
204 |
-
# If success or last attempt, return/record the result
|
205 |
-
if result["status"] == "OK" or attempt == max_retries:
|
206 |
-
if result["status"] == "OK":
|
207 |
-
return result
|
208 |
-
results.append(result)
|
209 |
-
break
|
210 |
-
|
211 |
-
# For retries, briefly wait to allow resources to stabilize
|
212 |
-
time.sleep(0.3)
|
213 |
-
|
214 |
-
return results[0]
|
215 |
-
|
216 |
-
except Exception as e:
|
217 |
-
return {"status": "Exception", "error": str(e)}
|
218 |
-
|
219 |
-
def evaluate_code(code, language):
|
220 |
-
"""Evaluate code in a specific language"""
|
221 |
-
try:
|
222 |
-
result = eval_string_script(language, code)
|
223 |
-
return result
|
224 |
-
|
225 |
-
except Exception as e:
|
226 |
-
return {"status": "Exception", "error": str(e)}
|
227 |
-
|
228 |
-
def synchronous_evaluate(input_data):
|
229 |
-
"""Synchronously evaluate code, compatible with original interface"""
|
230 |
-
try:
|
231 |
-
if isinstance(input_data, list) and len(input_data) > 0:
|
232 |
-
sample_task = input_data[0]
|
233 |
-
language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
|
234 |
-
task_size = len(input_data)
|
235 |
-
task_complexity = _estimate_task_complexity(input_data)
|
236 |
-
else:
|
237 |
-
language = 'unknown'
|
238 |
-
task_size = 1
|
239 |
-
task_complexity = 'medium'
|
240 |
-
|
241 |
-
estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
|
242 |
-
estimated_total_time = estimated_time_per_task * task_size
|
243 |
-
|
244 |
-
queue_info = get_queue_status()
|
245 |
-
waiting_tasks = queue_info['waiting_tasks']
|
246 |
-
|
247 |
-
task_id = str(uuid.uuid4())
|
248 |
-
request_time = time.time()
|
249 |
-
|
250 |
-
with lock:
|
251 |
-
task_status[task_id] = {
|
252 |
-
'status': 'queued',
|
253 |
-
'queued_time': request_time,
|
254 |
-
'queue_position': task_queue.qsize() + 1,
|
255 |
-
'synchronous': True,
|
256 |
-
'estimated_factors': {
|
257 |
-
'language': language,
|
258 |
-
'size': task_size,
|
259 |
-
'complexity': task_complexity
|
260 |
-
},
|
261 |
-
'estimated_time': estimated_total_time
|
262 |
-
}
|
263 |
-
|
264 |
-
task_queue.put((task_id, input_data, request_time))
|
265 |
-
|
266 |
-
# Set a reasonable timeout to avoid hanging
|
267 |
-
max_wait_time = max(300, estimated_total_time * 2) # At least 5 minutes or double the estimated time
|
268 |
-
start_wait = time.time()
|
269 |
-
|
270 |
while True:
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
task_status.pop(task_id, None)
|
277 |
-
return result
|
278 |
-
elif status == 'error':
|
279 |
-
error = task_status[task_id].get('error', 'Unknown error')
|
280 |
-
task_status.pop(task_id, None)
|
281 |
-
return {"status": "Exception", "error": error}
|
282 |
-
else:
|
283 |
-
# Task somehow disappeared
|
284 |
-
return {"status": "Exception", "error": "Task was lost during processing"}
|
285 |
-
|
286 |
-
# Check if we've waited too long
|
287 |
-
if time.time() - start_wait > max_wait_time:
|
288 |
-
with lock:
|
289 |
-
if task_id in task_status:
|
290 |
-
task_status[task_id]['status'] = 'error'
|
291 |
-
task_status[task_id]['error'] = f"Task timed out after {max_wait_time} seconds"
|
292 |
-
return {"status": "Exception", "error": f"Evaluation timed out after {max_wait_time} seconds"}
|
293 |
-
|
294 |
-
time.sleep(0.1)
|
295 |
-
|
296 |
-
except Exception as e:
|
297 |
-
print(f"Error in synchronous_evaluate: {str(e)}")
|
298 |
-
return {"status": "Exception", "error": str(e)}
|
299 |
-
|
300 |
-
def _get_estimated_time_for_task(language, complexity):
|
301 |
-
"""Get estimated processing time for a specific task type"""
|
302 |
-
key = f"{language}_{complexity}"
|
303 |
-
|
304 |
-
if key in task_type_times and len(task_type_times[key]) > 0:
|
305 |
-
return np.median(task_type_times[key])
|
306 |
-
|
307 |
-
if complexity == 'simple':
|
308 |
-
return 1.0
|
309 |
-
elif complexity == 'medium':
|
310 |
-
return 3.0
|
311 |
-
else: # complex
|
312 |
-
return 8.0
|
313 |
-
|
314 |
-
def enqueue_task(input_data):
|
315 |
-
"""Add task to queue"""
|
316 |
-
if isinstance(input_data, list) and len(input_data) > 0:
|
317 |
-
sample_task = input_data[0]
|
318 |
-
language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
|
319 |
-
task_size = len(input_data)
|
320 |
-
task_complexity = _estimate_task_complexity(input_data)
|
321 |
-
else:
|
322 |
-
language = 'unknown'
|
323 |
-
task_size = 1
|
324 |
-
task_complexity = 'medium'
|
325 |
-
|
326 |
-
estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
|
327 |
-
estimated_total_time = estimated_time_per_task * task_size
|
328 |
-
|
329 |
-
task_id = str(uuid.uuid4())
|
330 |
-
request_time = time.time()
|
331 |
-
|
332 |
-
# Get current queue size before adding new task
|
333 |
-
current_queue_size = task_queue.qsize()
|
334 |
-
|
335 |
-
with lock:
|
336 |
-
task_status[task_id] = {
|
337 |
-
'status': 'queued',
|
338 |
-
'queued_time': request_time,
|
339 |
-
'queue_position': current_queue_size + 1,
|
340 |
-
'estimated_factors': {
|
341 |
-
'language': language,
|
342 |
-
'size': task_size,
|
343 |
-
'complexity': task_complexity
|
344 |
-
},
|
345 |
-
'estimated_time': estimated_total_time
|
346 |
-
}
|
347 |
-
|
348 |
-
queue_info = get_queue_status()
|
349 |
-
est_wait = queue_info['estimated_wait']
|
350 |
-
|
351 |
-
task_queue.put((task_id, input_data, request_time))
|
352 |
-
|
353 |
-
return {
|
354 |
-
'task_id': task_id,
|
355 |
-
'status': 'queued',
|
356 |
-
'queue_position': task_status[task_id]['queue_position'],
|
357 |
-
'estimated_wait': est_wait,
|
358 |
-
'estimated_processing': estimated_total_time
|
359 |
-
}
|
360 |
-
|
361 |
-
def check_status(task_id):
|
362 |
-
"""Check task status"""
|
363 |
-
with lock:
|
364 |
-
if task_id not in task_status:
|
365 |
-
return {'status': 'not_found'}
|
366 |
-
|
367 |
-
status_info = task_status[task_id].copy()
|
368 |
-
|
369 |
-
if status_info['status'] in ['completed', 'error'] and time.time() - status_info.get('end_time', 0) > 3600:
|
370 |
-
task_status.pop(task_id, None)
|
371 |
-
|
372 |
-
return status_info
|
373 |
-
|
374 |
-
def get_queue_status():
|
375 |
-
"""Get queue status"""
|
376 |
-
with lock:
|
377 |
-
# Get all tasks that are either queued or processing
|
378 |
-
queued_tasks = [t for t in task_status.values() if t['status'] == 'queued']
|
379 |
-
processing_tasks = [t for t in task_status.values() if t['status'] == 'processing']
|
380 |
-
|
381 |
-
# Get the actual queue size from the queue object
|
382 |
-
actual_queue_size = task_queue.qsize()
|
383 |
-
|
384 |
-
# Use the larger of actual queue size or queued tasks count
|
385 |
-
queue_size = max(actual_queue_size, len(queued_tasks))
|
386 |
-
active_tasks = len(processing_tasks)
|
387 |
-
waiting_tasks = queue_size
|
388 |
-
|
389 |
-
# Clean up completed or error tasks that are older than 1 hour
|
390 |
-
current_time = time.time()
|
391 |
-
for task_id in list(task_status.keys()):
|
392 |
-
if task_status[task_id]['status'] in ['completed', 'error']:
|
393 |
-
if current_time - task_status[task_id].get('end_time', 0) > 3600:
|
394 |
-
task_status.pop(task_id, None)
|
395 |
-
|
396 |
-
remaining_processing_time = 0
|
397 |
-
for task in processing_tasks:
|
398 |
-
if 'start_time' in task and 'estimated_time' in task:
|
399 |
-
elapsed = time.time() - task['start_time']
|
400 |
-
remaining = max(0, task['estimated_time'] - elapsed)
|
401 |
-
remaining_processing_time += remaining
|
402 |
-
else:
|
403 |
-
remaining_processing_time += 2
|
404 |
-
|
405 |
-
if active_tasks > 0:
|
406 |
-
remaining_processing_time = remaining_processing_time / min(active_tasks, worker_threads)
|
407 |
-
|
408 |
-
queued_processing_time = 0
|
409 |
-
for task in queued_tasks:
|
410 |
-
if 'estimated_time' in task:
|
411 |
-
queued_processing_time += task['estimated_time']
|
412 |
-
else:
|
413 |
-
queued_processing_time += 5
|
414 |
-
|
415 |
-
estimated_wait = remaining_processing_time + queued_processing_time
|
416 |
-
|
417 |
-
if task_history:
|
418 |
-
prediction_ratios = []
|
419 |
-
for task in task_history:
|
420 |
-
if 'factors' in task and 'estimated_time' in task:
|
421 |
-
prediction_ratios.append(task['process_time'] / task['estimated_time'])
|
422 |
-
|
423 |
-
if prediction_ratios:
|
424 |
-
correction_factor = np.median(prediction_ratios)
|
425 |
-
correction_factor = max(0.5, min(2.0, correction_factor))
|
426 |
-
estimated_wait *= correction_factor
|
427 |
-
|
428 |
-
estimated_wait = max(0.1, estimated_wait)
|
429 |
-
if waiting_tasks == 0 and active_tasks == 0:
|
430 |
-
estimated_wait = 0
|
431 |
-
|
432 |
-
recent_tasks = task_history[-5:] if task_history else []
|
433 |
-
|
434 |
-
return {
|
435 |
-
'queue_size': queue_size,
|
436 |
-
'active_tasks': active_tasks,
|
437 |
-
'waiting_tasks': waiting_tasks,
|
438 |
-
'worker_threads': worker_threads,
|
439 |
-
'estimated_wait': estimated_wait,
|
440 |
-
'recent_tasks': recent_tasks
|
441 |
-
}
|
442 |
-
|
443 |
-
def format_time(seconds):
|
444 |
-
"""Format time into readable format"""
|
445 |
-
if seconds < 60:
|
446 |
-
return f"{seconds:.1f} seconds"
|
447 |
-
elif seconds < 3600:
|
448 |
-
minutes = int(seconds / 60)
|
449 |
-
seconds = seconds % 60
|
450 |
-
return f"{minutes}m {seconds:.1f}s"
|
451 |
-
else:
|
452 |
-
hours = int(seconds / 3600)
|
453 |
-
minutes = int((seconds % 3600) / 60)
|
454 |
-
return f"{hours}h {minutes}m"
|
455 |
-
|
456 |
-
def ui_get_queue_info():
|
457 |
-
"""Get queue info for UI"""
|
458 |
-
queue_info = get_queue_status()
|
459 |
-
|
460 |
-
tasks_html = ""
|
461 |
-
for task in reversed(queue_info['recent_tasks']):
|
462 |
-
tasks_html += f"""
|
463 |
-
<tr>
|
464 |
-
<td>{task['task_id'][:8]}...</td>
|
465 |
-
<td>{datetime.fromtimestamp(task['request_time']).strftime('%H:%M:%S')}</td>
|
466 |
-
<td>{format_time(task['process_time'])}</td>
|
467 |
-
</tr>
|
468 |
-
"""
|
469 |
-
|
470 |
-
if not tasks_html:
|
471 |
-
tasks_html = """
|
472 |
-
<tr>
|
473 |
-
<td colspan="3" style="text-align: center; padding: 20px;">No historical tasks</td>
|
474 |
-
</tr>
|
475 |
-
"""
|
476 |
-
|
477 |
-
return f"""
|
478 |
-
<div class="dashboard">
|
479 |
-
<div class="queue-info-card main-card">
|
480 |
-
<h3 class="card-title">Queue Status Monitor</h3>
|
481 |
-
<div class="queue-stats">
|
482 |
-
<div class="stat-item">
|
483 |
-
<div class="stat-value">{queue_info['waiting_tasks']}</div>
|
484 |
-
<div class="stat-label">Waiting</div>
|
485 |
-
</div>
|
486 |
-
<div class="stat-item">
|
487 |
-
<div class="stat-value">{queue_info['active_tasks']}</div>
|
488 |
-
<div class="stat-label">Processing</div>
|
489 |
-
</div>
|
490 |
-
<div class="stat-item">
|
491 |
-
<div class="stat-value">{queue_info['worker_threads']}</div>
|
492 |
-
<div class="stat-label">Worker Threads</div>
|
493 |
-
</div>
|
494 |
-
</div>
|
495 |
-
|
496 |
-
<div class="wait-time">
|
497 |
-
<p><b>Current Estimated Wait Time:</b> {format_time(queue_info['estimated_wait'])}</p>
|
498 |
-
<p class="last-update"><small>Last update: {datetime.now().strftime('%H:%M:%S')}</small></p>
|
499 |
-
</div>
|
500 |
-
</div>
|
501 |
-
|
502 |
-
<div class="queue-info-card history-card">
|
503 |
-
<h3 class="card-title">Recently Processed Tasks</h3>
|
504 |
-
<table class="recent-tasks">
|
505 |
-
<thead>
|
506 |
-
<tr>
|
507 |
-
<th>Task ID</th>
|
508 |
-
<th>Request Time</th>
|
509 |
-
<th>Processing Time</th>
|
510 |
-
</tr>
|
511 |
-
</thead>
|
512 |
-
<tbody>
|
513 |
-
{tasks_html}
|
514 |
-
</tbody>
|
515 |
-
</table>
|
516 |
-
</div>
|
517 |
-
</div>
|
518 |
-
"""
|
519 |
-
|
520 |
-
def launch_workers():
|
521 |
-
"""Launch worker threads"""
|
522 |
-
global running
|
523 |
-
running = True
|
524 |
-
|
525 |
-
for _ in range(worker_threads):
|
526 |
-
worker = threading.Thread(target=queue_processor)
|
527 |
-
worker.daemon = True
|
528 |
-
worker.start()
|
529 |
-
|
530 |
-
# Custom CSS
|
531 |
-
custom_css = """
|
532 |
-
.container {
|
533 |
-
max-width: 1200px;
|
534 |
-
margin: 0 auto;
|
535 |
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
536 |
-
}
|
537 |
-
|
538 |
-
.dashboard {
|
539 |
-
display: flex;
|
540 |
-
flex-direction: column;
|
541 |
-
gap: 20px;
|
542 |
-
}
|
543 |
-
|
544 |
-
.card-title {
|
545 |
-
color: #333;
|
546 |
-
border-bottom: 2px solid #ddd;
|
547 |
-
padding-bottom: 10px;
|
548 |
-
margin-top: 0;
|
549 |
-
}
|
550 |
-
|
551 |
-
.status-card, .queue-info-card {
|
552 |
-
background: #fff;
|
553 |
-
border-radius: 12px;
|
554 |
-
padding: 20px;
|
555 |
-
margin: 10px 0;
|
556 |
-
box-shadow: 0 4px 15px rgba(0,0,0,0.08);
|
557 |
-
}
|
558 |
-
|
559 |
-
.main-card {
|
560 |
-
border-top: 5px solid #4285f4;
|
561 |
-
}
|
562 |
-
|
563 |
-
.history-card {
|
564 |
-
border-top: 5px solid #34a853;
|
565 |
-
}
|
566 |
-
|
567 |
-
.status-card.success {
|
568 |
-
background: #e7f5e7;
|
569 |
-
border-left: 5px solid #28a745;
|
570 |
-
}
|
571 |
-
|
572 |
-
.status-card.error {
|
573 |
-
background: #f8d7da;
|
574 |
-
border-left: 5px solid #dc3545;
|
575 |
-
}
|
576 |
-
|
577 |
-
.error-message {
|
578 |
-
color: #dc3545;
|
579 |
-
font-weight: bold;
|
580 |
-
padding: 10px;
|
581 |
-
background: #f8d7da;
|
582 |
-
border-radius: 5px;
|
583 |
-
}
|
584 |
-
|
585 |
-
.notice {
|
586 |
-
color: #0c5460;
|
587 |
-
background-color: #d1ecf1;
|
588 |
-
padding: 10px;
|
589 |
-
border-radius: 5px;
|
590 |
-
}
|
591 |
-
|
592 |
-
.queue-stats {
|
593 |
-
display: flex;
|
594 |
-
justify-content: space-around;
|
595 |
-
margin: 20px 0;
|
596 |
-
}
|
597 |
-
|
598 |
-
.stat-item {
|
599 |
-
text-align: center;
|
600 |
-
padding: 15px;
|
601 |
-
background: #f8f9fa;
|
602 |
-
border-radius: 10px;
|
603 |
-
min-width: 120px;
|
604 |
-
transition: transform 0.3s ease;
|
605 |
-
}
|
606 |
-
|
607 |
-
.stat-item:hover {
|
608 |
-
transform: translateY(-5px);
|
609 |
-
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
|
610 |
-
}
|
611 |
-
|
612 |
-
.stat-value {
|
613 |
-
font-size: 32px;
|
614 |
-
font-weight: bold;
|
615 |
-
color: #4285f4;
|
616 |
-
margin-bottom: 5px;
|
617 |
-
}
|
618 |
-
|
619 |
-
.stat-label {
|
620 |
-
color: #5f6368;
|
621 |
-
font-size: 16px;
|
622 |
-
}
|
623 |
-
|
624 |
-
.wait-time {
|
625 |
-
text-align: center;
|
626 |
-
margin: 20px 0;
|
627 |
-
padding: 15px;
|
628 |
-
background: #f1f3f4;
|
629 |
-
border-radius: 8px;
|
630 |
-
font-size: 18px;
|
631 |
-
}
|
632 |
-
|
633 |
-
.last-update {
|
634 |
-
color: #80868b;
|
635 |
-
margin-top: 10px;
|
636 |
-
margin-bottom: 0;
|
637 |
-
}
|
638 |
-
|
639 |
-
.recent-tasks {
|
640 |
-
width: 100%;
|
641 |
-
border-collapse: collapse;
|
642 |
-
margin-top: 15px;
|
643 |
-
background: white;
|
644 |
-
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
645 |
-
}
|
646 |
-
|
647 |
-
.recent-tasks th, .recent-tasks td {
|
648 |
-
border: 1px solid #e0e0e0;
|
649 |
-
padding: 12px 15px;
|
650 |
-
text-align: center;
|
651 |
-
}
|
652 |
-
|
653 |
-
.recent-tasks th {
|
654 |
-
background-color: #f1f3f4;
|
655 |
-
color: #202124;
|
656 |
-
font-weight: 500;
|
657 |
-
}
|
658 |
-
|
659 |
-
.recent-tasks tbody tr:hover {
|
660 |
-
background-color: #f8f9fa;
|
661 |
-
}
|
662 |
-
|
663 |
-
.tabs {
|
664 |
-
margin-top: 20px;
|
665 |
-
}
|
666 |
-
|
667 |
-
button.primary {
|
668 |
-
background-color: #4285f4;
|
669 |
-
color: white;
|
670 |
-
padding: 10px 20px;
|
671 |
-
border: none;
|
672 |
-
border-radius: 4px;
|
673 |
-
cursor: pointer;
|
674 |
-
font-size: 16px;
|
675 |
-
font-weight: 500;
|
676 |
-
transition: background-color 0.3s;
|
677 |
-
}
|
678 |
-
|
679 |
-
button.primary:hover {
|
680 |
-
background-color: #3367d6;
|
681 |
-
}
|
682 |
-
"""
|
683 |
-
|
684 |
-
# Initialize and launch worker threads
|
685 |
-
launch_workers()
|
686 |
-
|
687 |
-
# Create Gradio interface
|
688 |
-
with gr.Blocks(css=custom_css) as demo:
|
689 |
-
gr.Markdown("# Code Evaluation Service")
|
690 |
-
gr.Markdown("Code evaluation service supporting multiple programming languages, using queue mechanism to process requests")
|
691 |
-
|
692 |
-
with gr.Row():
|
693 |
-
with gr.Column(scale=3):
|
694 |
-
# Queue status info card
|
695 |
-
queue_info_html = gr.HTML(ui_get_queue_info())
|
696 |
-
refresh_queue_btn = gr.Button("Refresh Queue Status", variant="primary")
|
697 |
-
|
698 |
-
# Hidden API interface components
|
699 |
-
with gr.Row(visible=False):
|
700 |
-
api_input = gr.JSON()
|
701 |
-
api_output = gr.JSON()
|
702 |
-
|
703 |
-
# Define update function
|
704 |
-
def update_queue_info():
|
705 |
-
try:
|
706 |
-
info = ui_get_queue_info()
|
707 |
-
return info
|
708 |
-
except Exception as e:
|
709 |
-
print(f"Error updating queue info: {str(e)}")
|
710 |
-
return f"""
|
711 |
-
<div class="dashboard">
|
712 |
-
<div class="queue-info-card main-card">
|
713 |
-
<h3 class="card-title">Queue Status Monitor</h3>
|
714 |
-
<div class="queue-stats">
|
715 |
-
<p>Error refreshing queue status. Click refresh to try again.</p>
|
716 |
-
</div>
|
717 |
-
</div>
|
718 |
-
</div>
|
719 |
-
"""
|
720 |
-
|
721 |
-
# Set up periodic refresh with error handling
|
722 |
-
refresh_interval = 3
|
723 |
-
|
724 |
-
def safe_refresh():
|
725 |
-
try:
|
726 |
-
return update_queue_info()
|
727 |
-
except Exception as e:
|
728 |
-
print(f"Safe refresh error: {str(e)}")
|
729 |
-
return queue_info_html.value # Keep existing value on error
|
730 |
-
|
731 |
-
# Update queue info periodically
|
732 |
-
demo.load(safe_refresh, None, queue_info_html, every=refresh_interval)
|
733 |
|
734 |
-
|
735 |
-
|
736 |
|
737 |
-
#
|
738 |
-
|
739 |
-
evaluate_endpoint = demo.load(fn=synchronous_evaluate, inputs=api_input, outputs=api_output, api_name="evaluate")
|
740 |
|
741 |
if __name__ == "__main__":
|
742 |
-
|
743 |
-
|
744 |
-
demo.launch(
|
745 |
-
server_name="0.0.0.0", # Listen on all network interfaces
|
746 |
-
share=False, # No need for sharing link in Spaces
|
747 |
-
show_error=True, # Show detailed error messages
|
748 |
-
debug=True, # Enable debug mode for better error reporting
|
749 |
-
max_threads=10, # Limit number of threads to avoid resource contention
|
750 |
-
quiet=False # Log to console for debugging
|
751 |
-
)
|
752 |
-
finally:
|
753 |
-
# Stop worker threads
|
754 |
-
running = False
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
from pathlib import Path
|
4 |
+
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# 添加当前目录和src目录到模块搜索路径
|
7 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
8 |
src_dir = os.path.join(current_dir, "src")
|
9 |
if current_dir not in sys.path:
|
|
|
11 |
if src_dir not in sys.path:
|
12 |
sys.path.append(src_dir)
|
13 |
|
14 |
+
from src.queue_manager import QueueManager
|
15 |
+
from src.evaluator import evaluate
|
16 |
+
from src.ui import EvaluationUI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
def main():
|
19 |
+
"""主函数,创建并启动应用"""
|
20 |
+
# 创建队列管理器
|
21 |
+
queue_manager = QueueManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
+
# 设置评估函数
|
24 |
+
queue_manager.set_evaluator(evaluate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
# 创建UI
|
27 |
+
ui = EvaluationUI(queue_manager)
|
28 |
+
app = ui.create_evaluation_interface()
|
29 |
|
30 |
+
# 启动清理线程
|
31 |
+
import threading
|
32 |
+
import time
|
33 |
|
34 |
+
def cleanup_thread():
|
35 |
+
"""定期清理已完成的请求"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
while True:
|
37 |
+
try:
|
38 |
+
queue_manager.clean_completed(max_age=3600) # 清理1小时前的已完成请求
|
39 |
+
except Exception as e:
|
40 |
+
print(f"Error in cleanup thread: {e}")
|
41 |
+
time.sleep(600) # 每10分钟运行一次
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
+
cleanup_thread = threading.Thread(target=cleanup_thread, daemon=True)
|
44 |
+
cleanup_thread.start()
|
45 |
|
46 |
+
# 返回应用
|
47 |
+
return app
|
|
|
48 |
|
49 |
if __name__ == "__main__":
|
50 |
+
app = main()
|
51 |
+
app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
gradio==4.44.1
|
|
|
|
|
|
1 |
+
gradio==4.44.1
|
2 |
+
typing_extensions>=4.0.0
|
3 |
+
# dataclasses is built-in for Python 3.7+
|
src/evaluator.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import concurrent.futures
|
2 |
+
import multiprocessing
|
3 |
+
from .containerized_eval import eval_string_script
|
4 |
+
|
5 |
+
def evaluate(input_data):
|
6 |
+
"""评估代码的主函数
|
7 |
+
|
8 |
+
Args:
|
9 |
+
input_data: 列表(批量处理多个测试用例)
|
10 |
+
|
11 |
+
Returns:
|
12 |
+
list: 包含评估结果的列表
|
13 |
+
"""
|
14 |
+
try:
|
15 |
+
if not isinstance(input_data, list):
|
16 |
+
return {"status": "Exception", "error": "Input must be a list"}
|
17 |
+
|
18 |
+
results = []
|
19 |
+
max_workers = multiprocessing.cpu_count() // 2
|
20 |
+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
21 |
+
future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
|
22 |
+
for future in concurrent.futures.as_completed(future_to_item):
|
23 |
+
item = future_to_item[future]
|
24 |
+
try:
|
25 |
+
result = future.result()
|
26 |
+
item.update(result)
|
27 |
+
results.append(item)
|
28 |
+
except Exception as e:
|
29 |
+
item.update({"status": "Exception", "error": str(e)})
|
30 |
+
results.append(item)
|
31 |
+
return results
|
32 |
+
|
33 |
+
except Exception as e:
|
34 |
+
return {"status": "Exception", "error": str(e)}
|
35 |
+
|
36 |
+
def evaluate_single_case(input_data):
|
37 |
+
"""评估单个代码用例
|
38 |
+
|
39 |
+
Args:
|
40 |
+
input_data: 字典(包含代码信息)
|
41 |
+
|
42 |
+
Returns:
|
43 |
+
dict: 包含评估结果的字典
|
44 |
+
"""
|
45 |
+
try:
|
46 |
+
if not isinstance(input_data, dict):
|
47 |
+
return {"status": "Exception", "error": "Input item must be a dictionary"}
|
48 |
+
|
49 |
+
language = input_data.get('language')
|
50 |
+
completions = input_data.get('processed_completions', [])
|
51 |
+
|
52 |
+
if not completions:
|
53 |
+
return {"status": "Exception", "error": "No code provided"}
|
54 |
+
|
55 |
+
results = []
|
56 |
+
for comp in completions:
|
57 |
+
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
|
58 |
+
result = evaluate_code(code, language)
|
59 |
+
if result["status"] == "OK":
|
60 |
+
return {"status": "pass", "compiled_code": code}
|
61 |
+
print(f'Code failed to compile: \n{code}')
|
62 |
+
result["compiled_code"] = code
|
63 |
+
results.append(result)
|
64 |
+
|
65 |
+
return results[0]
|
66 |
+
|
67 |
+
except Exception as e:
|
68 |
+
return {"status": "Exception", "error": str(e)}
|
69 |
+
|
70 |
+
def evaluate_code(code, language):
|
71 |
+
"""评估特定语言的代码
|
72 |
+
|
73 |
+
Args:
|
74 |
+
code (str): 要评估的代码
|
75 |
+
language (str): 编程语言
|
76 |
+
|
77 |
+
Returns:
|
78 |
+
dict: 包含评估结果的字典
|
79 |
+
"""
|
80 |
+
try:
|
81 |
+
# 使用containerized_eval中的eval_string_script函数
|
82 |
+
result = eval_string_script(language, code)
|
83 |
+
|
84 |
+
if result["exit_code"] == 0:
|
85 |
+
return {"status": "OK", "output": result["stdout"]}
|
86 |
+
else:
|
87 |
+
return {
|
88 |
+
"status": "Error",
|
89 |
+
"error": result["stderr"] if result["stderr"] else "Unknown error",
|
90 |
+
"output": result["stdout"]
|
91 |
+
}
|
92 |
+
|
93 |
+
except Exception as e:
|
94 |
+
return {"status": "Exception", "error": str(e)}
|
src/queue_manager.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import queue
|
2 |
+
import threading
|
3 |
+
import time
|
4 |
+
from dataclasses import dataclass, field
|
5 |
+
from typing import Dict, List, Any, Optional
|
6 |
+
import uuid
|
7 |
+
|
8 |
+
|
9 |
+
@dataclass
|
10 |
+
class QueueItem:
|
11 |
+
"""代表队列中的一个评估请求"""
|
12 |
+
id: str # 唯一标识符
|
13 |
+
input_data: Dict[str, Any] # 输入数据
|
14 |
+
status: str = "queued" # 状态:queued, processing, completed, error
|
15 |
+
result: Optional[Dict[str, Any]] = None # 评估结果
|
16 |
+
created_at: float = field(default_factory=time.time) # 创建时间
|
17 |
+
started_at: Optional[float] = None # 开始处理时间
|
18 |
+
completed_at: Optional[float] = None # 完成时间
|
19 |
+
|
20 |
+
|
21 |
+
class QueueManager:
|
22 |
+
"""管理代码评估请求队列"""
|
23 |
+
|
24 |
+
def __init__(self):
|
25 |
+
self.queue = queue.Queue() # 请求队列
|
26 |
+
self.queue_items = {} # 存储所有队列项,以ID为键
|
27 |
+
self.processing_thread = None # 处理线程
|
28 |
+
self.running = False # 是否正在运行
|
29 |
+
self.evaluator = None # 评估器,将在开始时设置
|
30 |
+
|
31 |
+
def set_evaluator(self, evaluator_func):
|
32 |
+
"""设置评估函数
|
33 |
+
|
34 |
+
Args:
|
35 |
+
evaluator_func: 评估函数,接受输入数据并返回结果
|
36 |
+
"""
|
37 |
+
self.evaluator = evaluator_func
|
38 |
+
|
39 |
+
def enqueue(self, input_data):
|
40 |
+
"""添加新的评估请求到队列
|
41 |
+
|
42 |
+
Args:
|
43 |
+
input_data: 输入数据,包含要评估的代码
|
44 |
+
|
45 |
+
Returns:
|
46 |
+
str: 请求的唯一ID
|
47 |
+
"""
|
48 |
+
item_id = str(uuid.uuid4())
|
49 |
+
queue_item = QueueItem(id=item_id, input_data=input_data)
|
50 |
+
self.queue_items[item_id] = queue_item
|
51 |
+
self.queue.put(item_id)
|
52 |
+
|
53 |
+
# 如果处理线程未运行,则启动它
|
54 |
+
if not self.running:
|
55 |
+
self.start_processing()
|
56 |
+
|
57 |
+
return item_id
|
58 |
+
|
59 |
+
def start_processing(self):
|
60 |
+
"""启动队列处理线程"""
|
61 |
+
if self.processing_thread is None or not self.processing_thread.is_alive():
|
62 |
+
self.running = True
|
63 |
+
self.processing_thread = threading.Thread(target=self._process_queue)
|
64 |
+
self.processing_thread.daemon = True
|
65 |
+
self.processing_thread.start()
|
66 |
+
|
67 |
+
def stop_processing(self):
|
68 |
+
"""停止队列处理"""
|
69 |
+
self.running = False
|
70 |
+
if self.processing_thread and self.processing_thread.is_alive():
|
71 |
+
self.processing_thread.join(timeout=1.0)
|
72 |
+
|
73 |
+
def _process_queue(self):
|
74 |
+
"""处理队列中的请求(在单独的线程中运行)"""
|
75 |
+
while self.running:
|
76 |
+
try:
|
77 |
+
# 尝试从队列获取一个项目,如果队列为空,等待1秒后重试
|
78 |
+
try:
|
79 |
+
item_id = self.queue.get(timeout=1.0)
|
80 |
+
except queue.Empty:
|
81 |
+
continue
|
82 |
+
|
83 |
+
# 获取队列项并更新状态
|
84 |
+
queue_item = self.queue_items[item_id]
|
85 |
+
queue_item.status = "processing"
|
86 |
+
queue_item.started_at = time.time()
|
87 |
+
|
88 |
+
# 处理请求
|
89 |
+
try:
|
90 |
+
if self.evaluator:
|
91 |
+
result = self.evaluator(queue_item.input_data)
|
92 |
+
queue_item.result = result
|
93 |
+
queue_item.status = "completed"
|
94 |
+
else:
|
95 |
+
queue_item.status = "error"
|
96 |
+
queue_item.result = {"status": "Error", "error": "No evaluator function set"}
|
97 |
+
except Exception as e:
|
98 |
+
queue_item.status = "error"
|
99 |
+
queue_item.result = {"status": "Exception", "error": str(e)}
|
100 |
+
|
101 |
+
# 更新完成时间
|
102 |
+
queue_item.completed_at = time.time()
|
103 |
+
|
104 |
+
# 标记任务为完成
|
105 |
+
self.queue.task_done()
|
106 |
+
|
107 |
+
except Exception as e:
|
108 |
+
print(f"Error in queue processing: {e}")
|
109 |
+
time.sleep(1) # 防止过度CPU使用
|
110 |
+
|
111 |
+
def get_queue_status(self):
|
112 |
+
"""获取队列状态信息
|
113 |
+
|
114 |
+
Returns:
|
115 |
+
dict: 包含队列状态的字典
|
116 |
+
"""
|
117 |
+
queued = 0
|
118 |
+
processing = 0
|
119 |
+
completed = 0
|
120 |
+
error = 0
|
121 |
+
|
122 |
+
# 统计各状态的数量
|
123 |
+
for item in self.queue_items.values():
|
124 |
+
if item.status == "queued":
|
125 |
+
queued += 1
|
126 |
+
elif item.status == "processing":
|
127 |
+
processing += 1
|
128 |
+
elif item.status == "completed":
|
129 |
+
completed += 1
|
130 |
+
elif item.status == "error":
|
131 |
+
error += 1
|
132 |
+
|
133 |
+
# 获取队列中的项目,仅包括等待和处理中的
|
134 |
+
queue_items = []
|
135 |
+
for item in self.queue_items.values():
|
136 |
+
if item.status in ["queued", "processing"]:
|
137 |
+
queue_items.append({
|
138 |
+
"id": item.id,
|
139 |
+
"status": item.status,
|
140 |
+
"created_at": item.created_at,
|
141 |
+
"started_at": item.started_at,
|
142 |
+
})
|
143 |
+
|
144 |
+
# 按创建时间排序
|
145 |
+
queue_items.sort(key=lambda x: x["created_at"])
|
146 |
+
|
147 |
+
return {
|
148 |
+
"stats": {
|
149 |
+
"queued": queued,
|
150 |
+
"processing": processing,
|
151 |
+
"completed": completed,
|
152 |
+
"error": error,
|
153 |
+
"total": len(self.queue_items)
|
154 |
+
},
|
155 |
+
"queue_items": queue_items
|
156 |
+
}
|
157 |
+
|
158 |
+
def get_result(self, item_id):
|
159 |
+
"""获取特定请求的结果
|
160 |
+
|
161 |
+
Args:
|
162 |
+
item_id: 请求的唯一ID
|
163 |
+
|
164 |
+
Returns:
|
165 |
+
dict: 请求的状态和结果
|
166 |
+
"""
|
167 |
+
if item_id in self.queue_items:
|
168 |
+
item = self.queue_items[item_id]
|
169 |
+
return {
|
170 |
+
"id": item.id,
|
171 |
+
"status": item.status,
|
172 |
+
"result": item.result,
|
173 |
+
"created_at": item.created_at,
|
174 |
+
"started_at": item.started_at,
|
175 |
+
"completed_at": item.completed_at
|
176 |
+
}
|
177 |
+
return {"status": "not_found", "error": "Request ID not found"}
|
178 |
+
|
179 |
+
def clean_completed(self, max_age=3600):
|
180 |
+
"""清理完成的请求
|
181 |
+
|
182 |
+
Args:
|
183 |
+
max_age: 最大保留时间(秒),默认为1小时
|
184 |
+
"""
|
185 |
+
current_time = time.time()
|
186 |
+
to_remove = []
|
187 |
+
|
188 |
+
for item_id, item in self.queue_items.items():
|
189 |
+
if item.status in ["completed", "error"]:
|
190 |
+
if item.completed_at and (current_time - item.completed_at) > max_age:
|
191 |
+
to_remove.append(item_id)
|
192 |
+
|
193 |
+
for item_id in to_remove:
|
194 |
+
del self.queue_items[item_id]
|
src/ui.py
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import json
|
3 |
+
import time
|
4 |
+
from typing import Dict, Any, List, Optional
|
5 |
+
|
6 |
+
class EvaluationUI:
|
7 |
+
"""处理评估服务的Gradio界面"""
|
8 |
+
|
9 |
+
def __init__(self, queue_manager):
|
10 |
+
"""初始化UI
|
11 |
+
|
12 |
+
Args:
|
13 |
+
queue_manager: 队列管理器实例
|
14 |
+
"""
|
15 |
+
self.queue_manager = queue_manager
|
16 |
+
self.app = None
|
17 |
+
|
18 |
+
def submit_evaluation(self, input_data):
|
19 |
+
"""提交评估请求
|
20 |
+
|
21 |
+
Args:
|
22 |
+
input_data: 输入JSON数据
|
23 |
+
|
24 |
+
Returns:
|
25 |
+
dict: 包含请求ID和状态的字典
|
26 |
+
"""
|
27 |
+
try:
|
28 |
+
# 解析JSON
|
29 |
+
if isinstance(input_data, str):
|
30 |
+
input_data = json.loads(input_data)
|
31 |
+
|
32 |
+
# 提交到队列
|
33 |
+
request_id = self.queue_manager.enqueue(input_data)
|
34 |
+
|
35 |
+
# 返回请求ID
|
36 |
+
return {"request_id": request_id, "status": "queued"}
|
37 |
+
|
38 |
+
except Exception as e:
|
39 |
+
return {"status": "error", "message": str(e)}
|
40 |
+
|
41 |
+
def get_evaluation_result(self, request_id):
|
42 |
+
"""获取评估结果
|
43 |
+
|
44 |
+
Args:
|
45 |
+
request_id: 请求ID
|
46 |
+
|
47 |
+
Returns:
|
48 |
+
dict: 评估结果
|
49 |
+
"""
|
50 |
+
return self.queue_manager.get_result(request_id)
|
51 |
+
|
52 |
+
def get_queue_status(self):
|
53 |
+
"""获取队列状态
|
54 |
+
|
55 |
+
Returns:
|
56 |
+
dict: 队列状态信息
|
57 |
+
"""
|
58 |
+
return self.queue_manager.get_queue_status()
|
59 |
+
|
60 |
+
def render_queue_status(self):
|
61 |
+
"""渲染队列状态HTML
|
62 |
+
|
63 |
+
Returns:
|
64 |
+
str: 队列状态的HTML表示
|
65 |
+
"""
|
66 |
+
status = self.queue_manager.get_queue_status()
|
67 |
+
stats = status["stats"]
|
68 |
+
queue_items = status["queue_items"]
|
69 |
+
|
70 |
+
html = f"""
|
71 |
+
<div style="padding: 10px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 20px;">
|
72 |
+
<h3>队列状态</h3>
|
73 |
+
<div style="display: flex; justify-content: space-between; margin-bottom: 15px;">
|
74 |
+
<div style="text-align: center; padding: 5px; background-color: #e0f7fa; border-radius: 5px; flex: 1; margin-right: 5px;">
|
75 |
+
<div style="font-weight: bold; font-size: 24px;">{stats['queued']}</div>
|
76 |
+
<div>等待中</div>
|
77 |
+
</div>
|
78 |
+
<div style="text-align: center; padding: 5px; background-color: #fff9c4; border-radius: 5px; flex: 1; margin-right: 5px;">
|
79 |
+
<div style="font-weight: bold; font-size: 24px;">{stats['processing']}</div>
|
80 |
+
<div>处理中</div>
|
81 |
+
</div>
|
82 |
+
<div style="text-align: center; padding: 5px; background-color: #c8e6c9; border-radius: 5px; flex: 1; margin-right: 5px;">
|
83 |
+
<div style="font-weight: bold; font-size: 24px;">{stats['completed']}</div>
|
84 |
+
<div>已完成</div>
|
85 |
+
</div>
|
86 |
+
<div style="text-align: center; padding: 5px; background-color: #ffcdd2; border-radius: 5px; flex: 1;">
|
87 |
+
<div style="font-weight: bold; font-size: 24px;">{stats['error']}</div>
|
88 |
+
<div>错误</div>
|
89 |
+
</div>
|
90 |
+
</div>
|
91 |
+
"""
|
92 |
+
|
93 |
+
if queue_items:
|
94 |
+
html += """
|
95 |
+
<h4>当前队列</h4>
|
96 |
+
<table style="width: 100%; border-collapse: collapse;">
|
97 |
+
<tr style="background-color: #e0e0e0;">
|
98 |
+
<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">ID</th>
|
99 |
+
<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">状态</th>
|
100 |
+
<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">等待时间</th>
|
101 |
+
</tr>
|
102 |
+
"""
|
103 |
+
|
104 |
+
current_time = time.time()
|
105 |
+
for item in queue_items:
|
106 |
+
status_color = "#fff9c4" if item["status"] == "processing" else "#e0f7fa"
|
107 |
+
wait_time = current_time - item["created_at"]
|
108 |
+
wait_time_str = f"{int(wait_time // 60)}分{int(wait_time % 60)}秒"
|
109 |
+
|
110 |
+
html += f"""
|
111 |
+
<tr>
|
112 |
+
<td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{item['id'][:8]}...</td>
|
113 |
+
<td style="padding: 8px; text-align: left; border: 1px solid #ddd; background-color: {status_color};">
|
114 |
+
{"处理中" if item["status"] == "processing" else "等待中"}
|
115 |
+
</td>
|
116 |
+
<td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{wait_time_str}</td>
|
117 |
+
</tr>
|
118 |
+
"""
|
119 |
+
|
120 |
+
html += "</table>"
|
121 |
+
else:
|
122 |
+
html += "<p>队列为空</p>"
|
123 |
+
|
124 |
+
html += "</div>"
|
125 |
+
return html
|
126 |
+
|
127 |
+
def create_evaluation_interface(self):
|
128 |
+
"""创建评估接口
|
129 |
+
|
130 |
+
Returns:
|
131 |
+
gr.Interface: Gradio接口
|
132 |
+
"""
|
133 |
+
# 创建用于直接提交JSON的接口
|
134 |
+
with gr.Blocks(title="代码评估服务") as app:
|
135 |
+
with gr.Row():
|
136 |
+
with gr.Column(scale=3):
|
137 |
+
gr.Markdown("# 代码评估服务")
|
138 |
+
gr.Markdown("支持多种编程语言的代码评估服务,使用消息队列处理并发请求。")
|
139 |
+
|
140 |
+
# 输入JSON
|
141 |
+
json_input = gr.JSON(
|
142 |
+
label="输入JSON",
|
143 |
+
value=[{"language": "python", "processed_completions": ["def add(a, b):\n return a + b"], "prompt": "", "tests": "assert add(1, 2) == 3"}]
|
144 |
+
)
|
145 |
+
|
146 |
+
# 提交按钮
|
147 |
+
submit_btn = gr.Button("提交评估")
|
148 |
+
|
149 |
+
# 结果输出
|
150 |
+
result_output = gr.JSON(label="评估结果")
|
151 |
+
|
152 |
+
with gr.Column(scale=2):
|
153 |
+
# 队列状态
|
154 |
+
queue_status = gr.HTML(label="队列状态")
|
155 |
+
refresh_btn = gr.Button("刷新队列状态")
|
156 |
+
|
157 |
+
# 获取特定结果
|
158 |
+
with gr.Row():
|
159 |
+
request_id_input = gr.Textbox(label="请求ID")
|
160 |
+
get_result_btn = gr.Button("获取结果")
|
161 |
+
|
162 |
+
# 特定结果输出
|
163 |
+
specific_result = gr.JSON(label="请求结果")
|
164 |
+
|
165 |
+
# 事件处理
|
166 |
+
submit_btn.click(
|
167 |
+
fn=self.submit_evaluation,
|
168 |
+
inputs=[json_input],
|
169 |
+
outputs=[result_output]
|
170 |
+
)
|
171 |
+
|
172 |
+
refresh_btn.click(
|
173 |
+
fn=self.render_queue_status,
|
174 |
+
inputs=[],
|
175 |
+
outputs=[queue_status]
|
176 |
+
)
|
177 |
+
|
178 |
+
get_result_btn.click(
|
179 |
+
fn=self.get_evaluation_result,
|
180 |
+
inputs=[request_id_input],
|
181 |
+
outputs=[specific_result]
|
182 |
+
)
|
183 |
+
|
184 |
+
# 定时更新队列状态
|
185 |
+
app.load(fn=self.render_queue_status, inputs=None, outputs=queue_status, every=2)
|
186 |
+
|
187 |
+
self.app = app
|
188 |
+
return app
|
189 |
+
|
190 |
+
def create_api_interface(self):
|
191 |
+
"""创建API接口
|
192 |
+
|
193 |
+
Returns:
|
194 |
+
gr.Interface: Gradio接口
|
195 |
+
"""
|
196 |
+
return gr.Interface(
|
197 |
+
fn=self.submit_evaluation,
|
198 |
+
inputs=gr.JSON(),
|
199 |
+
outputs=gr.JSON(),
|
200 |
+
title="代码评估API",
|
201 |
+
description="API接口用于提交代码评估请求"
|
202 |
+
)
|
203 |
+
|
204 |
+
def get_app(self):
|
205 |
+
"""获取Gradio应用
|
206 |
+
|
207 |
+
Returns:
|
208 |
+
gr.Blocks: Gradio应用
|
209 |
+
"""
|
210 |
+
if self.app is None:
|
211 |
+
self.create_evaluation_interface()
|
212 |
+
return self.app
|