3v324v23 commited on
Commit
dd10f90
·
1 Parent(s): e70dce5
Files changed (6) hide show
  1. README.md +42 -0
  2. app.py +30 -733
  3. requirements.txt +3 -1
  4. src/evaluator.py +94 -0
  5. src/queue_manager.py +194 -0
  6. src/ui.py +212 -0
README.md CHANGED
@@ -8,3 +8,45 @@ pinned: false
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
11
+
12
+ # 代码评估服务
13
+
14
+ 这是一个支持多种编程语言的代码评估服务,专为Hugging Face Spaces设计。
15
+
16
+ ## 功能特点
17
+
18
+ - 支持多种编程语言的代码评估
19
+ - 使用消息队列处理并发请求
20
+ - 实时显示队列状态
21
+ - 提供API接口,便于集成
22
+
23
+ ## 系统架构
24
+
25
+ 系统采用模块化设计,主要由以下组件构成:
26
+
27
+ - **Queue Manager**: 处理请求队列,实现请求排队和顺序处理
28
+ - **Evaluator**: 负责代码评估逻辑,支持多种编程语言
29
+ - **UI**: 基于Gradio的用户界面,展示队列状态和评估结果
30
+
31
+ ## 使用方法
32
+
33
+ 1. 在Gradio界面中,提交代码评估请求
34
+ 2. 系统将请求加入队列并返回请求ID
35
+ 3. 使用请求ID查询评估结果
36
+ 4. 实时查看队列状态
37
+
38
+ ## 文件结构
39
+
40
+ ```
41
+ .
42
+ ├── app.py # 主应用入口
43
+ ├── src/
44
+ │ ├── __init__.py # 包初始化
45
+ │ ├── containerized_eval.py # 容器化评估工具
46
+ │ ├── evaluator.py # 代码评估逻辑
47
+ │ ├── queue_manager.py # 队列管理器
48
+ │ ├── ui.py # 用户界面组件
49
+ │ └── ... # 其他语言评估模块
50
+ ├── requirements.txt # 依赖项
51
+ └── Dockerfile # 容器配置
52
+ ```
app.py CHANGED
@@ -1,21 +1,9 @@
1
- import gradio as gr
2
- import json
3
- import importlib
4
  import os
5
  import sys
6
  from pathlib import Path
7
- import concurrent.futures
8
- import multiprocessing
9
- import time
10
- import threading
11
- import queue
12
- import uuid
13
- import numpy as np
14
- from datetime import datetime
15
- from tqdm.auto import tqdm
16
- from src.containerized_eval import eval_string_script
17
 
18
- # Add current directory and src directory to module search path
19
  current_dir = os.path.dirname(os.path.abspath(__file__))
20
  src_dir = os.path.join(current_dir, "src")
21
  if current_dir not in sys.path:
@@ -23,732 +11,41 @@ if current_dir not in sys.path:
23
  if src_dir not in sys.path:
24
  sys.path.append(src_dir)
25
 
26
- # Create message queue
27
- task_queue = queue.Queue()
28
- # Dictionary to store task status
29
- task_status = {}
30
- # List to store task history, max 200 tasks
31
- task_history = []
32
- # Lock for shared resources
33
- lock = threading.Lock()
34
- # Number of worker threads
35
- worker_threads = 1 # Process only one task at a time
36
- # Flag for running background threads
37
- running = True
38
- # Mapping from task type to processing time
39
- task_type_times = {}
40
 
41
- def queue_processor():
42
- """Process tasks in the queue"""
43
- while running:
44
- try:
45
- # Use get with a timeout to prevent busy waiting
46
- try:
47
- task_id, input_data, request_time = task_queue.get(timeout=0.5)
48
- except queue.Empty:
49
- continue
50
-
51
- try:
52
- with lock:
53
- if task_id not in task_status:
54
- # Task was cancelled or removed
55
- task_queue.task_done()
56
- continue
57
-
58
- task_status[task_id]['status'] = 'processing'
59
- task_status[task_id]['start_time'] = time.time()
60
-
61
- if isinstance(input_data, list) and len(input_data) > 0:
62
- sample_task = input_data[0]
63
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
64
- task_size = len(input_data)
65
- task_complexity = _estimate_task_complexity(input_data)
66
-
67
- with lock:
68
- task_status[task_id]['estimated_factors'] = {
69
- 'language': language,
70
- 'size': task_size,
71
- 'complexity': task_complexity
72
- }
73
-
74
- result = evaluate(input_data)
75
-
76
- end_time = time.time()
77
- process_time = end_time - task_status[task_id]['start_time']
78
-
79
- with lock:
80
- if task_id in task_status: # Check if task still exists
81
- task_status[task_id]['status'] = 'completed'
82
- task_status[task_id]['result'] = result
83
- task_status[task_id]['end_time'] = end_time
84
- task_status[task_id]['process_time'] = process_time
85
-
86
- if 'estimated_factors' in task_status[task_id]:
87
- factors = task_status[task_id]['estimated_factors']
88
- key = f"{factors['language']}_{factors['complexity']}"
89
-
90
- if key not in task_type_times:
91
- task_type_times[key] = []
92
-
93
- task_type_times[key].append(process_time / factors['size'])
94
- if len(task_type_times[key]) > 10:
95
- task_type_times[key] = task_type_times[key][-10:]
96
-
97
- task_history.append({
98
- 'task_id': task_id,
99
- 'request_time': request_time,
100
- 'process_time': process_time,
101
- 'status': 'completed',
102
- 'factors': task_status[task_id].get('estimated_factors', {})
103
- })
104
- while len(task_history) > 200:
105
- task_history.pop(0)
106
- except Exception as e:
107
- print(f"Error processing task {task_id}: {str(e)}")
108
- with lock:
109
- if task_id in task_status:
110
- task_status[task_id]['status'] = 'error'
111
- task_status[task_id]['error'] = str(e)
112
- task_status[task_id]['end_time'] = time.time()
113
- finally:
114
- task_queue.task_done()
115
-
116
- except Exception as e:
117
- print(f"Critical error in queue processor: {str(e)}")
118
- time.sleep(1) # Avoid tight loop in case of persistent errors
119
-
120
- def _estimate_task_complexity(tasks):
121
- """Estimate task complexity
122
-
123
- Returns: 'simple', 'medium', or 'complex'
124
- """
125
- total_code_length = 0
126
- count = 0
127
 
128
- for task in tasks:
129
- if isinstance(task, dict):
130
- prompt = task.get('prompt', '')
131
- tests = task.get('tests', '')
132
- completions = task.get('processed_completions', [])
133
-
134
- code_length = len(prompt) + len(tests)
135
- if completions:
136
- code_length += sum(len(comp) for comp in completions)
137
-
138
- total_code_length += code_length
139
- count += 1
140
 
141
- if count == 0:
142
- return 'medium'
 
143
 
144
- avg_length = total_code_length / count
 
 
145
 
146
- if avg_length < 1000:
147
- return 'simple'
148
- elif avg_length < 5000:
149
- return 'medium'
150
- else:
151
- return 'complex'
152
-
153
- def evaluate(input_data):
154
- """Main function for code evaluation"""
155
- try:
156
- if not isinstance(input_data, list):
157
- return {"status": "Exception", "error": "Input must be a list"}
158
-
159
- results = []
160
-
161
- # Use a moderate number of workers for all language tests to ensure stability
162
- # This prevents resource contention regardless of language
163
- max_workers = max(1, min(multiprocessing.cpu_count() // 2, 4))
164
-
165
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
166
- future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
167
- for future in concurrent.futures.as_completed(future_to_item):
168
- item = future_to_item[future]
169
- try:
170
- result = future.result()
171
- item.update(result)
172
- results.append(item)
173
- except Exception as e:
174
- item.update({"status": "Exception", "error": str(e)})
175
- results.append(item)
176
- return results
177
-
178
- except Exception as e:
179
- return {"status": "Exception", "error": str(e)}
180
-
181
- def evaluate_single_case(input_data):
182
- """Evaluate a single code case"""
183
- try:
184
- if not isinstance(input_data, dict):
185
- return {"status": "Exception", "error": "Input item must be a dictionary"}
186
-
187
- language = input_data.get('language')
188
- completions = input_data.get('processed_completions', [])
189
-
190
- if not completions:
191
- return {"status": "Exception", "error": "No code provided"}
192
-
193
- # Use a retry mechanism for all languages for better reliability
194
- max_retries = 2 # One retry for all languages
195
-
196
- results = []
197
- for comp in completions:
198
- code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
199
-
200
- # Try up to max_retries + 1 times for all test cases
201
- for attempt in range(max_retries + 1):
202
- result = evaluate_code(code, language)
203
-
204
- # If success or last attempt, return/record the result
205
- if result["status"] == "OK" or attempt == max_retries:
206
- if result["status"] == "OK":
207
- return result
208
- results.append(result)
209
- break
210
-
211
- # For retries, briefly wait to allow resources to stabilize
212
- time.sleep(0.3)
213
-
214
- return results[0]
215
-
216
- except Exception as e:
217
- return {"status": "Exception", "error": str(e)}
218
-
219
- def evaluate_code(code, language):
220
- """Evaluate code in a specific language"""
221
- try:
222
- result = eval_string_script(language, code)
223
- return result
224
-
225
- except Exception as e:
226
- return {"status": "Exception", "error": str(e)}
227
-
228
- def synchronous_evaluate(input_data):
229
- """Synchronously evaluate code, compatible with original interface"""
230
- try:
231
- if isinstance(input_data, list) and len(input_data) > 0:
232
- sample_task = input_data[0]
233
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
234
- task_size = len(input_data)
235
- task_complexity = _estimate_task_complexity(input_data)
236
- else:
237
- language = 'unknown'
238
- task_size = 1
239
- task_complexity = 'medium'
240
-
241
- estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
242
- estimated_total_time = estimated_time_per_task * task_size
243
-
244
- queue_info = get_queue_status()
245
- waiting_tasks = queue_info['waiting_tasks']
246
-
247
- task_id = str(uuid.uuid4())
248
- request_time = time.time()
249
-
250
- with lock:
251
- task_status[task_id] = {
252
- 'status': 'queued',
253
- 'queued_time': request_time,
254
- 'queue_position': task_queue.qsize() + 1,
255
- 'synchronous': True,
256
- 'estimated_factors': {
257
- 'language': language,
258
- 'size': task_size,
259
- 'complexity': task_complexity
260
- },
261
- 'estimated_time': estimated_total_time
262
- }
263
-
264
- task_queue.put((task_id, input_data, request_time))
265
-
266
- # Set a reasonable timeout to avoid hanging
267
- max_wait_time = max(300, estimated_total_time * 2) # At least 5 minutes or double the estimated time
268
- start_wait = time.time()
269
-
270
  while True:
271
- with lock:
272
- if task_id in task_status:
273
- status = task_status[task_id]['status']
274
- if status == 'completed':
275
- result = task_status[task_id]['result']
276
- task_status.pop(task_id, None)
277
- return result
278
- elif status == 'error':
279
- error = task_status[task_id].get('error', 'Unknown error')
280
- task_status.pop(task_id, None)
281
- return {"status": "Exception", "error": error}
282
- else:
283
- # Task somehow disappeared
284
- return {"status": "Exception", "error": "Task was lost during processing"}
285
-
286
- # Check if we've waited too long
287
- if time.time() - start_wait > max_wait_time:
288
- with lock:
289
- if task_id in task_status:
290
- task_status[task_id]['status'] = 'error'
291
- task_status[task_id]['error'] = f"Task timed out after {max_wait_time} seconds"
292
- return {"status": "Exception", "error": f"Evaluation timed out after {max_wait_time} seconds"}
293
-
294
- time.sleep(0.1)
295
-
296
- except Exception as e:
297
- print(f"Error in synchronous_evaluate: {str(e)}")
298
- return {"status": "Exception", "error": str(e)}
299
-
300
- def _get_estimated_time_for_task(language, complexity):
301
- """Get estimated processing time for a specific task type"""
302
- key = f"{language}_{complexity}"
303
-
304
- if key in task_type_times and len(task_type_times[key]) > 0:
305
- return np.median(task_type_times[key])
306
-
307
- if complexity == 'simple':
308
- return 1.0
309
- elif complexity == 'medium':
310
- return 3.0
311
- else: # complex
312
- return 8.0
313
-
314
- def enqueue_task(input_data):
315
- """Add task to queue"""
316
- if isinstance(input_data, list) and len(input_data) > 0:
317
- sample_task = input_data[0]
318
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
319
- task_size = len(input_data)
320
- task_complexity = _estimate_task_complexity(input_data)
321
- else:
322
- language = 'unknown'
323
- task_size = 1
324
- task_complexity = 'medium'
325
-
326
- estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
327
- estimated_total_time = estimated_time_per_task * task_size
328
-
329
- task_id = str(uuid.uuid4())
330
- request_time = time.time()
331
-
332
- # Get current queue size before adding new task
333
- current_queue_size = task_queue.qsize()
334
-
335
- with lock:
336
- task_status[task_id] = {
337
- 'status': 'queued',
338
- 'queued_time': request_time,
339
- 'queue_position': current_queue_size + 1,
340
- 'estimated_factors': {
341
- 'language': language,
342
- 'size': task_size,
343
- 'complexity': task_complexity
344
- },
345
- 'estimated_time': estimated_total_time
346
- }
347
-
348
- queue_info = get_queue_status()
349
- est_wait = queue_info['estimated_wait']
350
-
351
- task_queue.put((task_id, input_data, request_time))
352
-
353
- return {
354
- 'task_id': task_id,
355
- 'status': 'queued',
356
- 'queue_position': task_status[task_id]['queue_position'],
357
- 'estimated_wait': est_wait,
358
- 'estimated_processing': estimated_total_time
359
- }
360
-
361
- def check_status(task_id):
362
- """Check task status"""
363
- with lock:
364
- if task_id not in task_status:
365
- return {'status': 'not_found'}
366
-
367
- status_info = task_status[task_id].copy()
368
-
369
- if status_info['status'] in ['completed', 'error'] and time.time() - status_info.get('end_time', 0) > 3600:
370
- task_status.pop(task_id, None)
371
-
372
- return status_info
373
-
374
- def get_queue_status():
375
- """Get queue status"""
376
- with lock:
377
- # Get all tasks that are either queued or processing
378
- queued_tasks = [t for t in task_status.values() if t['status'] == 'queued']
379
- processing_tasks = [t for t in task_status.values() if t['status'] == 'processing']
380
-
381
- # Get the actual queue size from the queue object
382
- actual_queue_size = task_queue.qsize()
383
-
384
- # Use the larger of actual queue size or queued tasks count
385
- queue_size = max(actual_queue_size, len(queued_tasks))
386
- active_tasks = len(processing_tasks)
387
- waiting_tasks = queue_size
388
-
389
- # Clean up completed or error tasks that are older than 1 hour
390
- current_time = time.time()
391
- for task_id in list(task_status.keys()):
392
- if task_status[task_id]['status'] in ['completed', 'error']:
393
- if current_time - task_status[task_id].get('end_time', 0) > 3600:
394
- task_status.pop(task_id, None)
395
-
396
- remaining_processing_time = 0
397
- for task in processing_tasks:
398
- if 'start_time' in task and 'estimated_time' in task:
399
- elapsed = time.time() - task['start_time']
400
- remaining = max(0, task['estimated_time'] - elapsed)
401
- remaining_processing_time += remaining
402
- else:
403
- remaining_processing_time += 2
404
-
405
- if active_tasks > 0:
406
- remaining_processing_time = remaining_processing_time / min(active_tasks, worker_threads)
407
-
408
- queued_processing_time = 0
409
- for task in queued_tasks:
410
- if 'estimated_time' in task:
411
- queued_processing_time += task['estimated_time']
412
- else:
413
- queued_processing_time += 5
414
-
415
- estimated_wait = remaining_processing_time + queued_processing_time
416
-
417
- if task_history:
418
- prediction_ratios = []
419
- for task in task_history:
420
- if 'factors' in task and 'estimated_time' in task:
421
- prediction_ratios.append(task['process_time'] / task['estimated_time'])
422
-
423
- if prediction_ratios:
424
- correction_factor = np.median(prediction_ratios)
425
- correction_factor = max(0.5, min(2.0, correction_factor))
426
- estimated_wait *= correction_factor
427
-
428
- estimated_wait = max(0.1, estimated_wait)
429
- if waiting_tasks == 0 and active_tasks == 0:
430
- estimated_wait = 0
431
-
432
- recent_tasks = task_history[-5:] if task_history else []
433
-
434
- return {
435
- 'queue_size': queue_size,
436
- 'active_tasks': active_tasks,
437
- 'waiting_tasks': waiting_tasks,
438
- 'worker_threads': worker_threads,
439
- 'estimated_wait': estimated_wait,
440
- 'recent_tasks': recent_tasks
441
- }
442
-
443
- def format_time(seconds):
444
- """Format time into readable format"""
445
- if seconds < 60:
446
- return f"{seconds:.1f} seconds"
447
- elif seconds < 3600:
448
- minutes = int(seconds / 60)
449
- seconds = seconds % 60
450
- return f"{minutes}m {seconds:.1f}s"
451
- else:
452
- hours = int(seconds / 3600)
453
- minutes = int((seconds % 3600) / 60)
454
- return f"{hours}h {minutes}m"
455
-
456
- def ui_get_queue_info():
457
- """Get queue info for UI"""
458
- queue_info = get_queue_status()
459
-
460
- tasks_html = ""
461
- for task in reversed(queue_info['recent_tasks']):
462
- tasks_html += f"""
463
- <tr>
464
- <td>{task['task_id'][:8]}...</td>
465
- <td>{datetime.fromtimestamp(task['request_time']).strftime('%H:%M:%S')}</td>
466
- <td>{format_time(task['process_time'])}</td>
467
- </tr>
468
- """
469
-
470
- if not tasks_html:
471
- tasks_html = """
472
- <tr>
473
- <td colspan="3" style="text-align: center; padding: 20px;">No historical tasks</td>
474
- </tr>
475
- """
476
-
477
- return f"""
478
- <div class="dashboard">
479
- <div class="queue-info-card main-card">
480
- <h3 class="card-title">Queue Status Monitor</h3>
481
- <div class="queue-stats">
482
- <div class="stat-item">
483
- <div class="stat-value">{queue_info['waiting_tasks']}</div>
484
- <div class="stat-label">Waiting</div>
485
- </div>
486
- <div class="stat-item">
487
- <div class="stat-value">{queue_info['active_tasks']}</div>
488
- <div class="stat-label">Processing</div>
489
- </div>
490
- <div class="stat-item">
491
- <div class="stat-value">{queue_info['worker_threads']}</div>
492
- <div class="stat-label">Worker Threads</div>
493
- </div>
494
- </div>
495
-
496
- <div class="wait-time">
497
- <p><b>Current Estimated Wait Time:</b> {format_time(queue_info['estimated_wait'])}</p>
498
- <p class="last-update"><small>Last update: {datetime.now().strftime('%H:%M:%S')}</small></p>
499
- </div>
500
- </div>
501
-
502
- <div class="queue-info-card history-card">
503
- <h3 class="card-title">Recently Processed Tasks</h3>
504
- <table class="recent-tasks">
505
- <thead>
506
- <tr>
507
- <th>Task ID</th>
508
- <th>Request Time</th>
509
- <th>Processing Time</th>
510
- </tr>
511
- </thead>
512
- <tbody>
513
- {tasks_html}
514
- </tbody>
515
- </table>
516
- </div>
517
- </div>
518
- """
519
-
520
- def launch_workers():
521
- """Launch worker threads"""
522
- global running
523
- running = True
524
-
525
- for _ in range(worker_threads):
526
- worker = threading.Thread(target=queue_processor)
527
- worker.daemon = True
528
- worker.start()
529
-
530
- # Custom CSS
531
- custom_css = """
532
- .container {
533
- max-width: 1200px;
534
- margin: 0 auto;
535
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
536
- }
537
-
538
- .dashboard {
539
- display: flex;
540
- flex-direction: column;
541
- gap: 20px;
542
- }
543
-
544
- .card-title {
545
- color: #333;
546
- border-bottom: 2px solid #ddd;
547
- padding-bottom: 10px;
548
- margin-top: 0;
549
- }
550
-
551
- .status-card, .queue-info-card {
552
- background: #fff;
553
- border-radius: 12px;
554
- padding: 20px;
555
- margin: 10px 0;
556
- box-shadow: 0 4px 15px rgba(0,0,0,0.08);
557
- }
558
-
559
- .main-card {
560
- border-top: 5px solid #4285f4;
561
- }
562
-
563
- .history-card {
564
- border-top: 5px solid #34a853;
565
- }
566
-
567
- .status-card.success {
568
- background: #e7f5e7;
569
- border-left: 5px solid #28a745;
570
- }
571
-
572
- .status-card.error {
573
- background: #f8d7da;
574
- border-left: 5px solid #dc3545;
575
- }
576
-
577
- .error-message {
578
- color: #dc3545;
579
- font-weight: bold;
580
- padding: 10px;
581
- background: #f8d7da;
582
- border-radius: 5px;
583
- }
584
-
585
- .notice {
586
- color: #0c5460;
587
- background-color: #d1ecf1;
588
- padding: 10px;
589
- border-radius: 5px;
590
- }
591
-
592
- .queue-stats {
593
- display: flex;
594
- justify-content: space-around;
595
- margin: 20px 0;
596
- }
597
-
598
- .stat-item {
599
- text-align: center;
600
- padding: 15px;
601
- background: #f8f9fa;
602
- border-radius: 10px;
603
- min-width: 120px;
604
- transition: transform 0.3s ease;
605
- }
606
-
607
- .stat-item:hover {
608
- transform: translateY(-5px);
609
- box-shadow: 0 5px 15px rgba(0,0,0,0.1);
610
- }
611
-
612
- .stat-value {
613
- font-size: 32px;
614
- font-weight: bold;
615
- color: #4285f4;
616
- margin-bottom: 5px;
617
- }
618
-
619
- .stat-label {
620
- color: #5f6368;
621
- font-size: 16px;
622
- }
623
-
624
- .wait-time {
625
- text-align: center;
626
- margin: 20px 0;
627
- padding: 15px;
628
- background: #f1f3f4;
629
- border-radius: 8px;
630
- font-size: 18px;
631
- }
632
-
633
- .last-update {
634
- color: #80868b;
635
- margin-top: 10px;
636
- margin-bottom: 0;
637
- }
638
-
639
- .recent-tasks {
640
- width: 100%;
641
- border-collapse: collapse;
642
- margin-top: 15px;
643
- background: white;
644
- box-shadow: 0 1px 3px rgba(0,0,0,0.05);
645
- }
646
-
647
- .recent-tasks th, .recent-tasks td {
648
- border: 1px solid #e0e0e0;
649
- padding: 12px 15px;
650
- text-align: center;
651
- }
652
-
653
- .recent-tasks th {
654
- background-color: #f1f3f4;
655
- color: #202124;
656
- font-weight: 500;
657
- }
658
-
659
- .recent-tasks tbody tr:hover {
660
- background-color: #f8f9fa;
661
- }
662
-
663
- .tabs {
664
- margin-top: 20px;
665
- }
666
-
667
- button.primary {
668
- background-color: #4285f4;
669
- color: white;
670
- padding: 10px 20px;
671
- border: none;
672
- border-radius: 4px;
673
- cursor: pointer;
674
- font-size: 16px;
675
- font-weight: 500;
676
- transition: background-color 0.3s;
677
- }
678
-
679
- button.primary:hover {
680
- background-color: #3367d6;
681
- }
682
- """
683
-
684
- # Initialize and launch worker threads
685
- launch_workers()
686
-
687
- # Create Gradio interface
688
- with gr.Blocks(css=custom_css) as demo:
689
- gr.Markdown("# Code Evaluation Service")
690
- gr.Markdown("Code evaluation service supporting multiple programming languages, using queue mechanism to process requests")
691
-
692
- with gr.Row():
693
- with gr.Column(scale=3):
694
- # Queue status info card
695
- queue_info_html = gr.HTML(ui_get_queue_info())
696
- refresh_queue_btn = gr.Button("Refresh Queue Status", variant="primary")
697
-
698
- # Hidden API interface components
699
- with gr.Row(visible=False):
700
- api_input = gr.JSON()
701
- api_output = gr.JSON()
702
-
703
- # Define update function
704
- def update_queue_info():
705
- try:
706
- info = ui_get_queue_info()
707
- return info
708
- except Exception as e:
709
- print(f"Error updating queue info: {str(e)}")
710
- return f"""
711
- <div class="dashboard">
712
- <div class="queue-info-card main-card">
713
- <h3 class="card-title">Queue Status Monitor</h3>
714
- <div class="queue-stats">
715
- <p>Error refreshing queue status. Click refresh to try again.</p>
716
- </div>
717
- </div>
718
- </div>
719
- """
720
-
721
- # Set up periodic refresh with error handling
722
- refresh_interval = 3
723
-
724
- def safe_refresh():
725
- try:
726
- return update_queue_info()
727
- except Exception as e:
728
- print(f"Safe refresh error: {str(e)}")
729
- return queue_info_html.value # Keep existing value on error
730
-
731
- # Update queue info periodically
732
- demo.load(safe_refresh, None, queue_info_html, every=refresh_interval)
733
 
734
- # Refresh button event
735
- refresh_queue_btn.click(update_queue_info, None, queue_info_html)
736
 
737
- # Add evaluation endpoint compatible with original interface
738
- demo.queue()
739
- evaluate_endpoint = demo.load(fn=synchronous_evaluate, inputs=api_input, outputs=api_output, api_name="evaluate")
740
 
741
  if __name__ == "__main__":
742
- try:
743
- # Use configuration suitable for Hugging Face Spaces
744
- demo.launch(
745
- server_name="0.0.0.0", # Listen on all network interfaces
746
- share=False, # No need for sharing link in Spaces
747
- show_error=True, # Show detailed error messages
748
- debug=True, # Enable debug mode for better error reporting
749
- max_threads=10, # Limit number of threads to avoid resource contention
750
- quiet=False # Log to console for debugging
751
- )
752
- finally:
753
- # Stop worker threads
754
- running = False
 
 
 
 
1
  import os
2
  import sys
3
  from pathlib import Path
4
+ import gradio as gr
 
 
 
 
 
 
 
 
 
5
 
6
+ # 添加当前目录和src目录到模块搜索路径
7
  current_dir = os.path.dirname(os.path.abspath(__file__))
8
  src_dir = os.path.join(current_dir, "src")
9
  if current_dir not in sys.path:
 
11
  if src_dir not in sys.path:
12
  sys.path.append(src_dir)
13
 
14
+ from src.queue_manager import QueueManager
15
+ from src.evaluator import evaluate
16
+ from src.ui import EvaluationUI
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ def main():
19
+ """主函数,创建并启动应用"""
20
+ # 创建队列管理器
21
+ queue_manager = QueueManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ # 设置评估函数
24
+ queue_manager.set_evaluator(evaluate)
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # 创建UI
27
+ ui = EvaluationUI(queue_manager)
28
+ app = ui.create_evaluation_interface()
29
 
30
+ # 启动清理线程
31
+ import threading
32
+ import time
33
 
34
+ def cleanup_thread():
35
+ """定期清理已完成的请求"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  while True:
37
+ try:
38
+ queue_manager.clean_completed(max_age=3600) # 清理1小时前的已完成请求
39
+ except Exception as e:
40
+ print(f"Error in cleanup thread: {e}")
41
+ time.sleep(600) # 每10分钟运行一次
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ cleanup_thread = threading.Thread(target=cleanup_thread, daemon=True)
44
+ cleanup_thread.start()
45
 
46
+ # 返回应用
47
+ return app
 
48
 
49
  if __name__ == "__main__":
50
+ app = main()
51
+ app.launch()
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- gradio==4.44.1
 
 
 
1
+ gradio==4.44.1
2
+ typing_extensions>=4.0.0
3
+ # dataclasses is built-in for Python 3.7+
src/evaluator.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import concurrent.futures
2
+ import multiprocessing
3
+ from .containerized_eval import eval_string_script
4
+
5
+ def evaluate(input_data):
6
+ """评估代码的主函数
7
+
8
+ Args:
9
+ input_data: 列表(批量处理多个测试用例)
10
+
11
+ Returns:
12
+ list: 包含评估结果的列表
13
+ """
14
+ try:
15
+ if not isinstance(input_data, list):
16
+ return {"status": "Exception", "error": "Input must be a list"}
17
+
18
+ results = []
19
+ max_workers = multiprocessing.cpu_count() // 2
20
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
21
+ future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
22
+ for future in concurrent.futures.as_completed(future_to_item):
23
+ item = future_to_item[future]
24
+ try:
25
+ result = future.result()
26
+ item.update(result)
27
+ results.append(item)
28
+ except Exception as e:
29
+ item.update({"status": "Exception", "error": str(e)})
30
+ results.append(item)
31
+ return results
32
+
33
+ except Exception as e:
34
+ return {"status": "Exception", "error": str(e)}
35
+
36
+ def evaluate_single_case(input_data):
37
+ """评估单个代码用例
38
+
39
+ Args:
40
+ input_data: 字典(包含代码信息)
41
+
42
+ Returns:
43
+ dict: 包含评估结果的字典
44
+ """
45
+ try:
46
+ if not isinstance(input_data, dict):
47
+ return {"status": "Exception", "error": "Input item must be a dictionary"}
48
+
49
+ language = input_data.get('language')
50
+ completions = input_data.get('processed_completions', [])
51
+
52
+ if not completions:
53
+ return {"status": "Exception", "error": "No code provided"}
54
+
55
+ results = []
56
+ for comp in completions:
57
+ code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
58
+ result = evaluate_code(code, language)
59
+ if result["status"] == "OK":
60
+ return {"status": "pass", "compiled_code": code}
61
+ print(f'Code failed to compile: \n{code}')
62
+ result["compiled_code"] = code
63
+ results.append(result)
64
+
65
+ return results[0]
66
+
67
+ except Exception as e:
68
+ return {"status": "Exception", "error": str(e)}
69
+
70
+ def evaluate_code(code, language):
71
+ """评估特定语言的代码
72
+
73
+ Args:
74
+ code (str): 要评估的代码
75
+ language (str): 编程语言
76
+
77
+ Returns:
78
+ dict: 包含评估结果的字典
79
+ """
80
+ try:
81
+ # 使用containerized_eval中的eval_string_script函数
82
+ result = eval_string_script(language, code)
83
+
84
+ if result["exit_code"] == 0:
85
+ return {"status": "OK", "output": result["stdout"]}
86
+ else:
87
+ return {
88
+ "status": "Error",
89
+ "error": result["stderr"] if result["stderr"] else "Unknown error",
90
+ "output": result["stdout"]
91
+ }
92
+
93
+ except Exception as e:
94
+ return {"status": "Exception", "error": str(e)}
src/queue_manager.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import queue
2
+ import threading
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Any, Optional
6
+ import uuid
7
+
8
+
9
+ @dataclass
10
+ class QueueItem:
11
+ """代表队列中的一个评估请求"""
12
+ id: str # 唯一标识符
13
+ input_data: Dict[str, Any] # 输入数据
14
+ status: str = "queued" # 状态:queued, processing, completed, error
15
+ result: Optional[Dict[str, Any]] = None # 评估结果
16
+ created_at: float = field(default_factory=time.time) # 创建时间
17
+ started_at: Optional[float] = None # 开始处理时间
18
+ completed_at: Optional[float] = None # 完成时间
19
+
20
+
21
+ class QueueManager:
22
+ """管理代码评估请求队列"""
23
+
24
+ def __init__(self):
25
+ self.queue = queue.Queue() # 请求队列
26
+ self.queue_items = {} # 存储所有队列项,以ID为键
27
+ self.processing_thread = None # 处理线程
28
+ self.running = False # 是否正在运行
29
+ self.evaluator = None # 评估器,将在开始时设置
30
+
31
+ def set_evaluator(self, evaluator_func):
32
+ """设置评估函数
33
+
34
+ Args:
35
+ evaluator_func: 评估函数,接受输入数据并返回结果
36
+ """
37
+ self.evaluator = evaluator_func
38
+
39
+ def enqueue(self, input_data):
40
+ """添加新的评估请求到队列
41
+
42
+ Args:
43
+ input_data: 输入数据,包含要评估的代码
44
+
45
+ Returns:
46
+ str: 请求的唯一ID
47
+ """
48
+ item_id = str(uuid.uuid4())
49
+ queue_item = QueueItem(id=item_id, input_data=input_data)
50
+ self.queue_items[item_id] = queue_item
51
+ self.queue.put(item_id)
52
+
53
+ # 如果处理线程未运行,则启动它
54
+ if not self.running:
55
+ self.start_processing()
56
+
57
+ return item_id
58
+
59
+ def start_processing(self):
60
+ """启动队列处理线程"""
61
+ if self.processing_thread is None or not self.processing_thread.is_alive():
62
+ self.running = True
63
+ self.processing_thread = threading.Thread(target=self._process_queue)
64
+ self.processing_thread.daemon = True
65
+ self.processing_thread.start()
66
+
67
+ def stop_processing(self):
68
+ """停止队列处理"""
69
+ self.running = False
70
+ if self.processing_thread and self.processing_thread.is_alive():
71
+ self.processing_thread.join(timeout=1.0)
72
+
73
+ def _process_queue(self):
74
+ """处理队列中的请求(在单独的线程中运行)"""
75
+ while self.running:
76
+ try:
77
+ # 尝试从队列获取一个项目,如果队列为空,等待1秒后重试
78
+ try:
79
+ item_id = self.queue.get(timeout=1.0)
80
+ except queue.Empty:
81
+ continue
82
+
83
+ # 获取队列项并更新状态
84
+ queue_item = self.queue_items[item_id]
85
+ queue_item.status = "processing"
86
+ queue_item.started_at = time.time()
87
+
88
+ # 处理请求
89
+ try:
90
+ if self.evaluator:
91
+ result = self.evaluator(queue_item.input_data)
92
+ queue_item.result = result
93
+ queue_item.status = "completed"
94
+ else:
95
+ queue_item.status = "error"
96
+ queue_item.result = {"status": "Error", "error": "No evaluator function set"}
97
+ except Exception as e:
98
+ queue_item.status = "error"
99
+ queue_item.result = {"status": "Exception", "error": str(e)}
100
+
101
+ # 更新完成时间
102
+ queue_item.completed_at = time.time()
103
+
104
+ # 标记任务为完成
105
+ self.queue.task_done()
106
+
107
+ except Exception as e:
108
+ print(f"Error in queue processing: {e}")
109
+ time.sleep(1) # 防止过度CPU使用
110
+
111
+ def get_queue_status(self):
112
+ """获取队列状态信息
113
+
114
+ Returns:
115
+ dict: 包含队列状态的字典
116
+ """
117
+ queued = 0
118
+ processing = 0
119
+ completed = 0
120
+ error = 0
121
+
122
+ # 统计各状态的数量
123
+ for item in self.queue_items.values():
124
+ if item.status == "queued":
125
+ queued += 1
126
+ elif item.status == "processing":
127
+ processing += 1
128
+ elif item.status == "completed":
129
+ completed += 1
130
+ elif item.status == "error":
131
+ error += 1
132
+
133
+ # 获取队列中的项目,仅包括等待和处理中的
134
+ queue_items = []
135
+ for item in self.queue_items.values():
136
+ if item.status in ["queued", "processing"]:
137
+ queue_items.append({
138
+ "id": item.id,
139
+ "status": item.status,
140
+ "created_at": item.created_at,
141
+ "started_at": item.started_at,
142
+ })
143
+
144
+ # 按创建时间排序
145
+ queue_items.sort(key=lambda x: x["created_at"])
146
+
147
+ return {
148
+ "stats": {
149
+ "queued": queued,
150
+ "processing": processing,
151
+ "completed": completed,
152
+ "error": error,
153
+ "total": len(self.queue_items)
154
+ },
155
+ "queue_items": queue_items
156
+ }
157
+
158
+ def get_result(self, item_id):
159
+ """获取特定请求的结果
160
+
161
+ Args:
162
+ item_id: 请求的唯一ID
163
+
164
+ Returns:
165
+ dict: 请求的状态和结果
166
+ """
167
+ if item_id in self.queue_items:
168
+ item = self.queue_items[item_id]
169
+ return {
170
+ "id": item.id,
171
+ "status": item.status,
172
+ "result": item.result,
173
+ "created_at": item.created_at,
174
+ "started_at": item.started_at,
175
+ "completed_at": item.completed_at
176
+ }
177
+ return {"status": "not_found", "error": "Request ID not found"}
178
+
179
+ def clean_completed(self, max_age=3600):
180
+ """清理完成的请求
181
+
182
+ Args:
183
+ max_age: 最大保留时间(秒),默认为1小时
184
+ """
185
+ current_time = time.time()
186
+ to_remove = []
187
+
188
+ for item_id, item in self.queue_items.items():
189
+ if item.status in ["completed", "error"]:
190
+ if item.completed_at and (current_time - item.completed_at) > max_age:
191
+ to_remove.append(item_id)
192
+
193
+ for item_id in to_remove:
194
+ del self.queue_items[item_id]
src/ui.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import time
4
+ from typing import Dict, Any, List, Optional
5
+
6
+ class EvaluationUI:
7
+ """处理评估服务的Gradio界面"""
8
+
9
+ def __init__(self, queue_manager):
10
+ """初始化UI
11
+
12
+ Args:
13
+ queue_manager: 队列管理器实例
14
+ """
15
+ self.queue_manager = queue_manager
16
+ self.app = None
17
+
18
+ def submit_evaluation(self, input_data):
19
+ """提交评估请求
20
+
21
+ Args:
22
+ input_data: 输入JSON数据
23
+
24
+ Returns:
25
+ dict: 包含请求ID和状态的字典
26
+ """
27
+ try:
28
+ # 解析JSON
29
+ if isinstance(input_data, str):
30
+ input_data = json.loads(input_data)
31
+
32
+ # 提交到队列
33
+ request_id = self.queue_manager.enqueue(input_data)
34
+
35
+ # 返回请求ID
36
+ return {"request_id": request_id, "status": "queued"}
37
+
38
+ except Exception as e:
39
+ return {"status": "error", "message": str(e)}
40
+
41
+ def get_evaluation_result(self, request_id):
42
+ """获取评估结果
43
+
44
+ Args:
45
+ request_id: 请求ID
46
+
47
+ Returns:
48
+ dict: 评估结果
49
+ """
50
+ return self.queue_manager.get_result(request_id)
51
+
52
+ def get_queue_status(self):
53
+ """获取队列状态
54
+
55
+ Returns:
56
+ dict: 队列状态信息
57
+ """
58
+ return self.queue_manager.get_queue_status()
59
+
60
+ def render_queue_status(self):
61
+ """渲染队列状态HTML
62
+
63
+ Returns:
64
+ str: 队列状态的HTML表示
65
+ """
66
+ status = self.queue_manager.get_queue_status()
67
+ stats = status["stats"]
68
+ queue_items = status["queue_items"]
69
+
70
+ html = f"""
71
+ <div style="padding: 10px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 20px;">
72
+ <h3>队列状态</h3>
73
+ <div style="display: flex; justify-content: space-between; margin-bottom: 15px;">
74
+ <div style="text-align: center; padding: 5px; background-color: #e0f7fa; border-radius: 5px; flex: 1; margin-right: 5px;">
75
+ <div style="font-weight: bold; font-size: 24px;">{stats['queued']}</div>
76
+ <div>等待中</div>
77
+ </div>
78
+ <div style="text-align: center; padding: 5px; background-color: #fff9c4; border-radius: 5px; flex: 1; margin-right: 5px;">
79
+ <div style="font-weight: bold; font-size: 24px;">{stats['processing']}</div>
80
+ <div>处理中</div>
81
+ </div>
82
+ <div style="text-align: center; padding: 5px; background-color: #c8e6c9; border-radius: 5px; flex: 1; margin-right: 5px;">
83
+ <div style="font-weight: bold; font-size: 24px;">{stats['completed']}</div>
84
+ <div>已完成</div>
85
+ </div>
86
+ <div style="text-align: center; padding: 5px; background-color: #ffcdd2; border-radius: 5px; flex: 1;">
87
+ <div style="font-weight: bold; font-size: 24px;">{stats['error']}</div>
88
+ <div>错误</div>
89
+ </div>
90
+ </div>
91
+ """
92
+
93
+ if queue_items:
94
+ html += """
95
+ <h4>当前队列</h4>
96
+ <table style="width: 100%; border-collapse: collapse;">
97
+ <tr style="background-color: #e0e0e0;">
98
+ <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">ID</th>
99
+ <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">状态</th>
100
+ <th style="padding: 8px; text-align: left; border: 1px solid #ddd;">等待时间</th>
101
+ </tr>
102
+ """
103
+
104
+ current_time = time.time()
105
+ for item in queue_items:
106
+ status_color = "#fff9c4" if item["status"] == "processing" else "#e0f7fa"
107
+ wait_time = current_time - item["created_at"]
108
+ wait_time_str = f"{int(wait_time // 60)}分{int(wait_time % 60)}秒"
109
+
110
+ html += f"""
111
+ <tr>
112
+ <td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{item['id'][:8]}...</td>
113
+ <td style="padding: 8px; text-align: left; border: 1px solid #ddd; background-color: {status_color};">
114
+ {"处理中" if item["status"] == "processing" else "等待中"}
115
+ </td>
116
+ <td style="padding: 8px; text-align: left; border: 1px solid #ddd;">{wait_time_str}</td>
117
+ </tr>
118
+ """
119
+
120
+ html += "</table>"
121
+ else:
122
+ html += "<p>队列为空</p>"
123
+
124
+ html += "</div>"
125
+ return html
126
+
127
+ def create_evaluation_interface(self):
128
+ """创建评估接口
129
+
130
+ Returns:
131
+ gr.Interface: Gradio接口
132
+ """
133
+ # 创建用于直接提交JSON的接口
134
+ with gr.Blocks(title="代码评估服务") as app:
135
+ with gr.Row():
136
+ with gr.Column(scale=3):
137
+ gr.Markdown("# 代码评估服务")
138
+ gr.Markdown("支持多种编程语言的代码评估服务,使用消息队列处理并发请求。")
139
+
140
+ # 输入JSON
141
+ json_input = gr.JSON(
142
+ label="输入JSON",
143
+ value=[{"language": "python", "processed_completions": ["def add(a, b):\n return a + b"], "prompt": "", "tests": "assert add(1, 2) == 3"}]
144
+ )
145
+
146
+ # 提交按钮
147
+ submit_btn = gr.Button("提交评估")
148
+
149
+ # 结果输出
150
+ result_output = gr.JSON(label="评估结果")
151
+
152
+ with gr.Column(scale=2):
153
+ # 队列状态
154
+ queue_status = gr.HTML(label="队列状态")
155
+ refresh_btn = gr.Button("刷新队列状态")
156
+
157
+ # 获取特定结果
158
+ with gr.Row():
159
+ request_id_input = gr.Textbox(label="请求ID")
160
+ get_result_btn = gr.Button("获取结果")
161
+
162
+ # 特定结果输出
163
+ specific_result = gr.JSON(label="请求结果")
164
+
165
+ # 事件处理
166
+ submit_btn.click(
167
+ fn=self.submit_evaluation,
168
+ inputs=[json_input],
169
+ outputs=[result_output]
170
+ )
171
+
172
+ refresh_btn.click(
173
+ fn=self.render_queue_status,
174
+ inputs=[],
175
+ outputs=[queue_status]
176
+ )
177
+
178
+ get_result_btn.click(
179
+ fn=self.get_evaluation_result,
180
+ inputs=[request_id_input],
181
+ outputs=[specific_result]
182
+ )
183
+
184
+ # 定时更新队列状态
185
+ app.load(fn=self.render_queue_status, inputs=None, outputs=queue_status, every=2)
186
+
187
+ self.app = app
188
+ return app
189
+
190
+ def create_api_interface(self):
191
+ """创建API接口
192
+
193
+ Returns:
194
+ gr.Interface: Gradio接口
195
+ """
196
+ return gr.Interface(
197
+ fn=self.submit_evaluation,
198
+ inputs=gr.JSON(),
199
+ outputs=gr.JSON(),
200
+ title="代码评估API",
201
+ description="API接口用于提交代码评估请求"
202
+ )
203
+
204
+ def get_app(self):
205
+ """获取Gradio应用
206
+
207
+ Returns:
208
+ gr.Blocks: Gradio应用
209
+ """
210
+ if self.app is None:
211
+ self.create_evaluation_interface()
212
+ return self.app