朱东升 commited on
Commit
de3b744
·
1 Parent(s): 99d69d8
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. app.py +623 -10
  3. src/evaluator.py +0 -78
  4. src/queue_manager.py +0 -269
  5. src/ui.py +0 -218
  6. src/utils.py +0 -62
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -4,7 +4,16 @@ import importlib
4
  import os
5
  import sys
6
  from pathlib import Path
 
7
  import multiprocessing
 
 
 
 
 
 
 
 
8
 
9
  # Add current directory and src directory to module search path
10
  current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -14,12 +23,618 @@ if current_dir not in sys.path:
14
  if src_dir not in sys.path:
15
  sys.path.append(src_dir)
16
 
17
- # Import functions from modules
18
- from src.queue_manager import (
19
- synchronous_evaluate, enqueue_task, check_status,
20
- get_queue_status, launch_workers, task_history
21
- )
22
- from src.ui import ui_get_queue_info, custom_css
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Initialize and launch worker threads
25
  launch_workers()
@@ -42,8 +657,7 @@ with gr.Blocks(css=custom_css) as demo:
42
 
43
  # Define update function
44
  def update_queue_info():
45
- queue_info = get_queue_status()
46
- return ui_get_queue_info(queue_info, task_history)
47
 
48
  # Update queue info periodically
49
  demo.load(update_queue_info, None, queue_info_html, every=3)
@@ -60,5 +674,4 @@ if __name__ == "__main__":
60
  demo.launch()
61
  finally:
62
  # Stop worker threads
63
- from src.queue_manager import running
64
- globals()['running'] = False
 
4
  import os
5
  import sys
6
  from pathlib import Path
7
+ import concurrent.futures
8
  import multiprocessing
9
+ import time
10
+ import threading
11
+ import queue
12
+ import uuid
13
+ import numpy as np
14
+ from datetime import datetime
15
+ from tqdm.auto import tqdm
16
+ from src.containerized_eval import eval_string_script
17
 
18
  # Add current directory and src directory to module search path
19
  current_dir = os.path.dirname(os.path.abspath(__file__))
 
23
  if src_dir not in sys.path:
24
  sys.path.append(src_dir)
25
 
26
+ # Create message queue
27
+ task_queue = queue.Queue()
28
+ # Dictionary to store task status
29
+ task_status = {}
30
+ # List to store task history, max 200 tasks
31
+ task_history = []
32
+ # Lock for shared resources
33
+ lock = threading.Lock()
34
+ # Number of worker threads
35
+ worker_threads = max(1, multiprocessing.cpu_count() // 2) # Using half the available cores for better stability
36
+ # Flag for running background threads
37
+ running = True
38
+ # Mapping from task type to processing time
39
+ task_type_times = {}
40
+
41
+ def queue_processor():
42
+ """Process tasks in the queue"""
43
+ while running:
44
+ try:
45
+ task_id, input_data, request_time = task_queue.get(timeout=0.1)
46
+ with lock:
47
+ task_status[task_id]['status'] = 'processing'
48
+ task_status[task_id]['start_time'] = time.time()
49
+
50
+ if isinstance(input_data, list) and len(input_data) > 0:
51
+ sample_task = input_data[0]
52
+ language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
53
+ task_size = len(input_data)
54
+ task_complexity = _estimate_task_complexity(input_data)
55
+
56
+ with lock:
57
+ task_status[task_id]['estimated_factors'] = {
58
+ 'language': language,
59
+ 'size': task_size,
60
+ 'complexity': task_complexity
61
+ }
62
+
63
+ result = evaluate(input_data)
64
+
65
+ end_time = time.time()
66
+ process_time = end_time - task_status[task_id]['start_time']
67
+
68
+ with lock:
69
+ task_status[task_id]['status'] = 'completed'
70
+ task_status[task_id]['result'] = result
71
+ task_status[task_id]['end_time'] = end_time
72
+ task_status[task_id]['process_time'] = process_time
73
+
74
+ if 'estimated_factors' in task_status[task_id]:
75
+ factors = task_status[task_id]['estimated_factors']
76
+ key = f"{factors['language']}_{factors['complexity']}"
77
+
78
+ if key not in task_type_times:
79
+ task_type_times[key] = []
80
+
81
+ task_type_times[key].append(process_time / factors['size'])
82
+ if len(task_type_times[key]) > 10:
83
+ task_type_times[key] = task_type_times[key][-10:]
84
+
85
+ task_history.append({
86
+ 'task_id': task_id,
87
+ 'request_time': request_time,
88
+ 'process_time': process_time,
89
+ 'status': 'completed',
90
+ 'factors': task_status[task_id].get('estimated_factors', {})
91
+ })
92
+ while len(task_history) > 200:
93
+ task_history.pop(0)
94
+
95
+ task_queue.task_done()
96
+
97
+ except queue.Empty:
98
+ continue
99
+ except Exception as e:
100
+ if 'task_id' in locals():
101
+ with lock:
102
+ task_status[task_id]['status'] = 'error'
103
+ task_status[task_id]['error'] = str(e)
104
+ task_status[task_id]['end_time'] = time.time()
105
+ task_queue.task_done()
106
+
107
+ def _estimate_task_complexity(tasks):
108
+ """Estimate task complexity
109
+
110
+ Returns: 'simple', 'medium', or 'complex'
111
+ """
112
+ total_code_length = 0
113
+ count = 0
114
+
115
+ for task in tasks:
116
+ if isinstance(task, dict):
117
+ prompt = task.get('prompt', '')
118
+ tests = task.get('tests', '')
119
+ completions = task.get('processed_completions', [])
120
+
121
+ code_length = len(prompt) + len(tests)
122
+ if completions:
123
+ code_length += sum(len(comp) for comp in completions)
124
+
125
+ total_code_length += code_length
126
+ count += 1
127
+
128
+ if count == 0:
129
+ return 'medium'
130
+
131
+ avg_length = total_code_length / count
132
+
133
+ if avg_length < 1000:
134
+ return 'simple'
135
+ elif avg_length < 5000:
136
+ return 'medium'
137
+ else:
138
+ return 'complex'
139
+
140
+ def evaluate(input_data):
141
+ """Main function for code evaluation"""
142
+ try:
143
+ if not isinstance(input_data, list):
144
+ return {"status": "Exception", "error": "Input must be a list"}
145
+
146
+ results = []
147
+
148
+ # Use a moderate number of workers for all language tests to ensure stability
149
+ # This prevents resource contention regardless of language
150
+ max_workers = max(1, min(multiprocessing.cpu_count() // 2, 4))
151
+
152
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
153
+ future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
154
+ for future in concurrent.futures.as_completed(future_to_item):
155
+ item = future_to_item[future]
156
+ try:
157
+ result = future.result()
158
+ item.update(result)
159
+ results.append(item)
160
+ except Exception as e:
161
+ item.update({"status": "Exception", "error": str(e)})
162
+ results.append(item)
163
+ return results
164
+
165
+ except Exception as e:
166
+ return {"status": "Exception", "error": str(e)}
167
+
168
+ def evaluate_single_case(input_data):
169
+ """Evaluate a single code case"""
170
+ try:
171
+ if not isinstance(input_data, dict):
172
+ return {"status": "Exception", "error": "Input item must be a dictionary"}
173
+
174
+ language = input_data.get('language')
175
+ completions = input_data.get('processed_completions', [])
176
+
177
+ if not completions:
178
+ return {"status": "Exception", "error": "No code provided"}
179
+
180
+ # Use a retry mechanism for all languages for better reliability
181
+ max_retries = 2 # One retry for all languages
182
+
183
+ results = []
184
+ for comp in completions:
185
+ code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
186
+
187
+ # Try up to max_retries + 1 times for all test cases
188
+ for attempt in range(max_retries + 1):
189
+ result = evaluate_code(code, language)
190
+
191
+ # If success or last attempt, return/record the result
192
+ if result["status"] == "OK" or attempt == max_retries:
193
+ if result["status"] == "OK":
194
+ return result
195
+ results.append(result)
196
+ break
197
+
198
+ # For retries, briefly wait to allow resources to stabilize
199
+ time.sleep(0.3)
200
+
201
+ return results[0]
202
+
203
+ except Exception as e:
204
+ return {"status": "Exception", "error": str(e)}
205
+
206
+ def evaluate_code(code, language):
207
+ """Evaluate code in a specific language"""
208
+ try:
209
+ result = eval_string_script(language, code)
210
+ return result
211
+
212
+ except Exception as e:
213
+ return {"status": "Exception", "error": str(e)}
214
+
215
+ def synchronous_evaluate(input_data):
216
+ """Synchronously evaluate code, compatible with original interface"""
217
+ if isinstance(input_data, list) and len(input_data) > 0:
218
+ sample_task = input_data[0]
219
+ language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
220
+ task_size = len(input_data)
221
+ task_complexity = _estimate_task_complexity(input_data)
222
+ else:
223
+ language = 'unknown'
224
+ task_size = 1
225
+ task_complexity = 'medium'
226
+
227
+ estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
228
+ estimated_total_time = estimated_time_per_task * task_size
229
+
230
+ queue_info = get_queue_status()
231
+ waiting_tasks = queue_info['waiting_tasks']
232
+
233
+ task_id = str(uuid.uuid4())
234
+ request_time = time.time()
235
+
236
+ with lock:
237
+ task_status[task_id] = {
238
+ 'status': 'queued',
239
+ 'queued_time': request_time,
240
+ 'queue_position': task_queue.qsize() + 1,
241
+ 'synchronous': True,
242
+ 'estimated_factors': {
243
+ 'language': language,
244
+ 'size': task_size,
245
+ 'complexity': task_complexity
246
+ },
247
+ 'estimated_time': estimated_total_time
248
+ }
249
+
250
+ task_queue.put((task_id, input_data, request_time))
251
+
252
+ while True:
253
+ with lock:
254
+ if task_id in task_status:
255
+ status = task_status[task_id]['status']
256
+ if status == 'completed':
257
+ result = task_status[task_id]['result']
258
+ task_status.pop(task_id, None)
259
+ return result
260
+ elif status == 'error':
261
+ error = task_status[task_id].get('error', 'Unknown error')
262
+ task_status.pop(task_id, None)
263
+ return {"status": "Exception", "error": error}
264
+
265
+ time.sleep(0.1)
266
+
267
+ def _get_estimated_time_for_task(language, complexity):
268
+ """Get estimated processing time for a specific task type"""
269
+ key = f"{language}_{complexity}"
270
+
271
+ if key in task_type_times and len(task_type_times[key]) > 0:
272
+ return np.median(task_type_times[key])
273
+
274
+ if complexity == 'simple':
275
+ return 1.0
276
+ elif complexity == 'medium':
277
+ return 3.0
278
+ else: # complex
279
+ return 8.0
280
+
281
+ def enqueue_task(input_data):
282
+ """Add task to queue"""
283
+ if isinstance(input_data, list) and len(input_data) > 0:
284
+ sample_task = input_data[0]
285
+ language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
286
+ task_size = len(input_data)
287
+ task_complexity = _estimate_task_complexity(input_data)
288
+ else:
289
+ language = 'unknown'
290
+ task_size = 1
291
+ task_complexity = 'medium'
292
+
293
+ estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity)
294
+ estimated_total_time = estimated_time_per_task * task_size
295
+
296
+ task_id = str(uuid.uuid4())
297
+ request_time = time.time()
298
+
299
+ with lock:
300
+ task_status[task_id] = {
301
+ 'status': 'queued',
302
+ 'queued_time': request_time,
303
+ 'queue_position': task_queue.qsize() + 1,
304
+ 'estimated_factors': {
305
+ 'language': language,
306
+ 'size': task_size,
307
+ 'complexity': task_complexity
308
+ },
309
+ 'estimated_time': estimated_total_time
310
+ }
311
+
312
+ queue_info = get_queue_status()
313
+ est_wait = queue_info['estimated_wait']
314
+
315
+ task_queue.put((task_id, input_data, request_time))
316
+
317
+ return {
318
+ 'task_id': task_id,
319
+ 'status': 'queued',
320
+ 'queue_position': task_status[task_id]['queue_position'],
321
+ 'estimated_wait': est_wait,
322
+ 'estimated_processing': estimated_total_time
323
+ }
324
+
325
+ def check_status(task_id):
326
+ """Check task status"""
327
+ with lock:
328
+ if task_id not in task_status:
329
+ return {'status': 'not_found'}
330
+
331
+ status_info = task_status[task_id].copy()
332
+
333
+ if status_info['status'] in ['completed', 'error'] and time.time() - status_info.get('end_time', 0) > 3600:
334
+ task_status.pop(task_id, None)
335
+
336
+ return status_info
337
+
338
+ def get_queue_status():
339
+ """Get queue status"""
340
+ with lock:
341
+ queued_tasks = [t for t in task_status.values() if t['status'] == 'queued']
342
+ processing_tasks = [t for t in task_status.values() if t['status'] == 'processing']
343
+
344
+ queue_size = task_queue.qsize()
345
+ active_tasks = len(processing_tasks)
346
+ waiting_tasks = len(queued_tasks)
347
+
348
+ remaining_processing_time = 0
349
+ for task in processing_tasks:
350
+ if 'start_time' in task and 'estimated_time' in task:
351
+ elapsed = time.time() - task['start_time']
352
+ remaining = max(0, task['estimated_time'] - elapsed)
353
+ remaining_processing_time += remaining
354
+ else:
355
+ remaining_processing_time += 2
356
+
357
+ if active_tasks > 0:
358
+ remaining_processing_time = remaining_processing_time / min(active_tasks, worker_threads)
359
+
360
+ queued_processing_time = 0
361
+ for task in queued_tasks:
362
+ if 'estimated_time' in task:
363
+ queued_processing_time += task['estimated_time']
364
+ else:
365
+ queued_processing_time += 5
366
+
367
+ if worker_threads > 0 and queued_processing_time > 0:
368
+ queued_processing_time = queued_processing_time / worker_threads
369
+
370
+ estimated_wait = remaining_processing_time + queued_processing_time
371
+
372
+ if task_history:
373
+ prediction_ratios = []
374
+ for task in task_history:
375
+ if 'factors' in task and 'estimated_time' in task:
376
+ prediction_ratios.append(task['process_time'] / task['estimated_time'])
377
+
378
+ if prediction_ratios:
379
+ correction_factor = np.median(prediction_ratios)
380
+ correction_factor = max(0.5, min(2.0, correction_factor))
381
+ estimated_wait *= correction_factor
382
+
383
+ estimated_wait = max(0.1, estimated_wait)
384
+ if waiting_tasks == 0 and active_tasks == 0:
385
+ estimated_wait = 0
386
+
387
+ recent_tasks = task_history[-5:] if task_history else []
388
+
389
+ return {
390
+ 'queue_size': queue_size,
391
+ 'active_tasks': active_tasks,
392
+ 'waiting_tasks': waiting_tasks,
393
+ 'worker_threads': worker_threads,
394
+ 'estimated_wait': estimated_wait,
395
+ 'recent_tasks': recent_tasks
396
+ }
397
+
398
+ def format_time(seconds):
399
+ """Format time into readable format"""
400
+ if seconds < 60:
401
+ return f"{seconds:.1f} seconds"
402
+ elif seconds < 3600:
403
+ minutes = int(seconds / 60)
404
+ seconds = seconds % 60
405
+ return f"{minutes}m {seconds:.1f}s"
406
+ else:
407
+ hours = int(seconds / 3600)
408
+ minutes = int((seconds % 3600) / 60)
409
+ return f"{hours}h {minutes}m"
410
+
411
+ def ui_get_queue_info():
412
+ """Get queue info for UI"""
413
+ queue_info = get_queue_status()
414
+
415
+ tasks_html = ""
416
+ for task in reversed(queue_info['recent_tasks']):
417
+ tasks_html += f"""
418
+ <tr>
419
+ <td>{task['task_id'][:8]}...</td>
420
+ <td>{datetime.fromtimestamp(task['request_time']).strftime('%H:%M:%S')}</td>
421
+ <td>{format_time(task['process_time'])}</td>
422
+ </tr>
423
+ """
424
+
425
+ if not tasks_html:
426
+ tasks_html = """
427
+ <tr>
428
+ <td colspan="3" style="text-align: center; padding: 20px;">No historical tasks</td>
429
+ </tr>
430
+ """
431
+
432
+ return f"""
433
+ <div class="dashboard">
434
+ <div class="queue-info-card main-card">
435
+ <h3 class="card-title">Queue Status Monitor</h3>
436
+ <div class="queue-stats">
437
+ <div class="stat-item">
438
+ <div class="stat-value">{queue_info['waiting_tasks']}</div>
439
+ <div class="stat-label">Waiting</div>
440
+ </div>
441
+ <div class="stat-item">
442
+ <div class="stat-value">{queue_info['active_tasks']}</div>
443
+ <div class="stat-label">Processing</div>
444
+ </div>
445
+ <div class="stat-item">
446
+ <div class="stat-value">{queue_info['worker_threads']}</div>
447
+ <div class="stat-label">Worker Threads</div>
448
+ </div>
449
+ </div>
450
+
451
+ <div class="wait-time">
452
+ <p><b>Current Estimated Wait Time:</b> {format_time(queue_info['estimated_wait'])}</p>
453
+ <p class="last-update"><small>Last update: {datetime.now().strftime('%H:%M:%S')}</small></p>
454
+ </div>
455
+ </div>
456
+
457
+ <div class="queue-info-card history-card">
458
+ <h3 class="card-title">Recently Processed Tasks</h3>
459
+ <table class="recent-tasks">
460
+ <thead>
461
+ <tr>
462
+ <th>Task ID</th>
463
+ <th>Request Time</th>
464
+ <th>Processing Time</th>
465
+ </tr>
466
+ </thead>
467
+ <tbody>
468
+ {tasks_html}
469
+ </tbody>
470
+ </table>
471
+ </div>
472
+ </div>
473
+ """
474
+
475
+ def launch_workers():
476
+ """Launch worker threads"""
477
+ global running
478
+ running = True
479
+
480
+ for _ in range(worker_threads):
481
+ worker = threading.Thread(target=queue_processor)
482
+ worker.daemon = True
483
+ worker.start()
484
+
485
+ # Custom CSS
486
+ custom_css = """
487
+ .container {
488
+ max-width: 1200px;
489
+ margin: 0 auto;
490
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
491
+ }
492
+
493
+ .dashboard {
494
+ display: flex;
495
+ flex-direction: column;
496
+ gap: 20px;
497
+ }
498
+
499
+ .card-title {
500
+ color: #333;
501
+ border-bottom: 2px solid #ddd;
502
+ padding-bottom: 10px;
503
+ margin-top: 0;
504
+ }
505
+
506
+ .status-card, .queue-info-card {
507
+ background: #fff;
508
+ border-radius: 12px;
509
+ padding: 20px;
510
+ margin: 10px 0;
511
+ box-shadow: 0 4px 15px rgba(0,0,0,0.08);
512
+ }
513
+
514
+ .main-card {
515
+ border-top: 5px solid #4285f4;
516
+ }
517
+
518
+ .history-card {
519
+ border-top: 5px solid #34a853;
520
+ }
521
+
522
+ .status-card.success {
523
+ background: #e7f5e7;
524
+ border-left: 5px solid #28a745;
525
+ }
526
+
527
+ .status-card.error {
528
+ background: #f8d7da;
529
+ border-left: 5px solid #dc3545;
530
+ }
531
+
532
+ .error-message {
533
+ color: #dc3545;
534
+ font-weight: bold;
535
+ padding: 10px;
536
+ background: #f8d7da;
537
+ border-radius: 5px;
538
+ }
539
+
540
+ .notice {
541
+ color: #0c5460;
542
+ background-color: #d1ecf1;
543
+ padding: 10px;
544
+ border-radius: 5px;
545
+ }
546
+
547
+ .queue-stats {
548
+ display: flex;
549
+ justify-content: space-around;
550
+ margin: 20px 0;
551
+ }
552
+
553
+ .stat-item {
554
+ text-align: center;
555
+ padding: 15px;
556
+ background: #f8f9fa;
557
+ border-radius: 10px;
558
+ min-width: 120px;
559
+ transition: transform 0.3s ease;
560
+ }
561
+
562
+ .stat-item:hover {
563
+ transform: translateY(-5px);
564
+ box-shadow: 0 5px 15px rgba(0,0,0,0.1);
565
+ }
566
+
567
+ .stat-value {
568
+ font-size: 32px;
569
+ font-weight: bold;
570
+ color: #4285f4;
571
+ margin-bottom: 5px;
572
+ }
573
+
574
+ .stat-label {
575
+ color: #5f6368;
576
+ font-size: 16px;
577
+ }
578
+
579
+ .wait-time {
580
+ text-align: center;
581
+ margin: 20px 0;
582
+ padding: 15px;
583
+ background: #f1f3f4;
584
+ border-radius: 8px;
585
+ font-size: 18px;
586
+ }
587
+
588
+ .last-update {
589
+ color: #80868b;
590
+ margin-top: 10px;
591
+ margin-bottom: 0;
592
+ }
593
+
594
+ .recent-tasks {
595
+ width: 100%;
596
+ border-collapse: collapse;
597
+ margin-top: 15px;
598
+ background: white;
599
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
600
+ }
601
+
602
+ .recent-tasks th, .recent-tasks td {
603
+ border: 1px solid #e0e0e0;
604
+ padding: 12px 15px;
605
+ text-align: center;
606
+ }
607
+
608
+ .recent-tasks th {
609
+ background-color: #f1f3f4;
610
+ color: #202124;
611
+ font-weight: 500;
612
+ }
613
+
614
+ .recent-tasks tbody tr:hover {
615
+ background-color: #f8f9fa;
616
+ }
617
+
618
+ .tabs {
619
+ margin-top: 20px;
620
+ }
621
+
622
+ button.primary {
623
+ background-color: #4285f4;
624
+ color: white;
625
+ padding: 10px 20px;
626
+ border: none;
627
+ border-radius: 4px;
628
+ cursor: pointer;
629
+ font-size: 16px;
630
+ font-weight: 500;
631
+ transition: background-color 0.3s;
632
+ }
633
+
634
+ button.primary:hover {
635
+ background-color: #3367d6;
636
+ }
637
+ """
638
 
639
  # Initialize and launch worker threads
640
  launch_workers()
 
657
 
658
  # Define update function
659
  def update_queue_info():
660
+ return ui_get_queue_info()
 
661
 
662
  # Update queue info periodically
663
  demo.load(update_queue_info, None, queue_info_html, every=3)
 
674
  demo.launch()
675
  finally:
676
  # Stop worker threads
677
+ running = False
 
src/evaluator.py DELETED
@@ -1,78 +0,0 @@
1
- import concurrent.futures
2
- import time
3
- from src.containerized_eval import eval_string_script
4
-
5
- def evaluate(input_data):
6
- """Main function for code evaluation"""
7
- try:
8
- if not isinstance(input_data, list):
9
- return {"status": "Exception", "error": "Input must be a list"}
10
-
11
- results = []
12
-
13
- # Use a moderate number of workers for all language tests to ensure stability
14
- # This prevents resource contention regardless of language
15
- max_workers = max(1, min(concurrent.futures.cpu_count() // 2, 4))
16
-
17
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
18
- future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
19
- for future in concurrent.futures.as_completed(future_to_item):
20
- item = future_to_item[future]
21
- try:
22
- result = future.result()
23
- item.update(result)
24
- results.append(item)
25
- except Exception as e:
26
- item.update({"status": "Exception", "error": str(e)})
27
- results.append(item)
28
- return results
29
-
30
- except Exception as e:
31
- return {"status": "Exception", "error": str(e)}
32
-
33
- def evaluate_single_case(input_data):
34
- """Evaluate a single code case"""
35
- try:
36
- if not isinstance(input_data, dict):
37
- return {"status": "Exception", "error": "Input item must be a dictionary"}
38
-
39
- language = input_data.get('language')
40
- completions = input_data.get('processed_completions', [])
41
-
42
- if not completions:
43
- return {"status": "Exception", "error": "No code provided"}
44
-
45
- # Use a retry mechanism for all languages for better reliability
46
- max_retries = 2 # One retry for all languages
47
-
48
- results = []
49
- for comp in completions:
50
- code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
51
-
52
- # Try up to max_retries + 1 times for all test cases
53
- for attempt in range(max_retries + 1):
54
- result = evaluate_code(code, language)
55
-
56
- # If success or last attempt, return/record the result
57
- if result["status"] == "OK" or attempt == max_retries:
58
- if result["status"] == "OK":
59
- return result
60
- results.append(result)
61
- break
62
-
63
- # For retries, briefly wait to allow resources to stabilize
64
- time.sleep(0.3)
65
-
66
- return results[0]
67
-
68
- except Exception as e:
69
- return {"status": "Exception", "error": str(e)}
70
-
71
- def evaluate_code(code, language):
72
- """Evaluate code in a specific language"""
73
- try:
74
- result = eval_string_script(language, code)
75
- return result
76
-
77
- except Exception as e:
78
- return {"status": "Exception", "error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/queue_manager.py DELETED
@@ -1,269 +0,0 @@
1
- import queue
2
- import threading
3
- import time
4
- import uuid
5
- import multiprocessing
6
- import numpy as np
7
- from datetime import datetime
8
- from src.evaluator import evaluate
9
- from src.utils import _estimate_task_complexity, _get_estimated_time_for_task
10
-
11
- # Create message queue
12
- task_queue = queue.Queue()
13
- # Dictionary to store task status
14
- task_status = {}
15
- # List to store task history, max 200 tasks
16
- task_history = []
17
- # Lock for shared resources
18
- lock = threading.Lock()
19
- # Number of worker threads
20
- worker_threads = max(1, multiprocessing.cpu_count() // 2) # Using half the available cores for better stability
21
- # Flag for running background threads
22
- running = True
23
- # Mapping from task type to processing time
24
- task_type_times = {}
25
-
26
- def queue_processor():
27
- """Process tasks in the queue"""
28
- while running:
29
- try:
30
- task_id, input_data, request_time = task_queue.get(timeout=0.1)
31
- with lock:
32
- task_status[task_id]['status'] = 'processing'
33
- task_status[task_id]['start_time'] = time.time()
34
-
35
- if isinstance(input_data, list) and len(input_data) > 0:
36
- sample_task = input_data[0]
37
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
38
- task_size = len(input_data)
39
- task_complexity = _estimate_task_complexity(input_data)
40
-
41
- with lock:
42
- task_status[task_id]['estimated_factors'] = {
43
- 'language': language,
44
- 'size': task_size,
45
- 'complexity': task_complexity
46
- }
47
-
48
- result = evaluate(input_data)
49
-
50
- end_time = time.time()
51
- process_time = end_time - task_status[task_id]['start_time']
52
-
53
- with lock:
54
- task_status[task_id]['status'] = 'completed'
55
- task_status[task_id]['result'] = result
56
- task_status[task_id]['end_time'] = end_time
57
- task_status[task_id]['process_time'] = process_time
58
-
59
- if 'estimated_factors' in task_status[task_id]:
60
- factors = task_status[task_id]['estimated_factors']
61
- key = f"{factors['language']}_{factors['complexity']}"
62
-
63
- if key not in task_type_times:
64
- task_type_times[key] = []
65
-
66
- task_type_times[key].append(process_time / factors['size'])
67
- if len(task_type_times[key]) > 10:
68
- task_type_times[key] = task_type_times[key][-10:]
69
-
70
- task_history.append({
71
- 'task_id': task_id,
72
- 'request_time': request_time,
73
- 'process_time': process_time,
74
- 'status': 'completed',
75
- 'factors': task_status[task_id].get('estimated_factors', {})
76
- })
77
- while len(task_history) > 200:
78
- task_history.pop(0)
79
-
80
- task_queue.task_done()
81
-
82
- except queue.Empty:
83
- continue
84
- except Exception as e:
85
- if 'task_id' in locals():
86
- with lock:
87
- task_status[task_id]['status'] = 'error'
88
- task_status[task_id]['error'] = str(e)
89
- task_status[task_id]['end_time'] = time.time()
90
- task_queue.task_done()
91
-
92
- def synchronous_evaluate(input_data):
93
- """Synchronously evaluate code, compatible with original interface"""
94
- if isinstance(input_data, list) and len(input_data) > 0:
95
- sample_task = input_data[0]
96
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
97
- task_size = len(input_data)
98
- task_complexity = _estimate_task_complexity(input_data)
99
- else:
100
- language = 'unknown'
101
- task_size = 1
102
- task_complexity = 'medium'
103
-
104
- estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity, task_type_times)
105
- estimated_total_time = estimated_time_per_task * task_size
106
-
107
- queue_info = get_queue_status()
108
- waiting_tasks = queue_info['waiting_tasks']
109
-
110
- task_id = str(uuid.uuid4())
111
- request_time = time.time()
112
-
113
- with lock:
114
- task_status[task_id] = {
115
- 'status': 'queued',
116
- 'queued_time': request_time,
117
- 'queue_position': task_queue.qsize() + 1,
118
- 'synchronous': True,
119
- 'estimated_factors': {
120
- 'language': language,
121
- 'size': task_size,
122
- 'complexity': task_complexity
123
- },
124
- 'estimated_time': estimated_total_time
125
- }
126
-
127
- task_queue.put((task_id, input_data, request_time))
128
-
129
- while True:
130
- with lock:
131
- if task_id in task_status:
132
- status = task_status[task_id]['status']
133
- if status == 'completed':
134
- result = task_status[task_id]['result']
135
- task_status.pop(task_id, None)
136
- return result
137
- elif status == 'error':
138
- error = task_status[task_id].get('error', 'Unknown error')
139
- task_status.pop(task_id, None)
140
- return {"status": "Exception", "error": error}
141
-
142
- time.sleep(0.1)
143
-
144
- def enqueue_task(input_data):
145
- """Add task to queue"""
146
- if isinstance(input_data, list) and len(input_data) > 0:
147
- sample_task = input_data[0]
148
- language = sample_task.get('language', 'unknown') if isinstance(sample_task, dict) else 'unknown'
149
- task_size = len(input_data)
150
- task_complexity = _estimate_task_complexity(input_data)
151
- else:
152
- language = 'unknown'
153
- task_size = 1
154
- task_complexity = 'medium'
155
-
156
- estimated_time_per_task = _get_estimated_time_for_task(language, task_complexity, task_type_times)
157
- estimated_total_time = estimated_time_per_task * task_size
158
-
159
- task_id = str(uuid.uuid4())
160
- request_time = time.time()
161
-
162
- with lock:
163
- task_status[task_id] = {
164
- 'status': 'queued',
165
- 'queued_time': request_time,
166
- 'queue_position': task_queue.qsize() + 1,
167
- 'estimated_factors': {
168
- 'language': language,
169
- 'size': task_size,
170
- 'complexity': task_complexity
171
- },
172
- 'estimated_time': estimated_total_time
173
- }
174
-
175
- queue_info = get_queue_status()
176
- est_wait = queue_info['estimated_wait']
177
-
178
- task_queue.put((task_id, input_data, request_time))
179
-
180
- return {
181
- 'task_id': task_id,
182
- 'status': 'queued',
183
- 'queue_position': task_status[task_id]['queue_position'],
184
- 'estimated_wait': est_wait,
185
- 'estimated_processing': estimated_total_time
186
- }
187
-
188
- def check_status(task_id):
189
- """Check task status"""
190
- with lock:
191
- if task_id not in task_status:
192
- return {'status': 'not_found'}
193
-
194
- status_info = task_status[task_id].copy()
195
-
196
- if status_info['status'] in ['completed', 'error'] and time.time() - status_info.get('end_time', 0) > 3600:
197
- task_status.pop(task_id, None)
198
-
199
- return status_info
200
-
201
- def get_queue_status():
202
- """Get queue status"""
203
- with lock:
204
- queued_tasks = [t for t in task_status.values() if t['status'] == 'queued']
205
- processing_tasks = [t for t in task_status.values() if t['status'] == 'processing']
206
-
207
- queue_size = task_queue.qsize()
208
- active_tasks = len(processing_tasks)
209
- waiting_tasks = len(queued_tasks)
210
-
211
- remaining_processing_time = 0
212
- for task in processing_tasks:
213
- if 'start_time' in task and 'estimated_time' in task:
214
- elapsed = time.time() - task['start_time']
215
- remaining = max(0, task['estimated_time'] - elapsed)
216
- remaining_processing_time += remaining
217
- else:
218
- remaining_processing_time += 2
219
-
220
- if active_tasks > 0:
221
- remaining_processing_time = remaining_processing_time / min(active_tasks, worker_threads)
222
-
223
- queued_processing_time = 0
224
- for task in queued_tasks:
225
- if 'estimated_time' in task:
226
- queued_processing_time += task['estimated_time']
227
- else:
228
- queued_processing_time += 5
229
-
230
- if worker_threads > 0 and queued_processing_time > 0:
231
- queued_processing_time = queued_processing_time / worker_threads
232
-
233
- estimated_wait = remaining_processing_time + queued_processing_time
234
-
235
- if task_history:
236
- prediction_ratios = []
237
- for task in task_history:
238
- if 'factors' in task and 'estimated_time' in task:
239
- prediction_ratios.append(task['process_time'] / task['estimated_time'])
240
-
241
- if prediction_ratios:
242
- correction_factor = np.median(prediction_ratios)
243
- correction_factor = max(0.5, min(2.0, correction_factor))
244
- estimated_wait *= correction_factor
245
-
246
- estimated_wait = max(0.1, estimated_wait)
247
- if waiting_tasks == 0 and active_tasks == 0:
248
- estimated_wait = 0
249
-
250
- recent_tasks = task_history[-5:] if task_history else []
251
-
252
- return {
253
- 'queue_size': queue_size,
254
- 'active_tasks': active_tasks,
255
- 'waiting_tasks': waiting_tasks,
256
- 'worker_threads': worker_threads,
257
- 'estimated_wait': estimated_wait,
258
- 'recent_tasks': recent_tasks
259
- }
260
-
261
- def launch_workers():
262
- """Launch worker threads"""
263
- global running
264
- running = True
265
-
266
- for _ in range(worker_threads):
267
- worker = threading.Thread(target=queue_processor)
268
- worker.daemon = True
269
- worker.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ui.py DELETED
@@ -1,218 +0,0 @@
1
- from datetime import datetime
2
- from src.utils import format_time
3
-
4
- def ui_get_queue_info(queue_info, task_history):
5
- """Get queue info for UI"""
6
- tasks_html = ""
7
- for task in reversed(queue_info['recent_tasks']):
8
- tasks_html += f"""
9
- <tr>
10
- <td>{task['task_id'][:8]}...</td>
11
- <td>{datetime.fromtimestamp(task['request_time']).strftime('%H:%M:%S')}</td>
12
- <td>{format_time(task['process_time'])}</td>
13
- </tr>
14
- """
15
-
16
- if not tasks_html:
17
- tasks_html = """
18
- <tr>
19
- <td colspan="3" style="text-align: center; padding: 20px;">No historical tasks</td>
20
- </tr>
21
- """
22
-
23
- return f"""
24
- <div class="dashboard">
25
- <div class="queue-info-card main-card">
26
- <h3 class="card-title">Queue Status Monitor</h3>
27
- <div class="queue-stats">
28
- <div class="stat-item">
29
- <div class="stat-value">{queue_info['waiting_tasks']}</div>
30
- <div class="stat-label">Waiting</div>
31
- </div>
32
- <div class="stat-item">
33
- <div class="stat-value">{queue_info['active_tasks']}</div>
34
- <div class="stat-label">Processing</div>
35
- </div>
36
- <div class="stat-item">
37
- <div class="stat-value">{queue_info['worker_threads']}</div>
38
- <div class="stat-label">Worker Threads</div>
39
- </div>
40
- </div>
41
-
42
- <div class="wait-time">
43
- <p><b>Current Estimated Wait Time:</b> {format_time(queue_info['estimated_wait'])}</p>
44
- <p class="last-update"><small>Last update: {datetime.now().strftime('%H:%M:%S')}</small></p>
45
- </div>
46
- </div>
47
-
48
- <div class="queue-info-card history-card">
49
- <h3 class="card-title">Recently Processed Tasks</h3>
50
- <table class="recent-tasks">
51
- <thead>
52
- <tr>
53
- <th>Task ID</th>
54
- <th>Request Time</th>
55
- <th>Processing Time</th>
56
- </tr>
57
- </thead>
58
- <tbody>
59
- {tasks_html}
60
- </tbody>
61
- </table>
62
- </div>
63
- </div>
64
- """
65
-
66
- # Custom CSS
67
- custom_css = """
68
- .container {
69
- max-width: 1200px;
70
- margin: 0 auto;
71
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
72
- }
73
-
74
- .dashboard {
75
- display: flex;
76
- flex-direction: column;
77
- gap: 20px;
78
- }
79
-
80
- .card-title {
81
- color: #333;
82
- border-bottom: 2px solid #ddd;
83
- padding-bottom: 10px;
84
- margin-top: 0;
85
- }
86
-
87
- .status-card, .queue-info-card {
88
- background: #fff;
89
- border-radius: 12px;
90
- padding: 20px;
91
- margin: 10px 0;
92
- box-shadow: 0 4px 15px rgba(0,0,0,0.08);
93
- }
94
-
95
- .main-card {
96
- border-top: 5px solid #4285f4;
97
- }
98
-
99
- .history-card {
100
- border-top: 5px solid #34a853;
101
- }
102
-
103
- .status-card.success {
104
- background: #e7f5e7;
105
- border-left: 5px solid #28a745;
106
- }
107
-
108
- .status-card.error {
109
- background: #f8d7da;
110
- border-left: 5px solid #dc3545;
111
- }
112
-
113
- .error-message {
114
- color: #dc3545;
115
- font-weight: bold;
116
- padding: 10px;
117
- background: #f8d7da;
118
- border-radius: 5px;
119
- }
120
-
121
- .notice {
122
- color: #0c5460;
123
- background-color: #d1ecf1;
124
- padding: 10px;
125
- border-radius: 5px;
126
- }
127
-
128
- .queue-stats {
129
- display: flex;
130
- justify-content: space-around;
131
- margin: 20px 0;
132
- }
133
-
134
- .stat-item {
135
- text-align: center;
136
- padding: 15px;
137
- background: #f8f9fa;
138
- border-radius: 10px;
139
- min-width: 120px;
140
- transition: transform 0.3s ease;
141
- }
142
-
143
- .stat-item:hover {
144
- transform: translateY(-5px);
145
- box-shadow: 0 5px 15px rgba(0,0,0,0.1);
146
- }
147
-
148
- .stat-value {
149
- font-size: 32px;
150
- font-weight: bold;
151
- color: #4285f4;
152
- margin-bottom: 5px;
153
- }
154
-
155
- .stat-label {
156
- color: #5f6368;
157
- font-size: 16px;
158
- }
159
-
160
- .wait-time {
161
- text-align: center;
162
- margin: 20px 0;
163
- padding: 15px;
164
- background: #f1f3f4;
165
- border-radius: 8px;
166
- font-size: 18px;
167
- }
168
-
169
- .last-update {
170
- color: #80868b;
171
- margin-top: 10px;
172
- margin-bottom: 0;
173
- }
174
-
175
- .recent-tasks {
176
- width: 100%;
177
- border-collapse: collapse;
178
- margin-top: 15px;
179
- background: white;
180
- box-shadow: 0 1px 3px rgba(0,0,0,0.05);
181
- }
182
-
183
- .recent-tasks th, .recent-tasks td {
184
- border: 1px solid #e0e0e0;
185
- padding: 12px 15px;
186
- text-align: center;
187
- }
188
-
189
- .recent-tasks th {
190
- background-color: #f1f3f4;
191
- color: #202124;
192
- font-weight: 500;
193
- }
194
-
195
- .recent-tasks tbody tr:hover {
196
- background-color: #f8f9fa;
197
- }
198
-
199
- .tabs {
200
- margin-top: 20px;
201
- }
202
-
203
- button.primary {
204
- background-color: #4285f4;
205
- color: white;
206
- padding: 10px 20px;
207
- border: none;
208
- border-radius: 4px;
209
- cursor: pointer;
210
- font-size: 16px;
211
- font-weight: 500;
212
- transition: background-color 0.3s;
213
- }
214
-
215
- button.primary:hover {
216
- background-color: #3367d6;
217
- }
218
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/utils.py DELETED
@@ -1,62 +0,0 @@
1
- import numpy as np
2
- from datetime import datetime
3
-
4
- def _estimate_task_complexity(tasks):
5
- """Estimate task complexity
6
-
7
- Returns: 'simple', 'medium', or 'complex'
8
- """
9
- total_code_length = 0
10
- count = 0
11
-
12
- for task in tasks:
13
- if isinstance(task, dict):
14
- prompt = task.get('prompt', '')
15
- tests = task.get('tests', '')
16
- completions = task.get('processed_completions', [])
17
-
18
- code_length = len(prompt) + len(tests)
19
- if completions:
20
- code_length += sum(len(comp) for comp in completions)
21
-
22
- total_code_length += code_length
23
- count += 1
24
-
25
- if count == 0:
26
- return 'medium'
27
-
28
- avg_length = total_code_length / count
29
-
30
- if avg_length < 1000:
31
- return 'simple'
32
- elif avg_length < 5000:
33
- return 'medium'
34
- else:
35
- return 'complex'
36
-
37
- def _get_estimated_time_for_task(language, complexity, task_type_times):
38
- """Get estimated processing time for a specific task type"""
39
- key = f"{language}_{complexity}"
40
-
41
- if key in task_type_times and len(task_type_times[key]) > 0:
42
- return np.median(task_type_times[key])
43
-
44
- if complexity == 'simple':
45
- return 1.0
46
- elif complexity == 'medium':
47
- return 3.0
48
- else: # complex
49
- return 8.0
50
-
51
- def format_time(seconds):
52
- """Format time into readable format"""
53
- if seconds < 60:
54
- return f"{seconds:.1f} seconds"
55
- elif seconds < 3600:
56
- minutes = int(seconds / 60)
57
- seconds = seconds % 60
58
- return f"{minutes}m {seconds:.1f}s"
59
- else:
60
- hours = int(seconds / 3600)
61
- minutes = int((seconds % 3600) / 60)
62
- return f"{hours}h {minutes}m"