# Source: docker_test / app.py (7.61 kB)
# Author: 朱东升
# Commit: b01f5f4 ("requirements update26")
import gradio as gr
import json
import importlib
import os
import sys
from pathlib import Path
import concurrent.futures
import multiprocessing
from src.containerized_eval import eval_string_script
# Make this file's directory and its "src" subdirectory importable by
# appending each one to the module search path if it is not already there.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_dir = os.path.join(current_dir, "src")
for _search_path in (current_dir, src_dir):
    if _search_path not in sys.path:
        sys.path.append(_search_path)
# Lower-case substrings that mark an error message as a transient,
# system-level failure for which a single retry is attempted.
_SYSTEM_ERROR_KEYWORDS = (
    "resource", "timeout", "busy", "congestion", "memory",
    "connection", "system", "overload", "refused", "reset",
)


def _is_system_error(error_msg):
    """Return True if the lower-cased *error_msg* contains a system-error keyword."""
    return any(keyword in error_msg for keyword in _SYSTEM_ERROR_KEYWORDS)


def _retry_transient_failure(item, result):
    """Re-run *item* once for a system-error result, then once for a timeout.

    The retry result replaces *result* only when it no longer carries the
    status that triggered the retry; otherwise the original result is kept.
    """
    if isinstance(result, dict) and result.get("status") == "Exception":
        error_msg = str(result.get("error", "")).lower()
        if _is_system_error(error_msg):
            print(f"检测到系统错误: {error_msg},正在重试...")
            retry_result = evaluate_single_case(item)
            if isinstance(retry_result, dict) and retry_result.get("status") != "Exception":
                result = retry_result
                print("重试成功")
            else:
                print("重试失败")

    # Checked after the system-error retry on purpose: a retry that came back
    # as "Timeout" gets one further attempt here.
    if isinstance(result, dict) and result.get("status") == "Timeout":
        print("检测到超时错误,正在重试...")
        retry_result = evaluate_single_case(item)
        if isinstance(retry_result, dict) and retry_result.get("status") != "Timeout":
            result = retry_result
            print("重试成功")
        else:
            print("重试失败")

    return result


def evaluate(input_data):
    """Evaluate a batch of test cases concurrently.

    Each dictionary item is submitted to ``evaluate_single_case`` on a thread
    pool sized to the CPU count. A result whose status is "Exception" with a
    system-error keyword in its message, or whose status is "Timeout", is
    retried once. The final result is merged into the item with
    ``dict.update`` (the caller's dictionaries are modified in place).

    Args:
        input_data: List of test-case dictionaries.

    Returns:
        list: One entry per input item. Entries for non-dictionary items are
        plain error dictionaries and come first; the remaining entries are
        the updated items in the order their futures completed, which is not
        necessarily the input order.
        dict: A single ``{"status": "Exception", "error": ...}`` dictionary
        when ``input_data`` is not a list or an unexpected error aborts the
        whole batch.
    """
    if not isinstance(input_data, list):
        return {"status": "Exception", "error": "Input must be a list"}

    try:
        results = []
        cases = []
        for item in input_data:
            if isinstance(item, dict):
                cases.append(item)
            else:
                # A non-dict entry has no ``update`` method, so merging a
                # result into it would raise and abort the whole batch.
                # Report it as its own error entry instead.
                results.append({"status": "Exception", "error": "Input item must be a dictionary"})

        max_workers = multiprocessing.cpu_count()
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_item = {executor.submit(evaluate_single_case, item): item for item in cases}
            for future in concurrent.futures.as_completed(future_to_item):
                item = future_to_item[future]
                try:
                    result = _retry_transient_failure(item, future.result())
                    item.update(result)
                    results.append(item)
                except Exception as e:
                    error_msg = str(e).lower()
                    if _is_system_error(error_msg):
                        print(f"执行过程中检测到系统错误: {error_msg},正在重试...")
                        try:
                            retry_result = evaluate_single_case(item)
                            item.update(retry_result)
                            results.append(item)
                            print("重试成功")
                            continue
                        except Exception as retry_e:
                            print(f"重试失败: {str(retry_e)}")
                    # Retry failed or the error was not a system error:
                    # record the original error on the item.
                    item.update({"status": "Exception", "error": str(e)})
                    results.append(item)
        return results
    except Exception as e:
        return {"status": "Exception", "error": str(e)}
def evaluate_single_case(input_data):
"""评估单个代码用例
Args:
input_data: 字典(包含代码信息)
Returns:
dict: 包含评估结果的字典
"""
try:
if not isinstance(input_data, dict):
return {"status": "Exception", "error": "Input item must be a dictionary"}
language = input_data.get('language')
completions = input_data.get('processed_completions', [])
if not completions:
return {"status": "Exception", "error": "No code provided"}
results = []
for comp in completions:
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
result = evaluate_code(code, language)
if result["status"] == "OK":
return result
results.append(result)
return results[0]
except Exception as e:
return {"status": "Exception", "error": str(e)}
def evaluate_code(code, language):
    """Run *code* through ``eval_string_script`` for the given language.

    Args:
        code (str): Source code to evaluate.
        language (str): Programming language passed to ``eval_string_script``.

    Returns:
        dict: Whatever ``eval_string_script`` returns, or
        ``{"status": "Exception", "error": <message>}`` if it raises.
    """
    try:
        return eval_string_script(language, code)
    except Exception as exc:
        return {"status": "Exception", "error": str(exc)}
# Build the Gradio interface: JSON input is passed to evaluate() and its
# return value is shown as JSON.
demo = gr.Interface(
    fn=evaluate,
    inputs=gr.JSON(),
    outputs=gr.JSON(),
    title="代码评估服务",
    description="支持多种编程语言的代码评估服务",
)

# Launch the web service only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()