docker_test / src /evaluator.py
3v324v23's picture
new1
dd10f90
raw
history blame
3.16 kB
import concurrent.futures
import multiprocessing
from .containerized_eval import eval_string_script
def evaluate(input_data):
"""评估代码的主函数
Args:
input_data: 列表(批量处理多个测试用例)
Returns:
list: 包含评估结果的列表
"""
try:
if not isinstance(input_data, list):
return {"status": "Exception", "error": "Input must be a list"}
results = []
max_workers = multiprocessing.cpu_count() // 2
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
for future in concurrent.futures.as_completed(future_to_item):
item = future_to_item[future]
try:
result = future.result()
item.update(result)
results.append(item)
except Exception as e:
item.update({"status": "Exception", "error": str(e)})
results.append(item)
return results
except Exception as e:
return {"status": "Exception", "error": str(e)}
def evaluate_single_case(input_data):
"""评估单个代码用例
Args:
input_data: 字典(包含代码信息)
Returns:
dict: 包含评估结果的字典
"""
try:
if not isinstance(input_data, dict):
return {"status": "Exception", "error": "Input item must be a dictionary"}
language = input_data.get('language')
completions = input_data.get('processed_completions', [])
if not completions:
return {"status": "Exception", "error": "No code provided"}
results = []
for comp in completions:
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
result = evaluate_code(code, language)
if result["status"] == "OK":
return {"status": "pass", "compiled_code": code}
print(f'Code failed to compile: \n{code}')
result["compiled_code"] = code
results.append(result)
return results[0]
except Exception as e:
return {"status": "Exception", "error": str(e)}
def evaluate_code(code, language):
"""评估特定语言的代码
Args:
code (str): 要评估的代码
language (str): 编程语言
Returns:
dict: 包含评估结果的字典
"""
try:
# 使用containerized_eval中的eval_string_script函数
result = eval_string_script(language, code)
if result["exit_code"] == 0:
return {"status": "OK", "output": result["stdout"]}
else:
return {
"status": "Error",
"error": result["stderr"] if result["stderr"] else "Unknown error",
"output": result["stdout"]
}
except Exception as e:
return {"status": "Exception", "error": str(e)}