File size: 3,163 Bytes
dd10f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import concurrent.futures
import multiprocessing
from .containerized_eval import eval_string_script

def evaluate(input_data):
    """Evaluate a batch of test cases concurrently.

    Every dictionary item is handed to ``evaluate_single_case`` on a thread
    pool. The returned result is merged into the item in place (via
    ``dict.update``) and that same item object is placed in the output list.

    Args:
        input_data: A list of test-case dictionaries.

    Returns:
        list: One entry per input element, in input order. Dictionary items
            are returned updated with their result; an element that is not
            a dictionary yields a standalone
            ``{"status": "Exception", "error": ...}`` entry instead.
        dict: ``{"status": "Exception", "error": ...}`` when ``input_data``
            is not a list or an unexpected error aborts the whole batch.
    """
    try:
        if not isinstance(input_data, list):
            return {"status": "Exception", "error": "Input must be a list"}

        # cpu_count() // 2 is 0 on a single-core host, and ThreadPoolExecutor
        # rejects max_workers=0 with ValueError, so clamp to at least one.
        max_workers = max(1, multiprocessing.cpu_count() // 2)

        # Pre-sized so each result lands at its input position regardless of
        # the order in which the futures finish.
        results = [None] * len(input_data)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_index = {}
            for index, item in enumerate(input_data):
                if not isinstance(item, dict):
                    # A non-dict item cannot be updated in place; report it
                    # on its own instead of failing the entire batch.
                    results[index] = {
                        "status": "Exception",
                        "error": "Input item must be a dictionary",
                    }
                    continue
                future = executor.submit(evaluate_single_case, item)
                future_to_index[future] = index

            for future in concurrent.futures.as_completed(future_to_index):
                index = future_to_index[future]
                item = input_data[index]
                try:
                    outcome = future.result()
                except Exception as e:
                    outcome = {"status": "Exception", "error": str(e)}
                item.update(outcome)
                results[index] = item
        return results

    except Exception as e:
        return {"status": "Exception", "error": str(e)}

def evaluate_single_case(input_data):
    """Evaluate one test case by trying each of its completions in turn.

    For every entry in ``processed_completions`` the program
    ``prompt + completion + "\\n" + tests`` is built and passed to
    ``evaluate_code``. The first completion whose result has status ``"OK"``
    ends the search.

    Args:
        input_data: A dictionary read for the keys ``language``, ``prompt``,
            ``tests`` and ``processed_completions``.

    Returns:
        dict: ``{"status": "pass", "compiled_code": ...}`` for the first
            completion that evaluates OK; otherwise the result of the first
            failing completion with ``compiled_code`` added; or
            ``{"status": "Exception", "error": ...}`` when the input is
            invalid or an unexpected error occurs.
    """
    try:
        if not isinstance(input_data, dict):
            return {"status": "Exception", "error": "Input item must be a dictionary"}

        language = input_data.get('language')
        completions = input_data.get('processed_completions', [])

        if not completions:
            return {"status": "Exception", "error": "No code provided"}

        # Validate up front so a missing field produces a readable message
        # rather than a TypeError from string concatenation.
        prompt = input_data.get('prompt')
        tests = input_data.get('tests')
        for field, value in (('prompt', prompt), ('tests', tests)):
            if not isinstance(value, str):
                return {
                    "status": "Exception",
                    "error": f"Missing or non-string '{field}'",
                }

        # Only the first failure is ever reported, so keep just that one.
        first_failure = None
        for comp in completions:
            code = prompt + comp + '\n' + tests
            result = evaluate_code(code, language)
            if result["status"] == "OK":
                return {"status": "pass", "compiled_code": code}
            print(f'Code failed to compile: \n{code}')
            result["compiled_code"] = code
            if first_failure is None:
                first_failure = result

        return first_failure

    except Exception as e:
        return {"status": "Exception", "error": str(e)}

def evaluate_code(code, language):
    """Run a piece of code for the given language and summarize the outcome.

    Delegates execution to ``eval_string_script`` and inspects the
    ``exit_code``, ``stdout`` and ``stderr`` entries of what it returns.

    Args:
        code (str): The code to evaluate.
        language (str): The programming language of ``code``.

    Returns:
        dict: ``{"status": "OK", "output": stdout}`` when the exit code is 0;
            ``{"status": "Error", "error": ..., "output": stdout}`` for any
            other exit code (``error`` falls back to ``"Unknown error"`` when
            stderr is empty); ``{"status": "Exception", "error": ...}`` if
            anything raises along the way.
    """
    try:
        outcome = eval_string_script(language, code)

        # Guard clause: handle the failing exit code first.
        if outcome["exit_code"] != 0:
            failure_text = outcome["stderr"] or "Unknown error"
            return {
                "status": "Error",
                "error": failure_text,
                "output": outcome["stdout"],
            }

        return {"status": "OK", "output": outcome["stdout"]}

    except Exception as exc:
        return {"status": "Exception", "error": str(exc)}