Spaces:

dongsheng
/

docker_test

Sleeping

File size: 4,312 Bytes

import gradio as gr
import json
import importlib
import os
import sys
from pathlib import Path

# Make both this file's directory and its "src" subdirectory importable,
# appending each one only if it is not already on the module search path.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_dir = os.path.join(current_dir, "src")
for _search_dir in (current_dir, src_dir):
    if _search_dir not in sys.path:
        sys.path.append(_search_dir)

def evaluate(input_data):
    """Entry point of the service: evaluate one test case or a batch.

    Args:
        input_data: Either a single case (a file path string or a dict
            describing the code) or a list of such cases.

    Returns:
        dict/list: For a list input, a list holding the result of
        ``evaluate_single_case`` for every item, in order. For any other
        input, the single result of ``evaluate_single_case``. If anything
        raises along the way, a dict with status "Exception" and the
        error message.
    """
    try:
        # Anything that is not a list is treated as one single case.
        if not isinstance(input_data, list):
            return evaluate_single_case(input_data)

        # Batch mode: evaluate the cases one by one, preserving order.
        return [evaluate_single_case(case) for case in input_data]

    except Exception as exc:
        return {"status": "Exception", "error": str(exc)}

def evaluate_single_case(input_data):
    """Evaluate a single code test case.

    Args:
        input_data: Either a string (path of a source file whose extension
            names the language) or a dict with the keys ``language``,
            ``prompt``, ``tests`` and ``processed_completions`` (a list of
            candidate completions).

    Returns:
        dict: For a file path, whatever ``evaluate_code`` returns. For a
        dict, ``{"status": "pass"}`` as soon as one completion evaluates
        with status "OK"; otherwise the result of the first failing
        completion, extended with a ``compiled_code`` entry holding the
        assembled program. Invalid input and unexpected errors yield
        ``{"status": "Exception", "error": <message>}``.
    """
    try:
        # Input is a file path.
        if isinstance(input_data, str):
            # NOTE(review): the path comes straight from the caller; if this
            # service is exposed to untrusted users they can make it read any
            # file the process can access - confirm that this is intended.
            # Read as UTF-8 explicitly: evaluate_code writes the program back
            # to disk as UTF-8, so decoding must not depend on the locale.
            with open(input_data, 'r', encoding='utf-8') as f:
                code = f.read()
            # The language is the file extension without the leading dot.
            language = Path(input_data).suffix[1:]
            return evaluate_code(code, language)

        if not isinstance(input_data, dict):
            return {"status": "Exception", "error": "Invalid input format"}

        completions = input_data.get('processed_completions', [])
        if not completions:
            return {"status": "Exception", "error": "No code provided"}

        # Report absent fields by name instead of letting the string
        # concatenation below fail with an opaque TypeError.
        missing = [
            key for key in ('language', 'prompt', 'tests')
            if input_data.get(key) is None
        ]
        if missing:
            return {
                "status": "Exception",
                "error": f"Missing required field(s): {', '.join(missing)}",
            }

        # These do not change between completions, so look them up once.
        language = input_data['language']
        prompt = input_data['prompt']
        tests = input_data['tests']

        first_failure = None
        for comp in completions:
            code = prompt + comp + '\n' + tests
            result = evaluate_code(code, language)
            # Stop at the first completion that succeeds; later ones are
            # not evaluated.
            if result["status"] == "OK":
                return {"status": "pass"}
            print(f'Code failed to compile: \n{code}')
            result['compiled_code'] = code
            if first_failure is None:
                first_failure = result

        # Every completion failed: report the first failure.
        return first_failure

    except Exception as e:
        return {"status": "Exception", "error": str(e)}

def evaluate_code(code, language):
    """Evaluate code written in a specific language.

    The code is written to a temporary file whose suffix is ``.<language>``
    and handed to ``eval_script`` of the dynamically imported module
    ``src.eval_<language lower-cased>``.

    Args:
        code (str): The source code to evaluate.
        language (str): The programming language; selects the evaluator
            module and the temporary file's suffix.

    Returns:
        dict: Whatever the evaluator's ``eval_script`` returns (callers in
        this file read its "status" entry). If the evaluator module cannot
        be imported, or anything else raises, a dict with status
        "Exception" and an error message.
    """
    # Kept as a function-local import, as in the original code.
    import tempfile

    # Resolve the evaluator first. Only a failure of this import means
    # "language not supported"; an ImportError raised later by the evaluator
    # itself must not be reported that way.
    # NOTE(review): `language` is interpolated into the module name as-is;
    # if it can come from untrusted callers, consider restricting it to a
    # known set of languages.
    try:
        module_name = f"src.eval_{language.lower()}"
        module = importlib.import_module(module_name)
    except ImportError as e:
        return {"status": "Exception", "error": f"Language {language} not supported: {str(e)}"}
    except Exception as e:
        return {"status": "Exception", "error": str(e)}

    temp_file_path = None
    try:
        # Use the system temp directory; delete=False because the evaluator
        # is given the path after the handle has been closed.
        with tempfile.NamedTemporaryFile(suffix=f".{language}", delete=False) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(code.encode('utf-8'))

        return module.eval_script(temp_file_path)

    except Exception as e:
        return {"status": "Exception", "error": str(e)}

    finally:
        # Always remove the temp file, also when the evaluator raised.
        # Cleanup is best effort: a failure here must not replace the
        # evaluation result.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                pass

# Build the Gradio interface: JSON in, JSON out, backed by evaluate().
demo = gr.Interface(
    title="代码评估服务",
    description="支持多种编程语言的代码评估服务",
    fn=evaluate,
    inputs=gr.JSON(),
    outputs=gr.JSON(),
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()