Spaces:

dongsheng
/

docker_test

Sleeping

App Files Files Community

朱东升 committed on Mar 17

Commit

a27816a

1 Parent(s): cd10de7

requirements update29

Browse files

Files changed (1) hide show

app.py +71 -86

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import os
 import sys
 import concurrent.futures
 import multiprocessing
-import gradio as gr
 from src.containerized_eval import eval_string_script
 # 添加当前目录和src目录到模块搜索路径
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = os.path.join(current_dir, "src")
@@ -15,53 +16,6 @@ if current_dir not in sys.path:
 if src_dir not in sys.path:
     sys.path.append(src_dir)
-# 定义系统错误关键词，用于判断是否需要重试
-SYSTEM_ERROR_KEYWORDS = [
-    "resource", "timeout", "busy", "congestion", "memory",
-    "connection", "system", "overload", "refused", "reset"
-]
-def is_system_error(error_msg):
-    """检查错误信息是否包含系统错误关键词
-    Args:
-        error_msg (str): 错误信息
-    Returns:
-        bool: 是否是系统错误
-    """
-    error_msg = str(error_msg).lower()
-    return any(keyword in error_msg for keyword in SYSTEM_ERROR_KEYWORDS)
-def retry_with_logging(func, args, test_name="未知测试用例", error_context=""):
-    """带日志记录的重试函数
-    Args:
-        func: 要重试的函数
-        args: 函数参数
-        test_name (str): 测试用例名称
-        error_context (str): 错误上下文描述
-    Returns:
-        tuple: (结果, 是否成功)
-    """
-    try:
-        print(f"{error_context}，正在重试测试用例 '{test_name}'...")
-        result = func(*args)
-        success = True
-        if isinstance(result, dict) and result.get("status") == "Exception":
-            success = False
-        else:
-            print(f"测试用例 '{test_name}' 重试成功")
-        return result, success
-    except Exception as e:
-        print(f"测试用例 '{test_name}' 重试失败: {str(e)}")
-        return {"status": "Exception", "error": str(e)}, False
 def evaluate(input_data):
     """评估代码的主函数
@@ -76,48 +30,83 @@ def evaluate(input_data):
             return {"status": "Exception", "error": "Input must be a list"}
         results = []
-        max_workers = multiprocessing.cpu_count()
         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
             future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
             for future in concurrent.futures.as_completed(future_to_item):
                 item = future_to_item[future]
                 try:
                     result = future.result()
-                    # 处理结果中的系统错误
-                    for i, res in enumerate(result):
-                        if isinstance(res, dict) and res.get("status") == "Exception" and is_system_error(res.get("error", "")):
-                            test_name = item.get('name', '未知测试用例')
-                            error_context = f"检测到列表中的系统错误: {res.get('error', '')}"
-                            # 仅重试这个失败的情况
-                            code = item.get('prompt') + item.get('processed_completions', [])[i] + '\n' + item.get('tests')
-                            retry_result, success = retry_with_logging(
-                                evaluate_code,
-                                [code, item.get('language')],
-                                test_name,
-                                error_context
-                            )
-                            if success:
-                                result[i] = retry_result
                     item.update(result)
                     results.append(item)
                 except Exception as e:
-                    # 处理执行过程中的系统错误
-                    if is_system_error(e):
-                        test_name = item.get('name', '未知测试用例')
-                        error_context = f"执行过程中检测到系统错误: {str(e)}"
-                        retry_result, success = retry_with_logging(
-                            evaluate_single_case,
-                            [item],
-                            test_name,
-                            error_context
-                        )
-                        if success:
                             item.update(retry_result)
                             results.append(item)
                             continue
                     # 如果重试失败或不是系统错误，记录原始错误
                     item.update({"status": "Exception", "error": str(e)})
@@ -127,7 +116,6 @@ def evaluate(input_data):
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 def evaluate_single_case(input_data):
     """评估单个代码用例
@@ -137,10 +125,10 @@ def evaluate_single_case(input_data):
     Returns:
         dict: 包含评估结果的字典
     """
-    if not isinstance(input_data, dict):
-        return {"status": "Exception", "error": "Input item must be a dictionary"}
     try:
         language = input_data.get('language')
         completions = input_data.get('processed_completions', [])
@@ -160,7 +148,6 @@ def evaluate_single_case(input_data):
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 def evaluate_code(code, language):
     """评估特定语言的代码
@@ -179,7 +166,6 @@ def evaluate_code(code, language):
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 # 创建Gradio接口
 demo = gr.Interface(
     fn=evaluate,
@@ -189,6 +175,5 @@ demo = gr.Interface(
     description="支持多种编程语言的代码评估服务"
 )
 if __name__ == "__main__":
-    demo.launch()

+import gradio as gr
+import json
+import importlib
 import os
 import sys
+from pathlib import Path
 import concurrent.futures
 import multiprocessing
 from src.containerized_eval import eval_string_script
 # 添加当前目录和src目录到模块搜索路径
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = os.path.join(current_dir, "src")
 if src_dir not in sys.path:
     sys.path.append(src_dir)
 def evaluate(input_data):
     """评估代码的主函数
             return {"status": "Exception", "error": "Input must be a list"}
         results = []
+        # 定义系统错误关键词，用于判断是否需要重试
+        system_error_keywords = [
+            "resource", "timeout", "busy", "congestion", "memory",
+            "connection", "system", "overload", "refused", "reset"
+        ]
+        max_workers = multiprocessing.cpu_count()
         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
             future_to_item = {executor.submit(evaluate_single_case, item): item for item in input_data}
             for future in concurrent.futures.as_completed(future_to_item):
                 item = future_to_item[future]
                 try:
                     result = future.result()
+                    # 检查是否是系统错误，如果是，立即重试一次
+                    if isinstance(result, dict) and result.get("status") == "Exception":
+                        error_msg = str(result.get("error", "")).lower()
+                        # 如果错误信息包含系统错误关键词，则重试
+                        if any(keyword in error_msg for keyword in system_error_keywords):
+                            print(f"检测到系统错误: {error_msg}，正在重试...")
+                            # 立即重试
+                            retry_result = evaluate_single_case(item)
+                            if isinstance(retry_result, dict) and retry_result.get("status") != "Exception":
+                                # 重试成功，使用重试结果
+                                result = retry_result
+                                print(f"重试成功")
+                            else:
+                                print(f"重试失败")
+                    # 检查结果列表
+                    if isinstance(result, list):
+                        for i, res in enumerate(result):
+                            if isinstance(res, dict) and res.get("status") == "Exception":
+                                error_msg = str(res.get("error", "")).lower()
+                                # 如果错误信息包含系统错误关键词，则重试
+                                if any(keyword in error_msg for keyword in system_error_keywords):
+                                    print(f"检测到列表中的系统错误: {error_msg}，正在重试...")
+                                    # 仅重试这个失败的情况
+                                    code = item.get('prompt') + item.get('processed_completions', [])[i] + '\n' + item.get('tests')
+                                    retry_result = evaluate_code(code, item.get('language'))
+                                    if isinstance(retry_result, dict) and retry_result.get("status") != "Exception":
+                                        # 重试成功，更新结果
+                                        result[i] = retry_result
+                                        print(f"重试成功")
+                                    else:
+                                        print(f"重试失败")
+                    # 如果是超时错误，也尝试重试一次
+                    if isinstance(result, dict) and result.get("status") == "Timeout":
+                        print(f"检测到超时错误，正在重试...")
+                        # 立即重试
+                        retry_result = evaluate_single_case(item)
+                        if isinstance(retry_result, dict) and retry_result.get("status") != "Timeout":
+                            # 重试成功，使用重试结果
+                            result = retry_result
+                            print(f"重试成功")
+                        else:
+                            print(f"重试失败")
                     item.update(result)
                     results.append(item)
                 except Exception as e:
+                    error_msg = str(e).lower()
+                    # 检查是否是系统错误
+                    if any(keyword in error_msg for keyword in system_error_keywords):
+                        print(f"执行过程中检测到系统错误: {error_msg}，正在重试...")
+                        try:
+                            # 立即重试
+                            retry_result = evaluate_single_case(item)
                             item.update(retry_result)
                             results.append(item)
+                            print(f"重试成功")
                             continue
+                        except Exception as retry_e:
+                            print(f"重试失败: {str(retry_e)}")
                     # 如果重试失败或不是系统错误，记录原始错误
                     item.update({"status": "Exception", "error": str(e)})
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 def evaluate_single_case(input_data):
     """评估单个代码用例
     Returns:
         dict: 包含评估结果的字典
     """
     try:
+        if not isinstance(input_data, dict):
+            return {"status": "Exception", "error": "Input item must be a dictionary"}
         language = input_data.get('language')
         completions = input_data.get('processed_completions', [])
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 def evaluate_code(code, language):
     """评估特定语言的代码
     except Exception as e:
         return {"status": "Exception", "error": str(e)}
 # 创建Gradio接口
 demo = gr.Interface(
     fn=evaluate,
     description="支持多种编程语言的代码评估服务"
 )
 if __name__ == "__main__":
+    demo.launch()