Spaces:

dongsheng
/

docker_test

Sleeping

App Files Files Community

朱东升 committed on Mar 11

Commit

30b1610

1 Parent(s): c4f1102

.

Browse files

Files changed (37) hide show

.DS_Store +0 -0
Dockerfile +15 -0
app.py +99 -0
requirements.txt +3 -0
src/__init__.py +0 -0
src/containerized_eval.py +100 -0
src/eval_adb.py +64 -0
src/eval_clj.py +30 -0
src/eval_cpp.py +40 -0
src/eval_cs.py +68 -0
src/eval_dart.py +27 -0
src/eval_dfy.py +29 -0
src/eval_dlang.py +63 -0
src/eval_elixir.py +37 -0
src/eval_fs.py +17 -0
src/eval_go.py +42 -0
src/eval_hs.py +19 -0
src/eval_java.py +50 -0
src/eval_javascript.py +49 -0
src/eval_julia.py +21 -0
src/eval_lean.py +29 -0
src/eval_lua.py +17 -0
src/eval_luau.py +26 -0
src/eval_matlab.py +53 -0
src/eval_ocaml.py +21 -0
src/eval_php.py +20 -0
src/eval_pl.py +20 -0
src/eval_python.py +19 -0
src/eval_r.py +47 -0
src/eval_racket.py +49 -0
src/eval_ruby.py +43 -0
src/eval_rust.py +53 -0
src/eval_scala.py +37 -0
src/eval_sh.py +24 -0
src/eval_swift.py +30 -0
src/eval_ts.py +33 -0
src/eval_v.py +40 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

Dockerfile ADDED Viewed

	@@ -0,0 +1,15 @@

+# Base image: the MultiPL-E evaluation image, pinned by digest so rebuilds are reproducible.
+FROM ghcr.io/nuprl/multipl-e-evaluation@sha256:11864ca95774df16c34b4cd1eac231f9e5466c7ea38dac98e5b5b053e18479de
+WORKDIR /app
+# NOTE(review): presumably makes Gradio listen on all interfaces inside the container - confirm against the Gradio docs.
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+# Install dependencies before copying the sources so this layer is reused when only code changes.
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+COPY app.py .
+COPY src/ ./src/
+# NOTE(review): 7860 is assumed to be the port the Gradio app serves on - confirm.
+EXPOSE 7860
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import gradio as gr
+import json
+import importlib
+import os
+from pathlib import Path
+def evaluate(input_data):
+    """Evaluate submitted code and report whether it passes.
+    Args:
+        input_data: Either a string (path of a source file whose extension
+            names the language) or a dict with the keys ``language`` and
+            ``completions`` (a list of candidate programs).
+    Returns:
+        dict: ``{"status": "pass"}`` as soon as one completion evaluates to
+        status ``"OK"``; otherwise the result of the first completion. Any
+        invalid input or unexpected error yields
+        ``{"status": "Exception", "error": <message>}``.
+    """
+    try:
+        # Input is a file path.
+        if isinstance(input_data, str):
+            # NOTE(review): the string is opened as a path on the server, so a
+            # remote caller can make the service read local files - confirm
+            # this entry point is meant to be exposed through the web UI.
+            with open(input_data, 'r', encoding="utf-8") as f:
+                code = f.read()
+            # The language is taken from the file extension.
+            language = Path(input_data).suffix[1:]
+            return evaluate_code(code, language)
+        # Input is a dict describing the language and candidate programs.
+        if isinstance(input_data, dict):
+            completions = input_data.get('completions', [])
+            if not completions:
+                return {"status": "Exception", "error": "No code provided"}
+            language = input_data.get('language')
+            # Fail with a clear message instead of an AttributeError text
+            # leaking out of evaluate_code when the language is missing.
+            if not isinstance(language, str) or not language:
+                return {"status": "Exception", "error": "No language provided"}
+            first_result = None
+            for index, code in enumerate(completions):
+                result = evaluate_code(code, language)
+                if not isinstance(result, dict):
+                    # Some evaluators return None when they skip a file.
+                    result = {"status": "Exception", "error": "Evaluator returned no result"}
+                # One passing completion is enough; stop running the rest.
+                if result.get("status") == "OK":
+                    return {"status": "pass"}
+                if index == 0:
+                    first_result = result
+            # Nothing passed: report the first failure.
+            return first_result
+        return {"status": "Exception", "error": "Invalid input format"}
+    except Exception as e:
+        return {"status": "Exception", "error": str(e)}
+def evaluate_code(code, language):
+    """Evaluate one program with the evaluator module for ``language``.
+    Args:
+        code (str): Source code to evaluate.
+        language (str): Language key; ``src.eval_<language>`` must exist.
+    Returns:
+        dict: Whatever the evaluator's ``eval_script`` returns, or
+        ``{"status": "Exception", "error": <message>}`` when the language is
+        unknown or evaluation raises.
+    """
+    import re
+    import tempfile
+    unsupported = {"status": "Exception", "error": f"Language {language} not supported"}
+    # `language` ends up in a module name and a file name, so only plain
+    # identifiers are accepted (no path separators or dots).
+    if not isinstance(language, str) or not re.fullmatch(r"[A-Za-z0-9_]+", language):
+        return unsupported
+    try:
+        # Dynamically import the evaluator for this language.
+        # NOTE(review): an ImportError raised by the evaluator's own imports
+        # is also reported as "not supported".
+        module = importlib.import_module(f"src.eval_{language.lower()}")
+    except ImportError:
+        return unsupported
+    try:
+        # A private directory per call keeps concurrent requests apart and
+        # removes the file (and evaluator by-products) even when the
+        # evaluator raises. The file keeps the stem "temp" as before.
+        with tempfile.TemporaryDirectory(prefix="eval_") as work_dir:
+            temp_file = Path(work_dir) / f"temp.{language}"
+            temp_file.write_text(code, encoding="utf-8")
+            return module.eval_script(temp_file)
+    except ImportError:
+        return unsupported
+    except Exception as e:
+        return {"status": "Exception", "error": str(e)}
+# Build the Gradio interface: a JSON payload in, the JSON result of evaluate() out.
+demo = gr.Interface(
+    fn=evaluate,
+    inputs=gr.JSON(),
+    outputs=gr.JSON(),
+    title="代码评估服务",
+    description="支持多种编程语言的代码评估服务"
+)
+# Start the web server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio>=3.50.2
+# NOTE(review): pathlib and importlib ship with the Python 3 standard library;
+# these PyPI backports look unnecessary - confirm and remove.
+pathlib>=1.0.1
+importlib>=1.0.4

src/__init__.py ADDED Viewed

File without changes

src/containerized_eval.py ADDED Viewed

	@@ -0,0 +1,100 @@

+"""
+NOTE: Nothing containerized about this any more. This is just a helper
+for problem_evaluator.py.
+"""
+from pathlib import Path
+import eval_adb
+import eval_ruby
+import eval_lua
+import eval_python
+import eval_rust
+import eval_julia
+import eval_java
+import eval_lua
+import eval_racket
+import eval_javascript
+import eval_swift
+import eval_cpp
+import eval_php
+import eval_dlang
+import eval_julia
+import eval_r
+import eval_fs
+import eval_ocaml
+import eval_matlab
+import eval_hs
+import eval_elixir
+import eval_clj
+import eval_v
+import eval_lean
+import eval_dart
+import tempfile
+# Maps a language key to (evaluator function, file extension used for the
+# temporary source file). Several keys may share one evaluator.
+EVALUATORS = {
+    "ada": (eval_adb.eval_script, ".adb"),
+    "rb": (eval_ruby.eval_script, ".rb"),
+    "lua": (eval_lua.eval_script, ".lua"),
+    "python": (eval_python.eval_script, ".py"),
+    "py": (eval_python.eval_script, ".py"),
+    "notypes.py": (eval_python.eval_script, ".py"),
+    "julia": (eval_julia.eval_script, ".jl"),
+    "java": (eval_java.eval_script, ".java"),
+    "rust": (eval_rust.eval_script, ".rs"),
+    "rs": (eval_rust.eval_script, ".rs"),
+    "swift": (eval_swift.eval_script, ".swift"),
+    # The second, identical "lua" entry that used to sit here was removed:
+    # a duplicate key in a dict literal silently overwrites the first one.
+    "racket": (eval_racket.eval_script, ".rkt"),
+    "rkt": (eval_racket.eval_script, ".rkt"),
+    "javascript": (eval_javascript.eval_script, ".js"),
+    "js": (eval_javascript.eval_script, ".js"),
+    "cpp": (eval_cpp.eval_script, ".cpp"),
+    "php": (eval_php.eval_script, ".php"),
+    "humaneval_to_dlang.py": (eval_dlang.eval_script, ".d"),
+    "d": (eval_dlang.eval_script, ".d"),
+    "r": (eval_r.eval_script, ".r"),
+    "humaneval_to_r.py": (eval_r.eval_script, ".r"),
+    "jl": (eval_julia.eval_script, ".jl"),
+    "fs": (eval_fs.eval_script, ".fsx"),
+    "ml": (eval_ocaml.eval_script, ".ml"),
+    "m": (eval_matlab.eval_script, ".m"),
+    "hs": (eval_hs.eval_script, ".hs"),
+    "elixir": (eval_elixir.eval_script, ".exs"),
+    "clj": (eval_clj.eval_script, ".clj"),
+    "coq": (eval_v.eval_script, ".v"),
+    "lean": (eval_lean.eval_script, ".lean"),
+    "dart": (eval_dart.eval_script, ".dart"),
+}
+def eval_string_script(language, program):
+    """Write ``program`` to a temporary file and run the evaluator for ``language``.
+    Languages missing from EVALUATORS are resolved by importing
+    ``eval_<language>`` ("go_test.go" maps to ``eval_go`` with a ``_test.go``
+    suffix). Returns a dict with the program, the first 2048 characters of
+    stdout/stderr, the exit code and the status.
+    """
+    registered = EVALUATORS.get(language)
+    if registered is not None:
+        eval_script, file_ext = registered
+    else:
+        is_go_test = language == "go_test.go"
+        eval_module = __import__("eval_go" if is_go_test else f"eval_{language}")
+        eval_script = eval_module.eval_script
+        file_ext = "_test.go" if is_go_test else f".{language}"
+    with tempfile.NamedTemporaryFile(suffix=file_ext, delete=True) as scratch:
+        scratch.write(program.encode("utf-8"))
+        scratch.flush()
+        result = eval_script(Path(scratch.name))
+        # Normalize both streams to str: bytes are decoded, None becomes "".
+        for stream in ("stdout", "stderr"):
+            raw = result[stream]
+            if type(raw) == bytes:
+                result[stream] = raw.decode("utf-8", errors="ignore")
+            elif raw is None:
+                result[stream] = ""
+        for stream in ("stdout", "stderr"):
+            assert type(result[stream]) == str
+        # Only the first 2K of each stream is kept. Any further output is
+        # very likely an exceptionally long stack trace or a long series of
+        # prints.
+        clipped_stdout = result['stdout'].replace("!!int", "")[:2048]
+        clipped_stderr = result['stderr'][:2048]
+        return {
+            "program": program,
+            "stdout": clipped_stdout,
+            "stderr": clipped_stderr,
+            "exit_code": result['exit_code'],
+            "status": result['status']
+        }

src/eval_adb.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from pathlib import Path
+from safe_subprocess import run
+from generic_eval import main
+LANG_NAME = "Ada"
+LANG_EXT = ".adb"
+def eval_script(path: Path):
+    """Chop, build and run an Ada program, reporting the first stage that fails.
+    The sources are split with ``gnatchop`` into ``<stem>_tmp`` next to
+    ``path``, built with ``gnatmake`` and then executed as ``./main``.
+    """
+    def _report(status, proc):
+        # Every stage reports the same four fields.
+        return {
+            "status": status,
+            "exit_code": proc.exit_code,
+            "stdout": proc.stdout,
+            "stderr": proc.stderr,
+        }
+    scratch: Path = path.parent / (path.stem + "_tmp")
+    scratch.mkdir()
+    chopped = run(["gnatchop", "-w", path, scratch])
+    if chopped.exit_code != 0:
+        return _report("SyntaxError (gnatchop)", chopped)
+    gnatmake_cmd = [
+        "gnatmake", "-gnatW8", "main.adb", "-o", "main", "-g", "-j0",
+        "-gnata", "-gnat2022", "-gnateE", "-bargs", "-Es",
+    ]
+    built = run(gnatmake_cmd, cwd=str(scratch))
+    if built.exit_code != 0:
+        return _report("SyntaxError (gnatmake)", built)
+    executed = run(["./main"], cwd=str(scratch))
+    if executed.timeout:
+        outcome = "Timeout"
+    elif executed.exit_code != 0:
+        outcome = "Exception"
+    else:
+        outcome = "OK"
+    return _report(outcome, executed)
+if __name__ == "__main__":
+    main(eval_script, LANG_NAME, LANG_EXT)

src/eval_clj.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""
+Evaluates a generated Clojure program (.clj).
+"""
+import os
+from pathlib import Path
+from safe_subprocess import run
+from libeval import run_without_exn
+def eval_script(path: Path):
+    """Run a Clojure program and classify the outcome.
+    The run counts as "OK" only when the process exits with 0 and stdout
+    contains the "0 failures, 0 errors." summary line.
+    """
+    passed_marker = "\n0 failures, 0 errors.\n"
+    proc = run(["clojure", "-J-Dclojure.main.report=stderr", "-M", str(path)])
+    if proc.timeout:
+        verdict = "Timeout"
+    elif proc.exit_code == 0 and passed_marker in proc.stdout:
+        verdict = "OK"
+    else:
+        # Non-zero exit, or the tests did not all pass.
+        verdict = "Exception"
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )
+if __name__ == "__main__":
+    # `main` was never defined or imported in this module, so running it as
+    # a script raised NameError. Use the shared driver the same way the
+    # other evaluators (e.g. Ada, C++) do.
+    from generic_eval import main
+    main(eval_script, "Clojure", ".clj")

src/eval_cpp.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from pathlib import Path
+from safe_subprocess import run
+from generic_eval import main
+LANG_NAME = "C++"
+LANG_EXT = ".cpp"
+def eval_script(path: Path):
+    """Compile a C++ program with ``g++ -std=c++17`` and run the binary.
+    A failed build is reported as "SyntaxError". Two known-bad toolchain
+    signatures in the program's stderr abort the evaluation with an
+    exception instead of producing a result.
+    """
+    binary = ".".join(str(path).split(".")[:-1])
+    compiled = run(["g++", path, "-o", binary, "-std=c++17"])
+    if compiled.exit_code != 0:
+        return dict(
+            status="SyntaxError",
+            exit_code=compiled.exit_code,
+            stdout=compiled.stdout,
+            stderr=compiled.stderr,
+        )
+    executed = run([binary])
+    toolchain_problems = (
+        ("In file included from /shared/centos7/gcc/9.2.0-skylake/", "Skylake bug encountered"),
+        ("/4.8.2", "Ancient compiler encountered"),
+    )
+    for marker, complaint in toolchain_problems:
+        if marker in executed.stderr:
+            raise Exception(complaint)
+    if executed.timeout:
+        verdict = "Timeout"
+    else:
+        verdict = "OK" if executed.exit_code == 0 else "Exception"
+    return dict(
+        status=verdict,
+        exit_code=executed.exit_code,
+        stdout=executed.stdout,
+        stderr=executed.stderr,
+    )
+if __name__ == "__main__":
+    main(eval_script, LANG_NAME, LANG_EXT)

src/eval_cs.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os
+import subprocess
+from pathlib import Path
+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from generic_eval import main
+LANG_NAME = "CSharp"
+LANG_EXT = ".cs"
+#Following files have problems:
+#137,
+#22: Any
+#148: Elipsis
+def eval_script(path: Path):
+    """Compile a C# program with ``csc`` and run it under ``mono``.
+    Args:
+        path: Source file; files whose name does not contain ".cs" are skipped.
+    Returns:
+        dict | None: ``None`` for skipped files, otherwise a dict with
+        ``status`` ("OK", "SyntaxError", "Exception" or "Timeout"),
+        ``exit_code``, ``stdout`` and ``stderr``. Both streams are always
+        ``str``; a missing stdout and a missing or empty stderr are reported
+        as the string "None".
+    """
+    def _text(raw):
+        # Streams arrive as bytes (or None); never hand bytes back to callers.
+        if raw is None:
+            return None
+        if isinstance(raw, bytes):
+            return raw.decode("utf-8", errors="replace")
+        return raw
+    if ".cs" not in path.name:
+        return
+    basename = ".".join(str(path).split(".")[:-1])
+    binaryname = basename + ".exe"
+    build = subprocess.run(["csc", "/d:DEBUG", "-r:System.Numerics.dll", path, f"/out:{binaryname}"], capture_output=True)
+    returncode = -1
+    if build.returncode != 0:
+        # A compile error may really be a type error, but it is reported as
+        # "SyntaxError" to keep the status convention of the other evaluators.
+        status = "SyntaxError"
+        returncode = build.returncode
+        stdout, stderr = _text(build.stdout), _text(build.stderr)
+    else:
+        try:
+            proc = subprocess.run(["mono", binaryname], env={"PATH": os.getenv("PATH"), "MONO_TRACE_LISTENER":"Console.Error"}, capture_output=True, timeout=5)
+            returncode = proc.returncode
+            stdout, stderr = _text(proc.stdout), _text(proc.stderr)
+            # mono returns 0 even when an assertion fails, so inspect stderr.
+            fail = "System.Diagnostics.DefaultTraceListener.Fail" in stderr or "Unhandled Exception" in stderr
+            status = "Exception" if fail else "OK"
+        except subprocess.TimeoutExpired as exc:
+            status = "Timeout"
+            stdout, stderr = _text(exc.stdout), _text(exc.stderr)
+        finally:
+            # Remove the binary even when running it raised unexpectedly.
+            os.remove(binaryname)
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": "None" if stdout is None else stdout,
+        "stderr": stderr if stderr else "None",
+    }
+if __name__ == "__main__":
+    main(eval_script, LANG_NAME, LANG_EXT)

src/eval_dart.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Analyze a Dart program, then run it if the analysis succeeds.
+    A non-zero exit from ``dart analyze`` is reported as "SyntaxError";
+    otherwise the status reflects the run of the program itself.
+    """
+    analysis = run(["dart", "analyze", "--no-fatal-warnings", str(path)], timeout_seconds=15)
+    if analysis.exit_code != 0:
+        return dict(
+            status="SyntaxError",
+            exit_code=analysis.exit_code,
+            stdout=analysis.stdout,
+            stderr=analysis.stderr,
+        )
+    executed = run(["dart", str(path)], timeout_seconds=15)
+    if executed.timeout:
+        verdict = "Timeout"
+    else:
+        verdict = "OK" if executed.exit_code == 0 else "Exception"
+    return dict(
+        status=verdict,
+        exit_code=executed.exit_code,
+        stdout=executed.stdout,
+        stderr=executed.stderr,
+    )

src/eval_dfy.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from pathlib import Path
+from safe_subprocess import run
+# 0 – success
+# 1 – invalid command-line arguments
+# 2 – syntax, parse, or name or type resolution errors
+# 3 – compilation errors
+# 4 – verification errors
+def eval_script(path: Path):
+    """Run a Dafny program with ``dafny run`` and map its exit code to a status."""
+    # Exit codes with a dedicated status; anything else is an "Exception".
+    status_by_exit_code = {
+        0: "OK",
+        2: "SyntaxError",
+        3: "CompilationError",
+        4: "VerificationError",
+    }
+    proc = run(["dafny", "run", str(path)])
+    if proc.timeout:
+        verdict = "Timeout"
+    else:
+        verdict = status_by_exit_code.get(proc.exit_code, "Exception")
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_dlang.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+import subprocess
+from pathlib import Path
+from safe_subprocess import run
+import sys
+import re
+ENABLE_SYNTAX_CHECK = False
+def eval_script(path: Path):
+    """Run a D program's unit tests with ``rdmd -unittest`` and classify the outcome.
+    Raises:
+        Exception: if stderr indicates that D is not correctly installed.
+    """
+    proc = run(["rdmd", "-unittest", str(path)], timeout_seconds=15)
+    if "might not be correctly installed" in proc.stderr:
+        raise Exception("D is not correctly installed")
+    def _classify():
+        if proc.timeout:
+            return "Timeout"
+        if proc.exit_code == 0:
+            return "OK"
+        return "SyntaxError" if "Error:" in proc.stderr else "Exception"
+    return dict(
+        status=_classify(),
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )
+DIR = "d-keep-code_davinci_001_temp_0.2"
+def main():
+    """Evaluate every file of the DIR dataset and print one CSV line per file.
+    Prints ``Dlang,<problem>,<status>`` for each file and a final tally per
+    status. When ENABLE_SYNTAX_CHECK is set, syntax errors located inside
+    the unit-test part of a file are reported as well.
+    """
+    directory = Path(Path(__file__).parent, "..", "datasets", DIR).resolve()
+    count = {"OK": 0, "Timeout": 0, "Exception": 0, "SyntaxError": 0}
+    for filename in os.listdir(directory):
+        path = Path.joinpath(directory, filename)
+        r = eval_script(path)
+        status = r["status"]
+        count[status] += 1
+        if ENABLE_SYNTAX_CHECK and status == "SyntaxError":
+            error_msgs = r["stderr"].split("\n")
+            with open(path) as source_file:
+                lines = source_file.readlines()
+            # Without a "unittest" line there is no test section to check;
+            # lines.index() would raise ValueError.
+            if "unittest\n" in lines:
+                unittest_line_start = lines.index("unittest\n")
+                unittest_line_end = len(lines)
+                for err_msg_line in error_msgs:
+                    matched_parts = re.match(r"(\/?.*?\.[\w:]+\/.*.d)\(([0-9]+)\): Error: (.*)", err_msg_line[2:-1])
+                    # Lines that are not compiler errors (e.g. blank lines)
+                    # do not match; subscripting None would raise TypeError.
+                    if matched_parts is None:
+                        continue
+                    line_num = int(matched_parts[2])
+                    if unittest_line_start <= line_num <= unittest_line_end:
+                        print("===============")
+                        print(path, "contains error in unit test part")
+                        print(error_msgs)
+                        print("===============")
+        filename = filename.split(".")[0]
+        print(f"Dlang,{filename},{status}")
+    print(DIR + ":" + str(count))
+if __name__ == "__main__":
+    main()

src/eval_elixir.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import argparse
+from sys import exit
+import subprocess
+from pathlib import Path
+from generic_eval import main as gmain
+def eval_script(path: Path):
+    """Run an Elixir script (5 second limit) and classify the outcome.
+    Exit code 0 means "OK"; otherwise the textual form of the finished
+    process is searched for assertion and syntax error markers.
+    """
+    def _as_text(raw):
+        return "" if raw is None else raw.decode("utf-8")
+    try:
+        proc = subprocess.run(["elixir", str(path)], capture_output=True, timeout=5)
+    except subprocess.TimeoutExpired as expired:
+        return {
+            "status": "Timeout",
+            "exit_code": -1,
+            "stdout": _as_text(expired.stdout),
+            "stderr": _as_text(expired.stderr),
+        }
+    if proc.returncode == 0:
+        verdict = "OK"
+    else:
+        dump = str(proc)
+        if "Assertion with == failed" in dump:
+            verdict = "AssertionError"
+        elif "SyntaxError" in dump:
+            verdict = "SyntaxError"
+        else:
+            verdict = "Exception"
+    return {
+        "status": verdict,
+        "exit_code": proc.returncode,
+        "stdout": _as_text(proc.stdout),
+        "stderr": _as_text(proc.stderr),
+    }
+if __name__ == "__main__":
+    gmain(eval_script, "Elixir", ".exs")

src/eval_fs.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run an F# script with ``dotnet fsi`` and classify the outcome."""
+    proc = run(["dotnet", "fsi", "-d:DEBUG", str(path)])
+    if proc.timeout:
+        verdict = "Timeout"
+    else:
+        verdict = "OK" if proc.exit_code == 0 else "Exception"
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_go.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import argparse
+from sys import exit
+import subprocess
+from pathlib import Path
+from generic_eval import main as gmain
+def eval_script(path: Path):
+    """Run ``go test`` on a file (30 second limit) and classify the outcome.
+    The status is derived from markers in stdout; on timeout the exit code
+    and both streams are reported as ``None``.
+    """
+    verdict = out = err = code = None
+    try:
+        proc = subprocess.run(
+            ["go", "test", path],
+            timeout=30,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.TimeoutExpired:
+        verdict = "Timeout"
+    else:
+        out = proc.stdout.decode("utf-8", errors="ignore")
+        err = proc.stderr.decode("utf-8", errors="ignore")
+        code = proc.returncode
+        build_broken = "[setup failed]" in out or "[build failed]" in out
+        if build_broken:
+            verdict = "SyntaxError"
+        elif "FAIL" in out:
+            verdict = "Exception"
+        else:
+            verdict = "OK"
+    return dict(status=verdict, exit_code=code, stdout=out, stderr=err)
+if __name__ == "__main__":
+    gmain(eval_script, 'Go', '.go')

src/eval_hs.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run a Haskell program with ``runghc`` and classify the outcome.
+    Returns:
+        dict: ``status`` ("Timeout", "OK", "SyntaxError" or "Exception"),
+        ``exit_code``, ``stdout`` and ``stderr`` of the run.
+    """
+    r = run(["runghc", str(path)])
+    if r.timeout:
+        status = "Timeout"
+    elif r.exit_code == 0:
+        status = "OK"
+    # The condition used to be the bare string "Syntax error", which is
+    # always truthy, so every failure was labelled SyntaxError and the
+    # "Exception" branch was unreachable. Look for the marker in stderr.
+    # NOTE(review): GHC is assumed to word syntax problems as "parse error" - confirm.
+    elif "Syntax error" in r.stderr or "parse error" in r.stderr:
+        status = "SyntaxError"
+    else:
+        status = "Exception"
+    return {
+        "status": status,
+        "exit_code": r.exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_java.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+import tempfile
+from safe_subprocess import run
+from pathlib import Path
+from generic_eval import main
+LANG_NAME = "Java"
+LANG_EXT = ".java"
+#Following files have problems:
+#137,
+#22: Any
+#148: Elipsis
+def eval_script(path: Path):
+    """Compile a Java program with ``javac`` and run its ``Problem`` class.
+    A failed compilation is reported as "SyntaxError" (it may really be a
+    type error, but this keeps the shared status convention).
+    """
+    env = os.environ.copy()
+    tuples_jar = Path("/usr/multiple/javatuples-1.2.jar")
+    env["CLASSPATH"] = f"{tuples_jar}"
+    # Every problem defines a class with the same name, so each one is
+    # compiled into its own temporary directory to keep the .class files
+    # apart. Sources are read as UTF-8.
+    with tempfile.TemporaryDirectory() as class_dir:
+        proc = run(["javac", "-encoding", "UTF8", "-d", class_dir, path], env=env)
+        if proc.exit_code == 0:
+            proc = run(["java", "-ea", "-cp", f"{class_dir}:{tuples_jar}", "Problem"], env=env)
+            if proc.timeout:
+                verdict = "Timeout"
+            else:
+                verdict = "OK" if proc.exit_code == 0 else "Exception"
+        else:
+            verdict = "SyntaxError"
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )
+if __name__ == "__main__":
+    main(eval_script, LANG_NAME, LANG_EXT)

src/eval_javascript.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import os
+import subprocess
+from pathlib import Path
+def eval_script(path: Path):
+    """Run a JavaScript file with ``node`` (5 second limit) and classify the outcome.
+    Exit code 0 means "OK"; otherwise the textual form of the finished
+    process is searched for known error names.
+    """
+    def _as_text(raw):
+        return "" if raw is None else raw.decode("utf-8")
+    try:
+        proc = subprocess.run(["node", str(path)], capture_output=True, timeout=5)
+    except subprocess.TimeoutExpired as expired:
+        verdict, code, source = "Timeout", -1, expired
+    except subprocess.CalledProcessError as failed:
+        verdict, code, source = "Exception", failed.returncode, failed
+    else:
+        code, source = proc.returncode, proc
+        if code == 0:
+            verdict = "OK"
+        else:
+            dump = str(proc)
+            if 'ERR_ASSERTION' in dump:
+                verdict = "AssertionError"
+            elif 'SyntaxError' in dump:
+                verdict = "SyntaxError"
+            elif 'ReferenceError' in dump:
+                verdict = "ReferenceError"
+            else:
+                verdict = "Exception"
+    return {
+        "status": verdict,
+        "exit_code": code,
+        "stdout": _as_text(source.stdout),
+        "stderr": _as_text(source.stderr),
+    }
+def main():
+    """Evaluate every file of the bundled JavaScript dataset and print CSV lines."""
+    dataset = Path(
+        Path(__file__).parent, "..", "datasets", "js-keep-code_davinci_001_temp_0.2"
+    ).resolve()
+    for entry in os.listdir(dataset):
+        outcome = eval_script(dataset / entry)
+        # The problem name is the part of the file name before the first dot.
+        problem = entry.split(".")[0]
+        print(f"JavaScript,{problem},{outcome['status']}")
+if __name__ == "__main__":
+    main()

src/eval_julia.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from safe_subprocess import run
+from pathlib import Path
+def eval_script(path: Path):
+    """Run a Julia program (5 second limit) and classify the outcome.
+    A failing run with empty stderr is an "Exception"; a failing run that
+    wrote to stderr is reported as "SyntaxError".
+    """
+    proc = run(["julia", str(path)], timeout_seconds=5)
+    def _classify():
+        if proc.timeout:
+            return "Timeout"
+        if proc.exit_code == 0:
+            return "OK"
+        # TODO(arjun): I would like this to be reviewed more carefully by John.
+        return "Exception" if len(proc.stderr) < 1 else "SyntaxError"
+    return dict(
+        status=_classify(),
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_lean.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from pathlib import Path
+from safe_subprocess import run
+import subprocess
+def eval_script(path: Path):
+    """Check a Lean file with ``lean`` (5 second limit) and classify the outcome.
+    The status is derived from the tool's output rather than its return
+    code: a failed ``rfl`` tactic is an "AssertionError", no output at all
+    is "OK", and any other output is reported as "SyntaxError".
+    Returns:
+        dict: ``status``, ``exit_code`` (-1 on timeout), ``stdout`` and
+        ``stderr`` (both decoded to ``str``).
+    """
+    def _text(raw):
+        if raw is None:
+            return ""
+        return raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw
+    try:
+        output = subprocess.run(["lean", str(path)], capture_output=True, timeout=5)
+    except subprocess.TimeoutExpired as exc:
+        status = "Timeout"
+        returncode = -1
+        stdout, stderr = _text(exc.stdout), _text(exc.stderr)
+    else:
+        returncode = output.returncode
+        stdout, stderr = _text(output.stdout), _text(output.stderr)
+        # The check used to run on str(output), i.e. the repr of the
+        # CompletedProcess, which is never empty - so "OK" was unreachable.
+        # Inspect the decoded streams instead.
+        outmessage = stdout + stderr
+        if "error: tactic 'rfl' failed" in outmessage:
+            status = "AssertionError"
+        elif outmessage.strip() == "":
+            status = "OK"
+        else:
+            status = "SyntaxError"
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": stdout,
+        "stderr": stderr,
+    }

src/eval_lua.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run a Lua program with ``lua`` and classify the outcome."""
+    proc = run(["lua", str(path)])
+    if proc.timeout:
+        verdict = "Timeout"
+    else:
+        verdict = "OK" if proc.exit_code == 0 else "Exception"
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_luau.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Type-check a Luau program with ``luau-analyze``, then run it with ``luau``.
+    The program is only executed when the analysis exits with 0; the
+    returned exit code and streams belong to the last command that ran.
+    """
+    last = run(["luau-analyze", str(path)])
+    if last.timeout:
+        verdict = "Timeout"
+    elif last.exit_code == 0:
+        last = run(["luau", str(path)])
+        if last.timeout:
+            verdict = "Timeout"
+        else:
+            verdict = "OK" if last.exit_code == 0 else "Exception"
+    else:
+        # Analysis failed: tell syntax errors apart from type errors.
+        verdict = "SyntaxError" if "SyntaxError" in last.stderr else "TypeError"
+    return dict(
+        status=verdict,
+        exit_code=last.exit_code,
+        stdout=last.stdout,
+        stderr=last.stderr,
+    )

src/eval_matlab.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path):
+    """Run a MATLAB test file through the MATLAB engine and classify the outcome.
+    The file at ``path`` is rewritten in place with a leading ``test();``
+    call, then executed by a generated Python program in a subprocess
+    (30 second limit). Returns a dict with ``status`` ("Timeout", "OK" or
+    "Exception"), a synthetic ``exit_code`` (-1, 0 or 1), ``stdout`` and
+    ``stderr``.
+    """
+    # Matlab has the requirement that all functions must appear at the end
+    # of the file. So we first have to write the call to the test-function at the
+    # beginning of the file.
+    # NOTE(review): this modifies the input file, so evaluating the same file
+    # twice prepends the call twice.
+    with open(path, 'r') as f:
+        content = f.read()
+    content = f"test();\n{content}"
+    with open(path, 'w') as f:
+        f.write(content)
+    filename = path.stem
+    parent_dir = path.parent.absolute()
+    # We use the matlab.engine to run the script; however, the way that the
+    # matlab engine works requires that we call the script as if it were a
+    # member of the matlab.engine object. So we have to write a python script
+    # that calls the matlab script. This also ensures that the script is called
+    # in a safe-subprocess. Who needs runtime reflection when you have IPC?
+    # NOTE(review): parent_dir and filename are pasted into the generated
+    # source verbatim; a quote in the directory name or a file stem that is
+    # not a valid identifier would break the generated program.
+    program= f"""
+import matlab.engine
+import io
+import sys
+out = io.StringIO()
+err = io.StringIO()
+eng = matlab.engine.start_matlab()
+eng.addpath(r'{parent_dir}',nargout=0)
+try:
+    r = eng.{filename}(nargout=0, stdout=out,stderr=err)
+    print(out.getvalue())
+except matlab.engine.MatlabExecutionError as e:
+    print(err.getvalue(), file=sys.stderr)
+"""
+    r = run(["python3", "-c", program], timeout_seconds=30)
+    # This is still somewhat brittle.
+    # Success is inferred from an empty stderr, not from the exit code.
+    if r.timeout:
+        status = "Timeout"
+        exit_code = -1
+    elif r.stderr == "":
+        status = "OK"
+        exit_code = 0
+    else:
+        status = "Exception"
+        exit_code = 1
+    return {
+        "status": status,
+        "exit_code": exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_ocaml.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run an OCaml program with ``ocaml`` and classify the outcome."""
+    proc = run(["ocaml", str(path)])
+    def _classify():
+        if proc.timeout:
+            return "Timeout"
+        if proc.exit_code == 0:
+            return "OK"
+        # Failed run: look for the most specific marker in stderr first.
+        if "Assert_failure" in proc.stderr:
+            return "AssertionError"
+        if "Syntax error" in proc.stderr:
+            return "SyntaxError"
+        return "Exception"
+    return dict(
+        status=_classify(),
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_php.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from pathlib import Path
+from safe_subprocess import run
+LANG_NAME = "PHP"
+LANG_EXT = ".php"
+def eval_script(path: Path):
+    """Run a PHP program with ``php`` and classify the outcome.
+    Returns:
+        dict: ``status`` ("Timeout", "SyntaxError", "Exception" or "OK"),
+        ``exit_code``, ``stdout`` and ``stderr`` of the run.
+    """
+    r = run(["php", path])
+    # Report timeouts explicitly, as the sibling evaluators do; previously a
+    # timed-out run fell through to the exit-code check.
+    if r.timeout:
+        status = "Timeout"
+    # NOTE(review): the parse-error marker was only searched in stdout; it is
+    # now also searched in stderr since PHP may log errors there - confirm
+    # against the PHP configuration of the evaluation image.
+    elif "PHP Parse error" in r.stdout or "PHP Parse error" in r.stderr:
+        status = "SyntaxError"
+    elif r.exit_code != 0:
+        status = "Exception"
+    else:
+        status = "OK"
+    return {
+        "status": status,
+        "exit_code": r.exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_pl.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run a Perl program with ``perl`` and classify the outcome.
+    Besides a non-zero exit code, the word "ERROR" in either stream also
+    marks the run as an "Exception".
+    """
+    proc = run(["perl", path])
+    if proc.timeout:
+        verdict = "Timeout"
+    elif proc.exit_code != 0 or "ERROR" in proc.stdout or "ERROR" in proc.stderr:
+        verdict = "Exception"
+    else:
+        verdict = "OK"
+    return dict(
+        status=verdict,
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_python.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    """Run a Python program with ``python3`` and classify the outcome."""
+    proc = run(["python3", str(path)])
+    def _classify():
+        if proc.timeout:
+            return "Timeout"
+        if proc.exit_code == 0:
+            return "OK"
+        return "SyntaxError" if "SyntaxError" in proc.stderr else "Exception"
+    return dict(
+        status=_classify(),
+        exit_code=proc.exit_code,
+        stdout=proc.stdout,
+        stderr=proc.stderr,
+    )

src/eval_r.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+import subprocess
+from pathlib import Path
+def eval_script(path: Path):
+    """Run an R script with ``Rscript`` (5 second limit) and classify the outcome.
+    Returns:
+        dict: ``status`` ("OK", "SyntaxError", "AssertionError", "Exception"
+        or "Timeout"), ``exit_code`` (-1 on timeout), and ``stdout`` /
+        ``stderr`` decoded to ``str``.
+    """
+    def _text(raw):
+        # The streams are captured as bytes; decode them so the result can be
+        # serialized as JSON, as the JavaScript evaluator's result can.
+        if raw is None:
+            return ""
+        return raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw
+    try:
+        # Assumes exit-code 0 is all okay
+        # Run R on the file, capturing stderr
+        output = subprocess.run(["Rscript", str(path)], capture_output=True, timeout=5)
+        if output.returncode == 0:
+            status = "OK"
+        else:
+            # The checks run on the textual form of the finished process.
+            outmessage = str(output)
+            if 'unexpected' in outmessage:
+                status = "SyntaxError"
+            elif "err=b''" in outmessage:
+                # Failure with an empty stderr.
+                status = "AssertionError"
+            else:
+                status = "Exception"
+        returncode = output.returncode
+    except subprocess.TimeoutExpired as exc:
+        status = "Timeout"
+        output = exc
+        returncode = -1
+    except subprocess.CalledProcessError as exc:
+        status = "Exception"
+        returncode = exc.returncode
+        output = exc
+    return {
+            "status": status,
+            "exit_code": returncode,
+            "stdout": _text(output.stdout),
+            "stderr": _text(output.stderr)
+    }
+def main():
+    """Evaluate every file of the bundled R dataset and print CSV lines."""
+    dataset = Path(
+        Path(__file__).parent, "..", "datasets", "R-keep-code_davinci_001_temp_0.2"
+    ).resolve()
+    for entry in os.listdir(dataset):
+        outcome = eval_script(dataset / entry)
+        # The problem name is the part of the file name before the first dot.
+        problem = entry.split(".")[0]
+        print(f"R,{problem},{outcome['status']}")
+if __name__ == "__main__":
+    main()

src/eval_racket.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""
+Evaluates a generated Racket program (.rkt).
+"""
+import os
+from pathlib import Path
+from safe_subprocess import run
+from libeval import run_without_exn
+def eval_script(path: Path):
+    result = run(["racket", str(path)])
+    if (
+        "standard-module-name-resolver: collection not found\n  for module path: rackunit"
+        in result.stderr
+    ):
+        print(f"Failed to run evaluation for {path}: rackunit is not installed")
+        return None
+    # rackunit produces exit code 0 even if tests fail.
+    if len(result.stderr) > 0 or result.exit_code != 0:
+        if "read-syntax" in result.stderr:
+            status = "SyntaxError"
+        else:
+            status = "Exception"
+    else:
+        status = "OK"
+    return {
+        "status": status,
+        "exit_code": result.exit_code,
+        "stdout": result.stdout,
+        "stderr": result.stderr,
+    }
+def main():
+    directory = Path(
+        Path(__file__).parent, "..", "datasets", "racket-keep-code_davinci_001_temp_0.2"
+    ).resolve()
+    for filename in os.listdir(directory):
+        r = eval_script(Path.joinpath(directory, filename))
+        filename = filename.split(".")[0]
+        print(f"Racket,{filename},{r['status']}")
+if __name__ == "__main__":
+    main()

src/eval_ruby.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import argparse
+from sys import exit
+import subprocess
+from pathlib import Path
+from generic_eval import main as gmain
+def eval_script(path: Path):
+    try:
+        # Assumes exit-code 0 is all okay
+        # Need check=True for Ruby to pass errors to CalledProcessError
+        output = subprocess.run(
+            ["ruby", path], check=True, capture_output=True, timeout=5
+        )
+        if output.returncode == 0:
+            status = "OK"
+            out = output.stderr
+            error = output.stdout
+            returncode = 0
+        else:
+            raise Exception("there's an issue with check = True for Ruby, INVESTIGATE!")
+    except subprocess.TimeoutExpired as exc:
+        status = "Timeout"
+        out = exc.stdout
+        error = exc.stderr
+        returncode = -1
+    except subprocess.CalledProcessError as exc:
+        returncode = exc.returncode
+        out = exc.stdout
+        error = exc.stderr
+        #failure with code 1 but no error message is an Exception from Failed tests
+        if len(error) < 1:
+            status = "Exception"
+        else: #everything that prints out an error message is a SyntaxError
+            status = "SyntaxError"
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": out,
+        "stderr": error,
+    }
+if __name__ == "__main__":
+    gmain(eval_script, 'Ruby', '.rb')

src/eval_rust.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from generic_eval import main
+LANG_NAME = "Rust"
+LANG_EXT = ".rs"
+def eval_script(path: Path):
+    basename = ".".join(str(path).split(".")[:-1])
+    try:
+        build = subprocess.run(["rustc", path, "-o", basename], capture_output=True, timeout=15)
+    except subprocess.TimeoutExpired as exc:
+        return {
+            "status": "Timeout",
+            "exit_code": -1,
+            "stdout": "Compiler timeout",
+            "stderr": "Compiler timeout",
+        }
+    status = None
+    returncode = -1
+    output = None
+    if build.returncode != 0:
+        # Well, it's a compile error. May be a type error or
+        # something. But, why break the set convention
+        status = "SyntaxError"
+        returncode = build.returncode
+        output = build
+    else:
+        try:
+            # Assumes exit-code 0 is all okay
+            output = subprocess.run([basename], capture_output=True, timeout=5)
+            returncode = output.returncode
+            if output.returncode == 0:
+                status = "OK"
+            else:
+                # Well, it's a panic
+                status = "Exception"
+        except subprocess.TimeoutExpired as exc:
+            status = "Timeout"
+            output = exc
+        os.remove(basename)
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": "" if output.stdout is None else output.stdout.decode("utf-8"),
+        "stderr": "" if output.stderr is None else output.stderr.decode("utf-8"),
+    }
+if __name__ == "__main__":
+    main(eval_script, LANG_NAME, LANG_EXT)

src/eval_scala.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from pathlib import Path
+import tempfile
+from safe_subprocess import run
+LANG_NAME = "Scala"
+LANG_EXT = ".scala"
+def eval_script(path: Path):
+    with tempfile.TemporaryDirectory() as outdir:
+        # Each Scala file contains the class with same name `JAVA_CLASS_NAME`
+        # Hence, scalac will same JAVA_CLASS_NAME.class file for each problem
+        # Write class for each problem to a different temp dir
+        build = run(["scalac", "-d", outdir, path], timeout_seconds=45)
+        if build.exit_code != 0:
+            # Well, it's a compile error. May be a type error or
+            # something. But, why break the set convention
+            return {
+                "status": "SyntaxError",
+                "exit_code": build.exit_code,
+                "stdout": build.stdout,
+                "stderr": build.stderr,
+            }
+        # "Problem" is the name of the class we emit.
+        r = run(["scala", "-cp", f"{outdir}", "Problem"])
+        if r.timeout:
+            status = "Timeout"
+        elif r.exit_code == 0 and r.stderr == "":
+            status = "OK"
+        else:
+            # Well, it's a panic
+            status = "Exception"
+    return {
+        "status": status,
+        "exit_code": r.exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_sh.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from pathlib import Path
+from safe_subprocess import run
+LANG_NAME = "bash"
+LANG_EXT = ".sh"
+def eval_script(path: Path):
+    # Capture output - will be generated regardless of success, fail, or syntax error
+    p = run(["bash", path])
+    if p.timeout:
+        status = "Timeout"
+    elif p.exit_code == 0:
+        status = "OK"
+    elif "syntax error" in p.stderr:
+        status = "SyntaxError"
+    else:
+        status = "Exception"
+    return {
+        "status": status,
+        "exit_code": p.exit_code,
+        "stdout": p.stdout,
+        "stderr": p.stderr,
+    }

src/eval_swift.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import subprocess
+from pathlib import Path
+import os
+from safe_subprocess import run
+def eval_script(path: Path):
+    basename = ".".join(str(path).split(".")[:-1])
+    r = run(["swiftc", path, "-o", basename], timeout_seconds=45)
+    if r.timeout:
+        status = "Timeout"
+    elif r.exit_code != 0:
+        # Well, it's a compile error. May be a type error or
+        # something. But, why break the set convention
+        status = "SyntaxError"
+    else:
+        r = run([basename], timeout_seconds=5)
+        if r.timeout:
+            status = "Timeout"
+        elif r.exit_code != 0:
+            # Well, it's a panic
+            status = "Exception"
+        else:
+            status = "OK"
+        os.remove(basename)
+    return {
+        "status": status,
+        "exit_code": r.exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_ts.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from pathlib import Path
+from safe_subprocess import run
+def eval_script(path: Path):
+    r = run(["tsc", "--target", "esnext", str(path)], timeout_seconds=15)
+    if r.exit_code != 0:
+        return {
+            "status": "SyntaxError",
+            "exit_code": r.exit_code,
+            "stdout": r.stdout,
+            "stderr": r.stderr,
+        }
+    r = run(["node", str(path).replace(".ts", ".js")], timeout_seconds=15)
+    if r.timeout:
+        status = "Timeout"
+    elif r.exit_code == 0:
+        status = "OK"
+    elif "ERR_ASSERTION" in r.stderr:
+        status = "AssertionError"
+    elif "SyntaxError" in r.stderr:
+        status = "SyntaxError"
+    elif "ReferenceError" in r.stderr:
+        status = "ReferenceError"
+    else:
+        status = "Exception"
+    return {
+        "status": status,
+        "exit_code": r.exit_code,
+        "stdout": r.stdout,
+        "stderr": r.stderr,
+    }

src/eval_v.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from pathlib import Path
+from safe_subprocess import run
+import subprocess
+# return codes for coqc:
+# 0: compilation goes through
+# 1: some sort of error (nondescript)
+def eval_script(path: Path):
+    cleanup_extensions = ['.vo', '.vok', '.vos']
+    try:
+        # sadly there seems to be no way to verify proofs in a coq file without compiling
+        output = subprocess.run(["coqc", "-noglob", str(path)], capture_output=True, timeout=5)
+        outmessage = str(output)
+        if output.returncode == 0:
+            status = "OK"
+            # cleanup: remove files generated by coqc
+            for ext in cleanup_extensions:
+                file_to_remove = path.with_suffix(ext)
+                if file_to_remove.exists():
+                    file_to_remove.unlink()
+        elif "Unable to unify" in outmessage:
+            status = "AssertionError"
+        else:
+            status = "SyntaxError"
+        returncode = output.returncode
+    except subprocess.TimeoutExpired as exc:
+        status = "Timeout"
+        output = exc
+        returncode = -1
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": "" if output.stdout is None else output.stdout.decode("utf-8"),
+        "stderr": "" if output.stderr is None else output.stderr.decode("utf-8"),
+    }