朱东升 committed on
Commit
30b1610
·
1 Parent(s): c4f1102
.DS_Store ADDED
Binary file (6.15 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image is pinned by digest so that rebuilds use exactly the same image.
FROM ghcr.io/nuprl/multipl-e-evaluation@sha256:11864ca95774df16c34b4cd1eac231f9e5466c7ea38dac98e5b5b053e18479de

WORKDIR /app

# Bind the Gradio server to all interfaces so it is reachable from outside
# the container.
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Install dependencies before copying the sources so this layer is reused
# when only application code changes. --no-cache-dir keeps pip's download
# cache out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .
COPY src/ ./src/

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import importlib
4
+ import os
5
+ from pathlib import Path
6
+
7
def evaluate(input_data):
    """Evaluate code supplied either as a file path or as a request dict.

    Args:
        input_data: Either a string, which is treated as the path of a source
            file whose extension (without the dot) names the language, or a
            dict with a ``language`` entry and a ``completions`` list of
            code strings.

    Returns:
        dict: For a file path, the result of ``evaluate_code``. For a dict,
        ``{"status": "pass"}`` if any completion evaluates with status
        ``"OK"``, otherwise the result of the first completion. Malformed
        input and unexpected errors are reported as
        ``{"status": "Exception", "error": <message>}``.
    """
    try:
        # A string is interpreted as a path to a source file.
        if isinstance(input_data, str):
            source_path = Path(input_data)
            code = source_path.read_text(encoding="utf-8")
            # The language is taken from the file extension.
            return evaluate_code(code, source_path.suffix[1:])

        if isinstance(input_data, dict):
            language = input_data.get('language')
            completions = input_data.get('completions', [])

            if not completions:
                return {"status": "Exception", "error": "No code provided"}
            # Reject a missing language here; otherwise the failure shows up
            # later as an opaque attribute error on ``None``.
            if not isinstance(language, str) or not language:
                return {"status": "Exception", "error": "No language provided"}

            results = [evaluate_code(code, language) for code in completions]

            # One successful completion is enough for the request to pass.
            if any(r["status"] == "OK" for r in results):
                return {"status": "pass"}
            return results[0]  # report the first failure

        return {"status": "Exception", "error": "Invalid input format"}

    except Exception as e:
        # Service boundary: report every failure to the client as a result
        # dict instead of letting the request crash.
        return {"status": "Exception", "error": str(e)}
51
+
52
def evaluate_code(code, language):
    """Evaluate one piece of code with the evaluator module for ``language``.

    The evaluator is the module ``src.eval_<language lower-cased>``; its
    ``eval_script`` function is called with the path of a file that contains
    ``code``.

    Args:
        code (str): Source code to evaluate.
        language (str): Language name used to pick the evaluator module and
            as the extension of the temporary source file.

    Returns:
        dict: The evaluator's result, or
        ``{"status": "Exception", "error": <message>}`` when the evaluator
        module cannot be imported or any other error occurs.
    """
    # Local imports: only this function needs these modules.
    import shutil
    import tempfile

    try:
        # Dynamically import the evaluator for the requested language.
        module_name = f"src.eval_{language.lower()}"
        module = importlib.import_module(module_name)

        # Each call gets its own scratch directory below ``temp`` so that
        # concurrent requests cannot overwrite each other's source file.
        scratch_root = Path("temp")
        scratch_root.mkdir(exist_ok=True)
        work_dir = Path(tempfile.mkdtemp(dir=scratch_root))
        try:
            temp_file = work_dir / f"temp.{language}"
            temp_file.write_text(code, encoding="utf-8")
            return module.eval_script(temp_file)
        finally:
            # Remove the source file and anything the evaluator left next to
            # it, even when the evaluator raised.
            shutil.rmtree(work_dir, ignore_errors=True)

    except ImportError:
        # NOTE(review): this also hides import errors raised inside an
        # existing evaluator module - confirm that is acceptable.
        return {"status": "Exception", "error": f"Language {language} not supported"}
    except Exception as e:
        return {"status": "Exception", "error": str(e)}
88
+
89
# Build the Gradio interface: a JSON request goes in, the JSON evaluation
# result comes out.
_request_component = gr.JSON()
_result_component = gr.JSON()
demo = gr.Interface(
    fn=evaluate,
    inputs=_request_component,
    outputs=_result_component,
    title="代码评估服务",
    description="支持多种编程语言的代码评估服务",
)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# pathlib and importlib are part of the Python 3 standard library. The PyPI
# packages with those names are obsolete backports for old Python versions
# and must not be installed on Python 3.
gradio>=3.50.2
src/__init__.py ADDED
File without changes
src/containerized_eval.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NOTE: Nothing containerized about this any more. This is just a helper
3
+ for problem_evaluator.py.
4
+ """
5
+
6
+ from pathlib import Path
7
+ import eval_adb
8
+ import eval_ruby
9
+ import eval_lua
10
+ import eval_python
11
+ import eval_rust
12
+ import eval_julia
13
+ import eval_java
14
+ import eval_lua
15
+ import eval_racket
16
+ import eval_javascript
17
+ import eval_swift
18
+ import eval_cpp
19
+ import eval_php
20
+ import eval_dlang
21
+ import eval_julia
22
+ import eval_r
23
+ import eval_fs
24
+ import eval_ocaml
25
+ import eval_matlab
26
+ import eval_hs
27
+ import eval_elixir
28
+ import eval_clj
29
+ import eval_v
30
+ import eval_lean
31
+ import eval_dart
32
+ import tempfile
33
+
34
+
35
# Maps a language key to (evaluator function, source-file suffix). Several
# keys may point at the same evaluator. The "lua" entry used to be listed
# twice with identical values; it now appears once.
EVALUATORS = {
    "ada": (eval_adb.eval_script, ".adb"),
    "rb": (eval_ruby.eval_script, ".rb"),
    "lua": (eval_lua.eval_script, ".lua"),
    "python": (eval_python.eval_script, ".py"),
    "py": (eval_python.eval_script, ".py"),
    "notypes.py": (eval_python.eval_script, ".py"),
    "julia": (eval_julia.eval_script, ".jl"),
    "java": (eval_java.eval_script, ".java"),
    "rust": (eval_rust.eval_script, ".rs"),
    "rs": (eval_rust.eval_script, ".rs"),
    "swift": (eval_swift.eval_script, ".swift"),
    "racket": (eval_racket.eval_script, ".rkt"),
    "rkt": (eval_racket.eval_script, ".rkt"),
    "javascript": (eval_javascript.eval_script, ".js"),
    "js": (eval_javascript.eval_script, ".js"),
    "cpp": (eval_cpp.eval_script, ".cpp"),
    "php": (eval_php.eval_script, ".php"),
    "humaneval_to_dlang.py": (eval_dlang.eval_script, ".d"),
    "d": (eval_dlang.eval_script, ".d"),
    "r": (eval_r.eval_script, ".r"),
    "humaneval_to_r.py": (eval_r.eval_script, ".r"),
    "jl": (eval_julia.eval_script, ".jl"),
    "fs": (eval_fs.eval_script, ".fsx"),
    "ml": (eval_ocaml.eval_script, ".ml"),
    "m": (eval_matlab.eval_script, ".m"),
    "hs": (eval_hs.eval_script, ".hs"),
    "elixir": (eval_elixir.eval_script, ".exs"),
    "clj": (eval_clj.eval_script, ".clj"),
    "coq": (eval_v.eval_script, ".v"),
    "lean": (eval_lean.eval_script, ".lean"),
    "dart": (eval_dart.eval_script, ".dart"),
}
69
+
70
def _output_as_text(value):
    """Normalize captured output: ``None`` becomes ``""``, bytes are decoded as UTF-8."""
    if value is None:
        return ""
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")
    return value


def eval_string_script(language, program):
    """Write ``program`` to a temporary file and run the evaluator for ``language``.

    Args:
        language: Key into ``EVALUATORS``. Any other value is resolved by
            importing a module named ``eval_<language>``; the special value
            ``"go_test.go"`` maps to ``eval_go`` with the file suffix
            ``_test.go``.
        program: Source text of the program to evaluate.

    Returns:
        dict: ``program`` plus the evaluator's ``exit_code`` and ``status``,
        and its ``stdout``/``stderr`` as text truncated to 2048 characters.
    """
    if language in EVALUATORS:
        (eval_script, file_ext) = EVALUATORS[language]
    else:
        is_go_test = language == "go_test.go"
        eval_module = __import__("eval_go" if is_go_test else f"eval_{language}")
        eval_script = eval_module.eval_script
        file_ext = "_test.go" if is_go_test else f".{language}"

    with tempfile.NamedTemporaryFile(suffix=file_ext, delete=True) as f:
        f.write(program.encode("utf-8"))
        # Flush so the evaluator's subprocess sees the complete file.
        f.flush()
        result = eval_script(Path(f.name))

    stdout = _output_as_text(result["stdout"])
    stderr = _output_as_text(result["stderr"])
    assert isinstance(stdout, str)
    assert isinstance(stderr, str)

    # Only save the first 2K of output from the running program. Any further
    # output is very likely an exceptionally long stack trace or a long
    # series of prints.
    return {
        "program": program,
        "stdout": stdout.replace("!!int", "")[:2048],
        "stderr": stderr[:2048],
        "exit_code": result['exit_code'],
        "status": result['status']
    }
src/eval_adb.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+ from generic_eval import main
4
+
5
+
6
+ LANG_NAME = "Ada"
7
+ LANG_EXT = ".adb"
8
+
9
+
10
def eval_script(path: Path):
    """Chop, build and run a generated Ada program.

    ``gnatchop`` is run on ``path`` with a sibling ``<stem>_tmp`` directory
    as its target, ``gnatmake`` builds ``main.adb`` in that directory, and
    the resulting ``./main`` is executed there.

    Args:
        path: Path of the Ada source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"SyntaxError (gnatchop)"`` or ``"SyntaxError (gnatmake)"`` when the
        respective tool exits non-zero, otherwise ``"Timeout"``,
        ``"Exception"`` (non-zero exit of the program) or ``"OK"``.
    """
    # Local import: only this function needs it.
    import shutil

    working_dir: Path = path.parent / (path.stem + "_tmp")
    # exist_ok: a leftover directory from an interrupted run must not make
    # evaluation of the same file name fail.
    working_dir.mkdir(exist_ok=True)
    try:
        chop_result = run(["gnatchop", "-w", str(path), str(working_dir)])
        if chop_result.exit_code != 0:
            return {
                "status": "SyntaxError (gnatchop)",
                "exit_code": chop_result.exit_code,
                "stdout": chop_result.stdout,
                "stderr": chop_result.stderr,
            }

        build_result = run(
            [
                "gnatmake",
                "-gnatW8",
                "main.adb",
                "-o",
                "main",
                "-g",
                "-j0",
                "-gnata",
                "-gnat2022",
                "-gnateE",
                "-bargs",
                "-Es",
            ],
            cwd=str(working_dir),
        )
        if build_result.exit_code != 0:
            return {
                "status": "SyntaxError (gnatmake)",
                "exit_code": build_result.exit_code,
                "stdout": build_result.stdout,
                "stderr": build_result.stderr,
            }

        status = "OK"
        run_result = run(["./main"], cwd=str(working_dir))

        if run_result.timeout:
            status = "Timeout"
        elif run_result.exit_code != 0:
            status = "Exception"

        return {
            "status": status,
            "exit_code": run_result.exit_code,
            "stdout": run_result.stdout,
            "stderr": run_result.stderr,
        }
    finally:
        # Always remove the build directory so artifacts do not pile up.
        shutil.rmtree(working_dir, ignore_errors=True)
61
+
62
+
63
+ if __name__ == "__main__":
64
+ main(eval_script, LANG_NAME, LANG_EXT)
src/eval_clj.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluates a generated Clojure program (.clj).
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from safe_subprocess import run
7
+ from libeval import run_without_exn
8
+
9
+
10
def eval_script(path: Path):
    """Run a generated Clojure program and classify the outcome.

    Args:
        path: Path of the ``.clj`` file to run.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"OK"`` (zero exit code and the
        "0 failures, 0 errors." summary line in stdout) or ``"Exception"``.
    """
    result = run(["clojure", "-J-Dclojure.main.report=stderr", "-M", str(path)])

    if result.timeout:
        status = "Timeout"
    elif result.exit_code == 0 and "\n0 failures, 0 errors.\n" in result.stdout:
        status = "OK"
    else:
        # Either the process failed or the test summary reports failures.
        status = "Exception"

    outcome = {"status": status}
    outcome["exit_code"] = result.exit_code
    outcome["stdout"] = result.stdout
    outcome["stderr"] = result.stderr
    return outcome
28
+
29
if __name__ == "__main__":
    # ``main`` was never imported or defined in this module, so running it as
    # a script raised NameError. Use the shared ``generic_eval`` driver with
    # the (eval_script, language name, extension) call shape.
    from generic_eval import main

    main(eval_script, "Clojure", ".clj")
src/eval_cpp.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+ from generic_eval import main
4
+
5
+ LANG_NAME = "C++"
6
+ LANG_EXT = ".cpp"
7
+
8
+
9
def eval_script(path: Path):
    """Compile a C++ program with g++ (C++17) and run the resulting binary.

    Args:
        path: Path of the ``.cpp`` source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"SyntaxError"`` when compilation fails, otherwise ``"Timeout"``,
        ``"Exception"`` (non-zero exit) or ``"OK"``.

    Raises:
        Exception: When the program's stderr shows one of the known broken
            compiler installations checked below.
    """
    source = Path(path)
    # Absolute path without the extension: the binary is then always started
    # by path and never looked up on PATH.
    executable = source.absolute().with_suffix("")
    basename = str(executable)

    build_result = run(["g++", str(path), "-o", basename, "-std=c++17"])
    if build_result.exit_code != 0:
        return {
            "status": "SyntaxError",
            "exit_code": build_result.exit_code,
            "stdout": build_result.stdout,
            "stderr": build_result.stderr,
        }

    try:
        run_result = run([basename])
    finally:
        # Remove the compiled binary; never touch the source file itself
        # (the two coincide only when the source has no extension).
        if executable != source.absolute() and executable.is_file():
            executable.unlink()

    if "In file included from /shared/centos7/gcc/9.2.0-skylake/" in run_result.stderr:
        raise Exception("Skylake bug encountered")
    if "/4.8.2" in run_result.stderr:
        raise Exception("Ancient compiler encountered")

    if run_result.timeout:
        status = "Timeout"
    elif run_result.exit_code != 0:
        status = "Exception"
    else:
        status = "OK"
    return {
        "status": status,
        "exit_code": run_result.exit_code,
        "stdout": run_result.stdout,
        "stderr": run_result.stderr,
    }
37
+
38
+
39
+ if __name__ == "__main__":
40
+ main(eval_script, LANG_NAME, LANG_EXT)
src/eval_cs.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
+ import os
6
+ import subprocess
7
+ import tempfile
8
+ from pathlib import Path
9
+ from generic_eval import main
10
+
11
+ LANG_NAME = "CSharp"
12
+ LANG_EXT = ".cs"
13
+
14
+ #Following files have problems:
15
+ #137,
16
+ #22: Any
17
+ #148: Elipsis
18
+
19
def _decode_stream(data, when_missing="None"):
    """Return captured output as text; ``when_missing`` stands in for ``None``."""
    if data is None:
        return when_missing
    if isinstance(data, bytes):
        return data.decode("utf-8", errors="replace")
    return data


def eval_script(path: Path):
    """Compile a C# program with ``csc`` and run it with ``mono``.

    Args:
        path: Path of the source file. Files whose name does not contain
            ``.cs`` are skipped.

    Returns:
        dict | None: ``None`` for skipped files. Otherwise ``status``,
        ``exit_code``, ``stdout`` and ``stderr``. Status is ``"SyntaxError"``
        when compilation fails, ``"Timeout"`` (exit code ``-1``) when the
        program runs longer than 5 seconds, otherwise ``"Exception"`` or
        ``"OK"``. Missing stdout and missing or empty stderr are reported as
        the string ``"None"``.
    """
    if ".cs" not in path.name:
        return None

    basename = ".".join(str(path).split(".")[:-1])
    binaryname = basename + ".exe"
    build = subprocess.run(
        ["csc", "/d:DEBUG", "-r:System.Numerics.dll", path, f"/out:{binaryname}"],
        capture_output=True,
    )

    if build.returncode != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
        returncode = build.returncode
        raw_stdout, raw_stderr = build.stdout, build.stderr
    else:
        try:
            try:
                proc = subprocess.run(
                    ["mono", binaryname],
                    env={
                        "PATH": os.environ.get("PATH", ""),
                        "MONO_TRACE_LISTENER": "Console.Error",
                    },
                    capture_output=True,
                    timeout=5,
                )
            except subprocess.TimeoutExpired as exc:
                status = "Timeout"
                returncode = -1
                raw_stdout, raw_stderr = exc.stdout, exc.stderr
            else:
                returncode = proc.returncode
                raw_stdout, raw_stderr = proc.stdout, proc.stderr
                stderr_text = _decode_stream(proc.stderr, "")
                # mono returns 0 even when failing, so failures are detected
                # from the trace output instead of the exit code.
                failed = (
                    "System.Diagnostics.DefaultTraceListener.Fail" in stderr_text
                    or "Unhandled Exception" in stderr_text
                )
                status = "Exception" if failed else "OK"
        finally:
            # Remove the compiled binary whatever happened while running it.
            os.remove(binaryname)

    # Both streams are always returned as text (stderr used to stay raw
    # bytes after a build failure or a timeout).
    stdout_text = _decode_stream(raw_stdout)
    stderr_text = _decode_stream(raw_stderr)
    if stderr_text == "":
        stderr_text = "None"

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": stdout_text,
        "stderr": stderr_text,
    }
66
+
67
+ if __name__ == "__main__":
68
+ main(eval_script, LANG_NAME, LANG_EXT)
src/eval_dart.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+
5
def eval_script(path: Path):
    """Statically analyze a Dart program, then run it.

    Args:
        path: Path of the ``.dart`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` of the
        last command that ran. Status is ``"Timeout"`` when either step
        exceeds 15 seconds, ``"SyntaxError"`` when ``dart analyze`` exits
        non-zero, otherwise ``"OK"`` or ``"Exception"`` depending on the
        program's exit code.
    """

    def report(status, r):
        return {
            "status": status,
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }

    r = run(["dart", "analyze", "--no-fatal-warnings", str(path)], timeout_seconds=15)
    if r.timeout:
        # A slow analysis is a timeout, not a syntax error.
        return report("Timeout", r)
    if r.exit_code != 0:
        return report("SyntaxError", r)

    r = run(["dart", str(path)], timeout_seconds=15)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    else:
        status = "Exception"
    return report(status, r)
src/eval_dfy.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+ # 0 – success
5
+ # 1 – invalid command-line arguments
6
+ # 2 – syntax, parse, or name or type resolution errors
7
+ # 3 – compilation errors
8
+ # 4 – verification errors
9
+
10
def eval_script(path: Path):
    """Run a Dafny program with ``dafny run`` and map its exit code to a status.

    Args:
        path: Path of the Dafny source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, or derived from the exit code: 0 ``"OK"``,
        2 ``"SyntaxError"``, 3 ``"CompilationError"``,
        4 ``"VerificationError"``, anything else ``"Exception"``.
    """
    status_by_exit_code = {
        0: "OK",
        2: "SyntaxError",
        3: "CompilationError",
        4: "VerificationError",
    }

    r = run(["dafny", "run", str(path)])
    if r.timeout:
        status = "Timeout"
    else:
        status = status_by_exit_code.get(r.exit_code, "Exception")

    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_dlang.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+ from safe_subprocess import run
5
+ import sys
6
+ import re
7
+
8
+ ENABLE_SYNTAX_CHECK = False
9
+
10
def eval_script(path: Path):
    """Run a D program's unit tests with ``rdmd -unittest``.

    Args:
        path: Path of the ``.d`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"`` (limit 15 seconds), ``"OK"`` (exit code 0),
        ``"SyntaxError"`` (stderr contains ``Error:``) or ``"Exception"``.

    Raises:
        Exception: When stderr reports that D is not correctly installed.
    """
    result = run(["rdmd", "-unittest", str(path)], timeout_seconds=15)
    stderr_text = result.stderr
    if "might not be correctly installed" in stderr_text:
        raise Exception("D is not correctly installed")

    if result.timeout:
        status = "Timeout"
    elif result.exit_code == 0:
        status = "OK"
    else:
        status = "SyntaxError" if "Error:" in stderr_text else "Exception"

    outcome = dict(status=status, exit_code=result.exit_code)
    outcome["stdout"] = result.stdout
    outcome["stderr"] = result.stderr
    return outcome
30
+
31
+ DIR = "d-keep-code_davinci_001_temp_0.2"
32
def main():
    """Evaluate every file of the ``DIR`` dataset and print per-file results.

    One ``Dlang,<name>,<status>`` line is printed per file, followed by the
    per-status totals. When ``ENABLE_SYNTAX_CHECK`` is set, syntax errors
    whose reported line lies in the file's ``unittest`` section are printed
    as well.
    """
    directory = Path(Path(__file__).parent, "..", "datasets", DIR).resolve()

    count = {"OK": 0, "Timeout": 0, "Exception": 0, "SyntaxError": 0}
    for filename in os.listdir(directory):
        path = Path.joinpath(directory, filename)
        r = eval_script(path)
        status = r["status"]
        count[status] += 1

        if ENABLE_SYNTAX_CHECK and status == "SyntaxError":
            error_msgs = r["stderr"].split("\n")
            with open(path) as source_file:
                lines = source_file.readlines()
            try:
                unittest_line_start = lines.index("unittest\n")
            except ValueError:
                # No unittest section in this file: nothing to attribute.
                unittest_line_start = None
            unittest_line_end = len(lines)
            for err_msg_line in error_msgs:
                if unittest_line_start is None:
                    break
                matched_parts = re.match(r"(\/?.*?\.[\w:]+\/.*.d)\(([0-9]+)\): Error: (.*)", err_msg_line[2:-1])
                if matched_parts is None:
                    # Not a "<file>(<line>): Error: ..." line (for example
                    # an empty line); skip it instead of crashing.
                    continue
                _file, line_num = matched_parts[1], int(matched_parts[2])
                if unittest_line_start <= line_num and line_num <= unittest_line_end:
                    print("===============")
                    print(path, "contains error in unit test part")
                    print(error_msgs)
                    print("===============")

        filename = filename.split(".")[0]
        print(f"Dlang,{filename},{status}")

    print(DIR + ":" + str(count))
61
+
62
+ if __name__ == "__main__":
63
+ main()
src/eval_elixir.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from sys import exit
3
+ import subprocess
4
+ from pathlib import Path
5
+ from generic_eval import main as gmain
6
+
7
+
8
def eval_script(path: Path):
    """Run an Elixir script and classify the outcome.

    Args:
        path: Path of the ``.exs`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` (decoded
        as UTF-8, ``""`` when nothing was captured). Status is ``"OK"`` for
        exit code 0, ``"Timeout"`` (exit code ``-1``) after 5 seconds,
        otherwise ``"AssertionError"``, ``"SyntaxError"`` or ``"Exception"``
        depending on the text of the process report.
    """
    try:
        # Assumes exit-code 0 is all okay
        output = subprocess.run(["elixir", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        status, returncode, output = "Timeout", -1, exc
    else:
        returncode = output.returncode
        if returncode == 0:
            status = "OK"
        else:
            report = str(output)
            markers = (
                ("Assertion with == failed", "AssertionError"),
                ("SyntaxError", "SyntaxError"),
            )
            status = next(
                (label for marker, label in markers if marker in report),
                "Exception",
            )

    def as_text(stream):
        return "" if stream is None else stream.decode("utf-8")

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }
34
+
35
+
36
+ if __name__ == "__main__":
37
+ gmain(eval_script, "Elixir", ".exs")
src/eval_fs.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run an F# script with ``dotnet fsi`` (DEBUG defined).

    Args:
        path: Path of the script file.

    Returns:
        dict: ``status`` (``"Timeout"``, ``"OK"`` for exit code 0, otherwise
        ``"Exception"``), ``exit_code``, ``stdout`` and ``stderr``.
    """
    r = run(["dotnet", "fsi", "-d:DEBUG", str(path)])
    if r.timeout:
        status = "Timeout"
    else:
        status = "OK" if r.exit_code == 0 else "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_go.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from sys import exit
3
+ import subprocess
4
+ from pathlib import Path
5
+ from generic_eval import main as gmain
6
+
7
+
8
def eval_script(path: Path):
    """Run ``go test`` on a file and classify the outcome from its stdout.

    Args:
        path: Path of the Go test file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"SyntaxError"`` when stdout contains ``[setup failed]`` or
        ``[build failed]``, ``"Exception"`` when it contains ``FAIL``,
        otherwise ``"OK"``. After the 30 second timeout the status is
        ``"Timeout"`` and the other three values are ``None``.
    """
    outcome = {"status": None, "exit_code": None, "stdout": None, "stderr": None}
    try:
        build = subprocess.run(
            ["go", "test", path],
            timeout=30,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.TimeoutExpired:
        outcome["status"] = "Timeout"
        return outcome

    text_out = build.stdout.decode("utf-8", errors="ignore")
    outcome["stdout"] = text_out
    outcome["stderr"] = build.stderr.decode("utf-8", errors="ignore")
    outcome["exit_code"] = build.returncode

    if "[setup failed]" in text_out or "[build failed]" in text_out:
        outcome["status"] = "SyntaxError"
    elif "FAIL" in text_out:
        outcome["status"] = "Exception"
    else:
        outcome["status"] = "OK"
    return outcome
39
+
40
+
41
+ if __name__ == "__main__":
42
+ gmain(eval_script, 'Go', '.go')
src/eval_hs.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run a Haskell program with ``runghc`` and classify the outcome.

    Args:
        path: Path of the ``.hs`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"OK"`` (exit code 0), ``"SyntaxError"`` when stderr
        reports a syntax or parse error, otherwise ``"Exception"``.
    """
    r = run(["runghc", str(path)])
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    # The condition used to be the bare string "Syntax error", which is
    # always true, so every failure was reported as SyntaxError. GHC words
    # these diagnostics as "parse error", so both spellings are checked.
    elif "Syntax error" in r.stderr or "parse error" in r.stderr:
        status = "SyntaxError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_java.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from safe_subprocess import run
4
+ from pathlib import Path
5
+ from generic_eval import main
6
+
7
+ LANG_NAME = "Java"
8
+ LANG_EXT = ".java"
9
+
10
+ #Following files have problems:
11
+ #137,
12
+ #22: Any
13
+ #148: Elipsis
14
+
15
def eval_script(path: Path):
    """Compile a Java source file with ``javac`` and run its ``Problem`` class.

    Args:
        path: Path of the ``.java`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` of the
        last command that ran. Status is ``"SyntaxError"`` when compilation
        fails, otherwise ``"Timeout"``, ``"OK"`` (exit code 0) or
        ``"Exception"``.
    """
    javatuples_path = Path("/usr/multiple/javatuples-1.2.jar")
    sys_env = dict(os.environ, CLASSPATH=f"{javatuples_path}")

    def report(status, result):
        return {
            "status": status,
            "exit_code": result.exit_code,
            "stdout": result.stdout,
            "stderr": result.stderr,
        }

    # Every program defines a class of the same name, so each compilation
    # writes its class files to its own temporary directory.
    with tempfile.TemporaryDirectory() as outdir:
        # Use UTF8 encoding with javac
        compiled = run(["javac", "-encoding", "UTF8", "-d", outdir, path], env=sys_env)
        if compiled.exit_code != 0:
            # A compile error may also be a type error, but it is reported
            # under the established "SyntaxError" label.
            return report("SyntaxError", compiled)

        executed = run(
            ["java", "-ea", "-cp", f"{outdir}:{javatuples_path}", "Problem"],
            env=sys_env,
        )
        if executed.timeout:
            return report("Timeout", executed)
        return report("OK" if executed.exit_code == 0 else "Exception", executed)
48
+
49
+ if __name__ == "__main__":
50
+ main(eval_script, LANG_NAME, LANG_EXT)
src/eval_javascript.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
def eval_script(path: Path):
    """Run a JavaScript file with ``node`` and classify the outcome.

    Args:
        path: Path of the ``.js`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` (decoded
        as UTF-8, ``""`` when nothing was captured). Status is ``"OK"`` for
        exit code 0, ``"Timeout"`` (exit code ``-1``) after 5 seconds,
        otherwise ``"AssertionError"``, ``"SyntaxError"``,
        ``"ReferenceError"`` or ``"Exception"`` depending on the text of the
        process report.
    """
    failure_markers = (
        ("ERR_ASSERTION", "AssertionError"),
        ("SyntaxError", "SyntaxError"),
        ("ReferenceError", "ReferenceError"),
    )
    try:
        # Assumes exit-code 0 is all okay
        output = subprocess.run(["node", str(path)], capture_output=True, timeout=5)
        returncode = output.returncode
        if returncode == 0:
            status = "OK"
        else:
            report = str(output)
            status = next(
                (label for marker, label in failure_markers if marker in report),
                "Exception",
            )
    except subprocess.TimeoutExpired as exc:
        status, returncode, output = "Timeout", -1, exc
    except subprocess.CalledProcessError as exc:
        status, returncode, output = "Exception", exc.returncode, exc

    captured = {}
    for stream_name in ("stdout", "stderr"):
        raw = getattr(output, stream_name)
        captured[stream_name] = "" if raw is None else raw.decode("utf-8")

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": captured["stdout"],
        "stderr": captured["stderr"],
    }
37
+
38
+
39
+
40
def main():
    """Evaluate every file of the bundled JavaScript dataset directory.

    Prints one ``JavaScript,<name>,<status>`` line per file, where
    ``<name>`` is the file name up to its first dot.
    """
    dataset_dir = Path(
        Path(__file__).parent, "..", "datasets", "js-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for entry in os.listdir(dataset_dir):
        outcome = eval_script(Path.joinpath(dataset_dir, entry))
        problem_name = entry.split(".")[0]
        print(f"JavaScript,{problem_name},{outcome['status']}")
47
+
48
+ if __name__ == "__main__":
49
+ main()
src/eval_julia.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from safe_subprocess import run
2
+ from pathlib import Path
3
+
4
def eval_script(path: Path):
    """Run a Julia program and classify the outcome.

    Args:
        path: Path of the ``.jl`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"`` (limit 5 seconds), ``"OK"`` (exit code 0),
        ``"Exception"`` when the failed run left stderr empty, otherwise
        ``"SyntaxError"``.
    """
    result = run(["julia", str(path)], timeout_seconds=5)

    if result.timeout:
        status = "Timeout"
    elif result.exit_code == 0:
        status = "OK"
    else:
        # TODO(arjun): I would like this to be reviewed more carefully by John.
        status = "Exception" if len(result.stderr) < 1 else "SyntaxError"

    outcome = {"status": status}
    outcome["exit_code"] = result.exit_code
    outcome["stdout"] = result.stdout
    outcome["stderr"] = result.stderr
    return outcome
src/eval_lean.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+ import subprocess
4
+
5
def eval_script(path: Path):
    """Check a Lean file and classify the outcome from the tool's output.

    The status is derived from the captured output rather than from the exit
    code.

    Args:
        path: Path of the ``.lean`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` as text.
        Status is ``"Timeout"`` (exit code ``-1``) after 5 seconds,
        ``"AssertionError"`` when the output reports a failed ``rfl``
        tactic, ``"OK"`` when the tool printed nothing, otherwise
        ``"SyntaxError"``.
    """

    def as_text(stream):
        return "" if stream is None else stream.decode("utf-8", errors="replace")

    try:
        output = subprocess.run(["lean", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        return {
            "status": "Timeout",
            "exit_code": -1,
            "stdout": as_text(exc.stdout),
            "stderr": as_text(exc.stderr),
        }

    stdout_text = as_text(output.stdout)
    stderr_text = as_text(output.stderr)
    # Look at the captured text itself. The previous check used
    # str(CompletedProcess), which is never empty, so "OK" was unreachable.
    outmessage = stdout_text + stderr_text

    if "error: tactic 'rfl' failed" in outmessage:
        status = "AssertionError"
    elif outmessage.strip() == "":
        status = "OK"
    else:
        status = "SyntaxError"

    return {
        "status": status,
        "exit_code": output.returncode,
        "stdout": stdout_text,
        "stderr": stderr_text,
    }
src/eval_lua.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run a Lua program and classify the outcome.

    Args:
        path: Path of the ``.lua`` file.

    Returns:
        dict: ``status`` (``"Timeout"``, ``"OK"`` for exit code 0, otherwise
        ``"Exception"``), ``exit_code``, ``stdout`` and ``stderr``.
    """
    r = run(["lua", str(path)])

    status = "Exception"
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"

    return dict(
        status=status,
        exit_code=r.exit_code,
        stdout=r.stdout,
        stderr=r.stderr,
    )
src/eval_luau.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+
5
def eval_script(path: Path):
    """Analyze a Luau program with ``luau-analyze``, then run it with ``luau``.

    Args:
        path: Path of the Luau source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` of the
        last command that ran. A failed analysis yields ``"SyntaxError"``
        when stderr contains that word and ``"TypeError"`` otherwise; a
        program that was run yields ``"OK"`` or ``"Exception"``; either step
        can yield ``"Timeout"``.
    """
    r = run(["luau-analyze", str(path)])
    if r.timeout:
        status = "Timeout"
    elif r.exit_code != 0:
        # The analysis failed: tell syntax errors from type errors.
        status = "SyntaxError" if "SyntaxError" in r.stderr else "TypeError"
    else:
        # The analysis passed, so actually run the program.
        r = run(["luau", str(path)])
        if r.timeout:
            status = "Timeout"
        else:
            status = "OK" if r.exit_code == 0 else "Exception"

    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_matlab.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path):
    """Run a MATLAB script through ``matlab.engine`` in a Python subprocess.

    The file at ``path`` is rewritten in place with a ``test();`` call
    prepended before it is executed.

    Args:
        path: Path object of the ``.m`` file; its stem is used as the name
            called on the engine and its parent directory is added to the
            MATLAB path.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status
        and exit code are ``"Timeout"``/``-1`` after 30 seconds,
        ``"OK"``/``0`` when stderr is empty, otherwise ``"Exception"``/``1``.
    """
    # Matlab has the requirement that all functions must appear at the end
    # of the file. So we first have to write the call to the test-function at the
    # beginning of the file.
    with open(path, 'r') as source:
        original = source.read()
    with open(path, 'w') as source:
        source.write(f"test();\n{original}")

    filename = path.stem
    parent_dir = path.parent.absolute()

    # We use the matlab.engine to run the script; however, the way that the
    # matlab engine works requires that we call the script as if it were a
    # member of the matlab.engine object. So we have to write a python script
    # that calls the matlab script. This also ensures that the script is called
    # in a safe-subprocess. Who needs runtime reflection when you have IPC?
    program= f"""
import matlab.engine
import io
import sys
out = io.StringIO()
err = io.StringIO()
eng = matlab.engine.start_matlab()
eng.addpath(r'{parent_dir}',nargout=0)
try:
    r = eng.{filename}(nargout=0, stdout=out,stderr=err)
    print(out.getvalue())
except matlab.engine.MatlabExecutionError as e:
    print(err.getvalue(), file=sys.stderr)
"""
    r = run(["python3", "-c", program], timeout_seconds=30)

    # This is still somewhat brittle.
    if r.timeout:
        status, exit_code = "Timeout", -1
    elif r.stderr == "":
        status, exit_code = "OK", 0
    else:
        status, exit_code = "Exception", 1

    return {
        "status": status,
        "exit_code": exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_ocaml.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run an OCaml program with the ``ocaml`` toplevel and classify the outcome.

    Args:
        path: Path of the ``.ml`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"OK"`` (exit code 0), ``"AssertionError"`` or
        ``"SyntaxError"`` when stderr contains the matching marker,
        otherwise ``"Exception"``.
    """
    r = run(["ocaml", str(path)])

    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    else:
        # The first marker found in stderr decides; the order matters.
        stderr_markers = (
            ("Assert_failure", "AssertionError"),
            ("Syntax error", "SyntaxError"),
        )
        status = "Exception"
        for marker, label in stderr_markers:
            if marker in r.stderr:
                status = label
                break

    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_php.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+ LANG_NAME = "PHP"
5
+ LANG_EXT = ".php"
6
+
7
def eval_script(path: Path):
    """Run a PHP program and classify the outcome.

    Args:
        path: Path of the ``.php`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"SyntaxError"`` when either output stream contains
        ``PHP Parse error``, ``"Exception"`` for any other non-zero exit,
        otherwise ``"OK"``.
    """
    r = run(["php", path])
    if r.timeout:
        # Report timeouts explicitly instead of as a generic "Exception".
        status = "Timeout"
    # Where PHP prints the parse error depends on its error settings, so
    # both streams are checked.
    elif "PHP Parse error" in r.stdout or "PHP Parse error" in r.stderr:
        status = "SyntaxError"
    elif r.exit_code != 0:
        status = "Exception"
    else:
        status = "OK"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_pl.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run a Perl program and classify the outcome.

    Args:
        path: Path of the Perl source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"OK"`` when the exit code is 0 and neither stream
        contains ``ERROR``, otherwise ``"Exception"``.
    """
    r = run(["perl", path])

    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0 and "ERROR" not in r.stdout and "ERROR" not in r.stderr:
        status = "OK"
    else:
        status = "Exception"

    outcome = {"status": status}
    outcome["exit_code"] = r.exit_code
    outcome["stdout"] = r.stdout
    outcome["stderr"] = r.stderr
    return outcome
src/eval_python.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
def eval_script(path: Path):
    """Run a Python program with ``python3`` and classify the outcome.

    Args:
        path: Path of the ``.py`` file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr``. Status is
        ``"Timeout"``, ``"OK"`` (exit code 0), ``"SyntaxError"`` when stderr
        contains that word, otherwise ``"Exception"``.
    """
    r = run(["python3", str(path)])

    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    else:
        status = "SyntaxError" if "SyntaxError" in r.stderr else "Exception"

    return dict(
        status=status,
        exit_code=r.exit_code,
        stdout=r.stdout,
        stderr=r.stderr,
    )
src/eval_r.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
def eval_script(path: Path):
    """Run an R script with ``Rscript`` and classify the outcome.

    Args:
        path: Path of the R source file.

    Returns:
        dict: ``status``, ``exit_code``, ``stdout`` and ``stderr`` as text
        (``""`` when nothing was captured). Status is ``"OK"`` for exit code
        0, ``"Timeout"`` (exit code ``-1``) after 5 seconds, otherwise
        ``"SyntaxError"``, ``"AssertionError"`` or ``"Exception"`` depending
        on the text of the process report.
    """

    def as_text(stream):
        # Decode the captured bytes so the result is plain text like the
        # other evaluators' results (raw bytes are not JSON-serializable).
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return stream

    try:
        # Assumes exit-code 0 is all okay
        # Run R on the file, capturing stderr
        output = subprocess.run(["Rscript", str(path)], capture_output=True, timeout=5)
        if output.returncode == 0:
            status = "OK"
        else:
            # The markers are matched against the textual report of the
            # finished process, which includes both captured streams.
            outmessage = str(output)
            if 'unexpected' in outmessage:
                status = "SyntaxError"
            elif "err=b''" in outmessage:
                # Matches a report whose stderr is empty.
                status = "AssertionError"
            else:
                status = "Exception"
        returncode = output.returncode
    except subprocess.TimeoutExpired as exc:
        status = "Timeout"
        output = exc
        returncode = -1
    # subprocess.run is called without check=True, so CalledProcessError
    # cannot be raised here and needs no handler.
    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }
35
+
36
+
37
+
38
def main():
    """Evaluate every file of the bundled R dataset directory.

    Prints one ``R,<name>,<status>`` line per file, where ``<name>`` is the
    file name up to its first dot.
    """
    dataset_dir = Path(
        Path(__file__).parent, "..", "datasets", "R-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for entry in os.listdir(dataset_dir):
        outcome = eval_script(Path.joinpath(dataset_dir, entry))
        problem_name = entry.split(".")[0]
        print(f"R,{problem_name},{outcome['status']}")
45
+
46
+ if __name__ == "__main__":
47
+ main()
src/eval_racket.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluates a generated Racket program (.rkt).
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from safe_subprocess import run
7
+ from libeval import run_without_exn
8
+
9
+
10
def eval_script(path: Path):
    """Run a generated Racket program and classify the outcome.

    Args:
        path: Location of the ``.rkt`` file to execute.

    Returns:
        ``None`` (after printing a diagnostic) when the run failed because
        rackunit is not installed. Otherwise a dict with ``status``
        ("Timeout", "SyntaxError", "Exception" or "OK"), ``exit_code``,
        ``stdout`` and ``stderr``.
    """
    result = run(["racket", str(path)])
    stderr = result.stderr

    # The resolver error spans two lines; match both fragments separately so
    # the check does not depend on the exact indentation between them.
    if (
        "standard-module-name-resolver: collection not found" in stderr
        and "for module path: rackunit" in stderr
    ):
        print(f"Failed to run evaluation for {path}: rackunit is not installed")
        return None

    if result.timeout:
        # Report timeouts explicitly rather than folding them into "Exception".
        status = "Timeout"
    elif stderr or result.exit_code != 0:
        # rackunit produces exit code 0 even if tests fail, so any stderr
        # output counts as a failure too.
        status = "SyntaxError" if "read-syntax" in stderr else "Exception"
    else:
        status = "OK"

    return {
        "status": status,
        "exit_code": result.exit_code,
        "stdout": result.stdout,
        "stderr": stderr,
    }
35
+
36
+
37
def main():
    """Evaluate every file of the bundled Racket dataset and print one row each.

    Each printed row has the form ``Racket,<name>,<status>``, where
    ``<name>`` is the file name up to its first ``.``. Files for which
    ``eval_script`` yields no result are skipped.
    """
    directory = Path(
        Path(__file__).parent, "..", "datasets", "racket-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for filename in os.listdir(directory):
        r = eval_script(directory / filename)
        if r is None:
            # eval_script returns None (and prints why) when rackunit is
            # missing; indexing that result would raise TypeError.
            continue
        problem = filename.split(".")[0]
        print(f"Racket,{problem},{r['status']}")


if __name__ == "__main__":
    main()
src/eval_ruby.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from sys import exit
3
+ import subprocess
4
+ from pathlib import Path
5
+ from generic_eval import main as gmain
6
+
7
def eval_script(path: Path):
    """Run a Ruby script and classify the outcome.

    The script is given 5 seconds. A non-zero exit with empty stderr is
    reported as "Exception" (a failed test), while a non-zero exit that
    printed an error message is reported as "SyntaxError".

    Args:
        path: Location of the Ruby script to execute.

    Returns:
        A dict with ``status`` ("OK", "Timeout", "Exception" or
        "SyntaxError"), ``exit_code`` (-1 on timeout) and the captured
        ``stdout`` / ``stderr`` as text.
    """

    def as_text(raw):
        # A timed-out process may report no output at all (None); undecodable
        # bytes are replaced instead of aborting the evaluation.
        return "" if raw is None else raw.decode("utf-8", errors="replace")

    try:
        # check=True turns every non-zero exit into CalledProcessError, so
        # getting past this call means Ruby exited with status 0.
        proc = subprocess.run(
            ["ruby", str(path)], check=True, capture_output=True, timeout=5
        )
        status = "OK"
        returncode = 0
        out, error = proc.stdout, proc.stderr
    except subprocess.TimeoutExpired as exc:
        status = "Timeout"
        returncode = -1
        out, error = exc.stdout, exc.stderr
    except subprocess.CalledProcessError as exc:
        returncode = exc.returncode
        out, error = exc.stdout, exc.stderr
        # Failure without any error message is an Exception from failed
        # tests; everything that prints an error message is a SyntaxError.
        status = "SyntaxError" if error else "Exception"

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(out),
        "stderr": as_text(error),
    }


if __name__ == "__main__":
    gmain(eval_script, 'Ruby', '.rb')
src/eval_rust.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import tempfile
4
+ from pathlib import Path
5
+ from generic_eval import main
6
+
7
+ LANG_NAME = "Rust"
8
+ LANG_EXT = ".rs"
9
+
10
def eval_script(path: Path):
    """Compile a Rust program with ``rustc``, run it, and classify the outcome.

    The compiler is given 15 seconds and the resulting binary 5 seconds.
    The binary is written next to the source file (same name without the
    final extension) and deleted again after it has been run.

    Args:
        path: Location of the ``.rs`` source file.

    Returns:
        A dict with ``status`` ("Timeout", "SyntaxError", "Exception" or
        "OK"), ``exit_code`` (-1 when a timeout occurred) and ``stdout`` /
        ``stderr`` as text.
    """

    def as_text(raw):
        # A timed-out process may report no output at all (None); undecodable
        # bytes are replaced instead of aborting the evaluation.
        return "" if raw is None else raw.decode("utf-8", errors="replace")

    # Use an absolute path: a bare relative name such as "prog" would be
    # searched for on PATH when executed, not in the current directory.
    executable = os.path.abspath(".".join(str(path).split(".")[:-1]))

    try:
        build = subprocess.run(
            ["rustc", str(path), "-o", executable], capture_output=True, timeout=15
        )
    except subprocess.TimeoutExpired:
        return {
            "status": "Timeout",
            "exit_code": -1,
            "stdout": "Compiler timeout",
            "stderr": "Compiler timeout",
        }

    if build.returncode != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        return {
            "status": "SyntaxError",
            "exit_code": build.returncode,
            "stdout": as_text(build.stdout),
            "stderr": as_text(build.stderr),
        }

    returncode = -1
    try:
        # Assumes exit-code 0 is all okay
        output = subprocess.run([executable], capture_output=True, timeout=5)
        returncode = output.returncode
        # A non-zero exit means the program panicked.
        status = "OK" if returncode == 0 else "Exception"
    except subprocess.TimeoutExpired as exc:
        status = "Timeout"
        output = exc
    finally:
        # Best-effort cleanup that also runs if launching the binary failed.
        try:
            os.remove(executable)
        except FileNotFoundError:
            pass

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
53
+
src/eval_scala.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import tempfile
3
+ from safe_subprocess import run
4
+
5
+ LANG_NAME = "Scala"
6
+ LANG_EXT = ".scala"
7
+
8
def eval_script(path: Path):
    """Compile a Scala program with ``scalac``, run it, and classify the outcome.

    Args:
        path: Location of the ``.scala`` source file.

    Returns:
        A dict with ``status`` ("Timeout", "SyntaxError", "OK" or
        "Exception"), ``exit_code``, ``stdout`` and ``stderr``. For a failed
        or timed-out build the values come from the compiler run, otherwise
        from the program run.
    """
    with tempfile.TemporaryDirectory() as outdir:
        # Each Scala file contains a class with the same name, so scalac
        # would emit the same .class file for every problem. Compile each
        # problem into its own temporary directory to keep them apart.
        build = run(["scalac", "-d", outdir, path], timeout_seconds=45)
        if build.timeout:
            # A compiler that ran out of time is not a syntax error.
            return {
                "status": "Timeout",
                "exit_code": build.exit_code,
                "stdout": build.stdout,
                "stderr": build.stderr,
            }
        if build.exit_code != 0:
            # Well, it's a compile error. May be a type error or
            # something. But, why break the set convention
            return {
                "status": "SyntaxError",
                "exit_code": build.exit_code,
                "stdout": build.stdout,
                "stderr": build.stderr,
            }

        # "Problem" is the name of the class we emit.
        r = run(["scala", "-cp", f"{outdir}", "Problem"])
        if r.timeout:
            status = "Timeout"
        elif r.exit_code == 0 and r.stderr == "":
            status = "OK"
        else:
            # Non-zero exit or anything on stderr counts as a runtime failure.
            status = "Exception"
        return {
            "status": status,
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }
src/eval_sh.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+ LANG_NAME = "bash"
5
+ LANG_EXT = ".sh"
6
+
7
def eval_script(path: Path):
    """Execute a bash script and report how it ended.

    Args:
        path: Location of the shell script to run.

    Returns:
        A dict with ``status`` ("Timeout", "OK", "SyntaxError" or
        "Exception") together with the ``exit_code``, ``stdout`` and
        ``stderr`` reported by the runner.
    """
    # Output is captured whether the script succeeds, fails, or does not parse.
    proc = run(["bash", path])

    def classify():
        if proc.timeout:
            return "Timeout"
        if proc.exit_code == 0:
            return "OK"
        return "SyntaxError" if "syntax error" in proc.stderr else "Exception"

    return {
        "status": classify(),
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
src/eval_swift.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ from pathlib import Path
3
+ import os
4
+ from safe_subprocess import run
5
+
6
def eval_script(path: Path):
    """Compile a Swift program with ``swiftc``, run it, and classify the outcome.

    The compiler is given 45 seconds and the resulting binary 5 seconds.
    The binary is written next to the source file (same name without the
    final extension) and deleted again after it has been run.

    Args:
        path: Location of the Swift source file.

    Returns:
        A dict with ``status`` ("Timeout", "SyntaxError", "Exception" or
        "OK"), ``exit_code``, ``stdout`` and ``stderr``. The values come
        from the compiler run when compilation did not succeed, otherwise
        from the program run.
    """
    # Use an absolute path: a bare relative name such as "prog" would be
    # searched for on PATH when executed, not in the current directory.
    executable = os.path.abspath(".".join(str(path).split(".")[:-1]))

    r = run(["swiftc", path, "-o", executable], timeout_seconds=45)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
    else:
        try:
            r = run([executable], timeout_seconds=5)
        finally:
            # Best-effort cleanup that also runs if launching the binary failed.
            try:
                os.remove(executable)
            except FileNotFoundError:
                pass
        if r.timeout:
            status = "Timeout"
        elif r.exit_code != 0:
            # Well, it's a panic
            status = "Exception"
        else:
            status = "OK"

    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_ts.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+
4
+
5
def eval_script(path: Path):
    """Compile a TypeScript file with ``tsc``, run the result with ``node``.

    Both steps are given 15 seconds each.

    Args:
        path: Location of the ``.ts`` source file.

    Returns:
        A dict with ``status`` ("Timeout", "SyntaxError", "OK",
        "AssertionError", "ReferenceError" or "Exception"), ``exit_code``,
        ``stdout`` and ``stderr``. The values come from the compiler run
        when compilation did not succeed, otherwise from the node run.
    """
    r = run(["tsc", "--target", "esnext", str(path)], timeout_seconds=15)
    if r.timeout:
        # A compiler that ran out of time is not a syntax error.
        return {
            "status": "Timeout",
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }
    if r.exit_code != 0:
        return {
            "status": "SyntaxError",
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }

    # Swap only the final extension; a plain text replace of ".ts" would also
    # rewrite any ".ts" that occurs earlier in the path.
    js_path = Path(path).with_suffix(".js")

    r = run(["node", str(js_path)], timeout_seconds=15)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    elif "ERR_ASSERTION" in r.stderr:
        status = "AssertionError"
    elif "SyntaxError" in r.stderr:
        status = "SyntaxError"
    elif "ReferenceError" in r.stderr:
        status = "ReferenceError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
src/eval_v.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from safe_subprocess import run
3
+ import subprocess
4
+
5
+ # return codes for coqc:
6
+ # 0: compilation goes through
7
+ # 1: some sort of error (nondescript)
8
+
9
def eval_script(path: Path):
    """Check a Coq file by compiling it with ``coqc`` and classify the outcome.

    The compiler is given 5 seconds. Exit code 0 means the file compiled;
    a failure whose output mentions "Unable to unify" is reported as
    "AssertionError", any other failure as "SyntaxError".

    Args:
        path: Location of the ``.v`` file to compile.

    Returns:
        A dict with ``status`` ("OK", "AssertionError", "SyntaxError" or
        "Timeout"), ``exit_code`` (-1 on timeout) and ``stdout`` /
        ``stderr`` as text.
    """

    def as_text(raw):
        # A timed-out process may report no output at all (None); undecodable
        # bytes are replaced instead of aborting the evaluation.
        return "" if raw is None else raw.decode("utf-8", errors="replace")

    cleanup_extensions = ['.vo', '.vok', '.vos']

    try:
        # sadly there seems to be no way to verify proofs in a coq file
        # without compiling
        output = subprocess.run(
            ["coqc", "-noglob", str(path)], capture_output=True, timeout=5
        )
    except subprocess.TimeoutExpired as exc:
        status = "Timeout"
        output = exc
        returncode = -1
    else:
        returncode = output.returncode
        if returncode == 0:
            status = "OK"
        # Inspect the compiler output only, not the command line.
        elif (
            b"Unable to unify" in output.stdout
            or b"Unable to unify" in output.stderr
        ):
            status = "AssertionError"
        else:
            status = "SyntaxError"
    finally:
        # cleanup: remove files generated by coqc, whatever the outcome
        for ext in cleanup_extensions:
            try:
                Path(path).with_suffix(ext).unlink()
            except FileNotFoundError:
                pass

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }