Spaces:
Sleeping
Sleeping
朱东升
committed on
Commit
·
30b1610
1
Parent(s):
c4f1102
- .DS_Store +0 -0
- Dockerfile +15 -0
- app.py +99 -0
- requirements.txt +3 -0
- src/__init__.py +0 -0
- src/containerized_eval.py +100 -0
- src/eval_adb.py +64 -0
- src/eval_clj.py +30 -0
- src/eval_cpp.py +40 -0
- src/eval_cs.py +68 -0
- src/eval_dart.py +27 -0
- src/eval_dfy.py +29 -0
- src/eval_dlang.py +63 -0
- src/eval_elixir.py +37 -0
- src/eval_fs.py +17 -0
- src/eval_go.py +42 -0
- src/eval_hs.py +19 -0
- src/eval_java.py +50 -0
- src/eval_javascript.py +49 -0
- src/eval_julia.py +21 -0
- src/eval_lean.py +29 -0
- src/eval_lua.py +17 -0
- src/eval_luau.py +26 -0
- src/eval_matlab.py +53 -0
- src/eval_ocaml.py +21 -0
- src/eval_php.py +20 -0
- src/eval_pl.py +20 -0
- src/eval_python.py +19 -0
- src/eval_r.py +47 -0
- src/eval_racket.py +49 -0
- src/eval_ruby.py +43 -0
- src/eval_rust.py +53 -0
- src/eval_scala.py +37 -0
- src/eval_sh.py +24 -0
- src/eval_swift.py +30 -0
- src/eval_ts.py +33 -0
- src/eval_v.py +40 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Dockerfile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image is pinned by digest so rebuilds always start from the same layers.
FROM ghcr.io/nuprl/multipl-e-evaluation@sha256:11864ca95774df16c34b4cd1eac231f9e5466c7ea38dac98e5b5b053e18479de

WORKDIR /app

# Bind Gradio to all interfaces so the exposed port is reachable from outside
# the container.
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Dependencies are installed before the sources are copied, so this layer is
# reused until requirements.txt changes. --no-cache-dir keeps pip's download
# cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .
COPY src/ ./src/

EXPOSE 7860

CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import json
|
3 |
+
import importlib
|
4 |
+
import os
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
def evaluate(input_data):
    """Evaluate submitted code and report the outcome.

    Args:
        input_data: Either a string holding the path of a source file (the
            language is taken from the file extension) or a dict with the
            keys ``language`` and ``completions``. ``completions`` is a list
            of code strings; a single string is treated as a one-item list.

    Returns:
        dict: For a file path, the result of ``evaluate_code``. For a dict,
        ``{"status": "pass"}`` when at least one completion evaluates with
        status ``"OK"``, otherwise the result of the first completion.
        Every failure is reported as
        ``{"status": "Exception", "error": <message>}``.
    """
    try:
        # A string is a path to a source file.
        if isinstance(input_data, str):
            source_path = Path(input_data)
            code = source_path.read_text(encoding="utf-8")
            # The extension without its leading dot names the language.
            return evaluate_code(code, source_path.suffix[1:])

        if not isinstance(input_data, dict):
            return {"status": "Exception", "error": "Invalid input format"}

        completions = input_data.get('completions', [])
        # Iterating a bare string would evaluate it one character at a time.
        if isinstance(completions, str):
            completions = [completions]
        if not completions:
            return {"status": "Exception", "error": "No code provided"}

        language = input_data.get('language')
        if not isinstance(language, str) or not language:
            return {"status": "Exception", "error": "No language provided"}

        results = [evaluate_code(code, language) for code in completions]

        # One successful completion is enough to pass.
        if any(r["status"] == "OK" for r in results):
            return {"status": "pass"}
        return results[0]  # first failure

    except Exception as e:
        return {"status": "Exception", "error": str(e)}
|
51 |
+
|
52 |
+
def evaluate_code(code, language):
    """Evaluate one piece of code with the evaluator for ``language``.

    The evaluator is the module ``src.eval_<language>`` (lower-cased); its
    ``eval_script`` function receives the path of a file holding ``code``.

    Args:
        code (str): Source code to evaluate.
        language (str): Language identifier; also used as the file extension.

    Returns:
        dict: Whatever ``eval_script`` returns, or
        ``{"status": "Exception", "error": <message>}`` when the language has
        no evaluator module or the evaluation raises.
    """
    import re
    import tempfile

    # The language comes from the request and ends up in a module name and a
    # file name, so only plain identifiers are accepted.
    if not isinstance(language, str) or not re.fullmatch(r"[A-Za-z0-9_]+", language):
        return {"status": "Exception", "error": f"Language {language} not supported"}

    try:
        module = importlib.import_module(f"src.eval_{language.lower()}")

        # A private directory per call keeps concurrent requests from
        # overwriting each other's file, and it is removed together with
        # anything the evaluator leaves next to the source, even on error.
        with tempfile.TemporaryDirectory() as work_dir:
            temp_file = Path(work_dir) / f"temp.{language}"
            temp_file.write_text(code, encoding="utf-8")
            return module.eval_script(temp_file)

    except ImportError:
        return {"status": "Exception", "error": f"Language {language} not supported"}
    except Exception as e:
        return {"status": "Exception", "error": str(e)}
|
88 |
+
|
89 |
+
# Gradio interface: JSON request in, JSON result out, handled by `evaluate`.
_interface_options = {
    "fn": evaluate,
    "inputs": gr.JSON(),
    "outputs": gr.JSON(),
    "title": "代码评估服务",
    "description": "支持多种编程语言的代码评估服务",
}
demo = gr.Interface(**_interface_options)

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=3.50.2
# pathlib and importlib are part of the Python 3 standard library. The PyPI
# packages with those names are obsolete backports for old Python releases,
# so they are not listed here.
|
src/__init__.py
ADDED
File without changes
|
src/containerized_eval.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
NOTE: Nothing containerized about this any more. This is just a helper
|
3 |
+
for problem_evaluator.py.
|
4 |
+
"""
|
5 |
+
|
6 |
+
from pathlib import Path
|
7 |
+
import eval_adb
|
8 |
+
import eval_ruby
|
9 |
+
import eval_lua
|
10 |
+
import eval_python
|
11 |
+
import eval_rust
|
12 |
+
import eval_julia
|
13 |
+
import eval_java
|
14 |
+
import eval_lua
|
15 |
+
import eval_racket
|
16 |
+
import eval_javascript
|
17 |
+
import eval_swift
|
18 |
+
import eval_cpp
|
19 |
+
import eval_php
|
20 |
+
import eval_dlang
|
21 |
+
import eval_julia
|
22 |
+
import eval_r
|
23 |
+
import eval_fs
|
24 |
+
import eval_ocaml
|
25 |
+
import eval_matlab
|
26 |
+
import eval_hs
|
27 |
+
import eval_elixir
|
28 |
+
import eval_clj
|
29 |
+
import eval_v
|
30 |
+
import eval_lean
|
31 |
+
import eval_dart
|
32 |
+
import tempfile
|
33 |
+
|
34 |
+
|
35 |
+
# Maps a language identifier to (evaluator function, suffix of the temporary
# source file). Several identifiers may share one evaluator.
EVALUATORS = {
    "ada": (eval_adb.eval_script, ".adb"),
    "rb": (eval_ruby.eval_script, ".rb"),
    "lua": (eval_lua.eval_script, ".lua"),
    "python": (eval_python.eval_script, ".py"),
    "py": (eval_python.eval_script, ".py"),
    "notypes.py": (eval_python.eval_script, ".py"),
    "julia": (eval_julia.eval_script, ".jl"),
    "java": (eval_java.eval_script, ".java"),
    "rust": (eval_rust.eval_script, ".rs"),
    "rs": (eval_rust.eval_script, ".rs"),
    "swift": (eval_swift.eval_script, ".swift"),
    "racket": (eval_racket.eval_script, ".rkt"),
    "rkt": (eval_racket.eval_script, ".rkt"),
    "javascript": (eval_javascript.eval_script, ".js"),
    "js": (eval_javascript.eval_script, ".js"),
    "cpp": (eval_cpp.eval_script, ".cpp"),
    "php": (eval_php.eval_script, ".php"),
    "humaneval_to_dlang.py": (eval_dlang.eval_script, ".d"),
    "d": (eval_dlang.eval_script, ".d"),
    "r": (eval_r.eval_script, ".r"),
    "humaneval_to_r.py": (eval_r.eval_script, ".r"),
    "jl": (eval_julia.eval_script, ".jl"),
    "fs": (eval_fs.eval_script, ".fsx"),
    "ml": (eval_ocaml.eval_script, ".ml"),
    "m": (eval_matlab.eval_script, ".m"),
    "hs": (eval_hs.eval_script, ".hs"),
    "elixir": (eval_elixir.eval_script, ".exs"),
    "clj": (eval_clj.eval_script, ".clj"),
    "coq": (eval_v.eval_script, ".v"),
    "lean": (eval_lean.eval_script, ".lean"),
    "dart": (eval_dart.eval_script, ".dart"),
}
|
69 |
+
|
70 |
+
def eval_string_script(language, program):
    """Write ``program`` to a temporary file and evaluate it for ``language``.

    The evaluator and file suffix come from ``EVALUATORS``. For any other
    language the module ``eval_<language>`` is imported and ``.<language>`` is
    used as suffix (``go_test.go`` maps to ``eval_go`` and ``_test.go``).

    Args:
        language: Language identifier.
        program (str): Source text of the program.

    Returns:
        dict: ``program``, ``stdout`` and ``stderr`` as text (each cut to
        2048 characters), plus ``exit_code`` and ``status`` from the
        evaluator.

    Raises:
        TypeError: If the evaluator reports a stdout or stderr that is
            neither ``str``, ``bytes`` nor ``None``.
    """
    if language in EVALUATORS:
        (eval_script, file_ext) = EVALUATORS[language]
    else:
        is_go_test = language == "go_test.go"
        eval_module = __import__("eval_go" if is_go_test else f"eval_{language}")
        eval_script = eval_module.eval_script
        file_ext = "_test.go" if is_go_test else f".{language}"

    with tempfile.NamedTemporaryFile(suffix=file_ext, delete=True) as f:
        f.write(program.encode("utf-8"))
        # Flush so the evaluator sees the full program when it opens the file.
        f.flush()
        result = eval_script(Path(f.name))

    streams = {}
    for stream_name in ("stdout", "stderr"):
        value = result[stream_name]
        if value is None:
            value = ""
        elif isinstance(value, bytes):
            value = value.decode("utf-8", errors="ignore")
        elif not isinstance(value, str):
            raise TypeError(
                f"{stream_name} must be str, bytes or None, got {type(value).__name__}"
            )
        streams[stream_name] = value

    # Only save the first 2K of output from the running program. Any further
    # output is very likely an exceptionally long stack trace or a long
    # series of prints.
    return {
        "program": program,
        "stdout": streams["stdout"].replace("!!int", "")[:2048],
        "stderr": streams["stderr"][:2048],
        "exit_code": result['exit_code'],
        "status": result['status']
    }
|
src/eval_adb.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
from generic_eval import main
|
4 |
+
|
5 |
+
|
6 |
+
LANG_NAME = "Ada"
|
7 |
+
LANG_EXT = ".adb"
|
8 |
+
|
9 |
+
|
10 |
+
def eval_script(path: Path):
    """Split, build and run an Ada program.

    ``gnatchop`` splits ``path`` into a scratch directory created next to it,
    ``gnatmake`` builds ``main.adb`` there, and the resulting ``./main`` is
    executed. The scratch directory is unique per call and is removed before
    returning, so repeated or concurrent runs on the same file do not collide.

    Args:
        path: Location of the ``.adb`` source file.

    Returns:
        dict: ``status`` ("SyntaxError (gnatchop)", "SyntaxError (gnatmake)",
        "Timeout", "Exception" or "OK") together with ``exit_code``,
        ``stdout`` and ``stderr`` of the step that decided it.
    """
    import tempfile

    def report(status, proc):
        return {
            "status": status,
            "exit_code": proc.exit_code,
            "stdout": proc.stdout,
            "stderr": proc.stderr,
        }

    with tempfile.TemporaryDirectory(
        prefix=f"{path.stem}_tmp", dir=path.parent
    ) as scratch:
        working_dir = Path(scratch)

        chop_result = run(["gnatchop", "-w", path, working_dir])
        if chop_result.exit_code != 0:
            return report("SyntaxError (gnatchop)", chop_result)

        build_result = run(
            [
                "gnatmake",
                "-gnatW8",
                "main.adb",
                "-o",
                "main",
                "-g",
                "-j0",
                "-gnata",
                "-gnat2022",
                "-gnateE",
                "-bargs",
                "-Es",
            ],
            cwd=str(working_dir),
        )
        if build_result.exit_code != 0:
            return report("SyntaxError (gnatmake)", build_result)

        run_result = run(["./main"], cwd=str(working_dir))
        if run_result.timeout:
            status = "Timeout"
        elif run_result.exit_code != 0:
            status = "Exception"
        else:
            status = "OK"
        return report(status, run_result)


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
|
src/eval_clj.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Evaluates a generated Clojure program (.clj).
|
3 |
+
"""
|
4 |
+
import os
|
5 |
+
from pathlib import Path
|
6 |
+
from safe_subprocess import run
|
7 |
+
from libeval import run_without_exn
|
8 |
+
|
9 |
+
|
10 |
+
def eval_script(path: Path):
    """Run a Clojure file and classify the outcome.

    Args:
        path: Location of the ``.clj`` file.

    Returns:
        dict: ``status`` is "Timeout" when the run timed out, "Exception" on a
        non-zero exit code or when stdout lacks the line
        "0 failures, 0 errors.", and "OK" otherwise; ``exit_code``,
        ``stdout`` and ``stderr`` come from the run.
    """
    result = run(["clojure", "-J-Dclojure.main.report=stderr", "-M", str(path)])

    if result.timeout:
        status = "Timeout"
    elif result.exit_code != 0:
        status = "Exception"
    elif "\n0 failures, 0 errors.\n" in result.stdout:
        status = "OK"
    else:  # the tests ran but did not all pass
        status = "Exception"

    return {
        "status": status,
        "exit_code": result.exit_code,
        "stdout": result.stdout,
        "stderr": result.stderr,
    }


if __name__ == "__main__":
    # The file's import block does not provide `main`; import the shared
    # driver here and call it as main(eval_script, language name, extension).
    from generic_eval import main

    main(eval_script, "Clojure", ".clj")
|
src/eval_cpp.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
from generic_eval import main
|
4 |
+
|
5 |
+
LANG_NAME = "C++"
|
6 |
+
LANG_EXT = ".cpp"
|
7 |
+
|
8 |
+
|
9 |
+
def eval_script(path: Path):
    """Compile a C++ file with g++ (C++17) and run the produced binary.

    Args:
        path: Location of the ``.cpp`` file.

    Returns:
        dict: "SyntaxError" with the compiler's output when the build fails;
        otherwise "Timeout", "Exception" (non-zero exit) or "OK" with the
        output of the run.

    Raises:
        Exception: When the run's stderr shows one of two known broken
            toolchain signatures.
    """
    # The binary is written to the source path minus its final ".suffix".
    binary = str(path).rpartition(".")[0]

    compiled = run(["g++", path, "-o", binary, "-std=c++17"])
    if compiled.exit_code != 0:
        return {
            "status": "SyntaxError",
            "exit_code": compiled.exit_code,
            "stdout": compiled.stdout,
            "stderr": compiled.stderr,
        }

    executed = run([binary])
    if "In file included from /shared/centos7/gcc/9.2.0-skylake/" in executed.stderr:
        raise Exception("Skylake bug encountered")
    if "/4.8.2" in executed.stderr:
        raise Exception("Ancient compiler encountered")

    if executed.timeout:
        status = "Timeout"
    else:
        status = "OK" if executed.exit_code == 0 else "Exception"
    return {
        "status": status,
        "exit_code": executed.exit_code,
        "stdout": executed.stdout,
        "stderr": executed.stderr,
    }


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
|
src/eval_cs.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
import os
|
6 |
+
import subprocess
|
7 |
+
import tempfile
|
8 |
+
from pathlib import Path
|
9 |
+
from generic_eval import main
|
10 |
+
|
11 |
+
LANG_NAME = "CSharp"
|
12 |
+
LANG_EXT = ".cs"
|
13 |
+
|
14 |
+
#Following files have problems:
|
15 |
+
#137,
|
16 |
+
#22: Any
|
17 |
+
#148: Elipsis
|
18 |
+
|
19 |
+
def eval_script(path: Path):
    """Compile a C# file with ``csc`` and run the result under ``mono``.

    Args:
        path: Location of the source file. Files whose name does not contain
            ".cs" are ignored.

    Returns:
        dict | None: ``None`` for ignored files. Otherwise ``status``
        ("SyntaxError" when compilation fails, "Timeout", "Exception" or
        "OK"), ``exit_code`` (-1 on timeout) and ``stdout``/``stderr`` as
        text. A missing stdout and an empty or missing stderr are reported
        as the string "None".
    """
    if ".cs" not in path.name:
        return

    def as_text(raw):
        # With capture_output and no text mode the streams are bytes; a
        # TimeoutExpired may carry None instead.
        return None if raw is None else raw.decode("utf-8", errors="replace")

    basename = ".".join(str(path).split(".")[:-1])
    binaryname = basename + ".exe"
    build = subprocess.run(
        ["csc", "/d:DEBUG", "-r:System.Numerics.dll", path, f"/out:{binaryname}"],
        capture_output=True,
    )

    returncode = -1
    if build.returncode != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
        returncode = build.returncode
        stdout, stderr = as_text(build.stdout), as_text(build.stderr)
    else:
        try:
            finished = subprocess.run(
                ["mono", binaryname],
                env={"PATH": os.getenv("PATH", ""), "MONO_TRACE_LISTENER": "Console.Error"},
                capture_output=True,
                timeout=5,
            )
        except subprocess.TimeoutExpired as exc:
            status = "Timeout"
            stdout, stderr = as_text(exc.stdout), as_text(exc.stderr)
        else:
            returncode = finished.returncode
            stdout, stderr = as_text(finished.stdout), as_text(finished.stderr)
            # mono returns 0 even when failing, so failure is read from stderr.
            failed = (
                "System.Diagnostics.DefaultTraceListener.Fail" in stderr
                or "Unhandled Exception" in stderr
            )
            status = "Exception" if failed else "OK"
        finally:
            # The binary is removed however the run ended.
            os.remove(binaryname)

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": "None" if stdout is None else stdout,
        "stderr": stderr if stderr else "None",
    }


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
|
src/eval_dart.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Analyze a Dart file and, if analysis succeeds, run it.

    Returns:
        dict: "SyntaxError" with the analyzer's output when ``dart analyze``
        exits non-zero; otherwise "Timeout", "OK" (exit code 0) or
        "Exception" with the output of the run.
    """
    target = str(path)

    def report(status, proc):
        return {
            "status": status,
            "exit_code": proc.exit_code,
            "stdout": proc.stdout,
            "stderr": proc.stderr,
        }

    analysis = run(["dart", "analyze", "--no-fatal-warnings", target], timeout_seconds=15)
    if analysis.exit_code != 0:
        return report("SyntaxError", analysis)

    execution = run(["dart", target], timeout_seconds=15)
    if execution.timeout:
        return report("Timeout", execution)
    return report("OK" if execution.exit_code == 0 else "Exception", execution)
|
src/eval_dfy.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
# 0 – success
|
5 |
+
# 1 – invalid command-line arguments
|
6 |
+
# 2 – syntax, parse, or name or type resolution errors
|
7 |
+
# 3 – compilation errors
|
8 |
+
# 4 – verification errors
|
9 |
+
|
10 |
+
def eval_script(path: Path):
    """Run a Dafny program with ``dafny run`` and map its exit code to a status.

    Returns:
        dict: ``status`` is "Timeout" when the run timed out; otherwise it is
        derived from the exit code (0 "OK", 2 "SyntaxError",
        3 "CompilationError", 4 "VerificationError", anything else
        "Exception"). ``exit_code``, ``stdout`` and ``stderr`` come from the
        run.
    """
    status_by_exit_code = {
        0: "OK",
        2: "SyntaxError",
        3: "CompilationError",
        4: "VerificationError",
    }
    proc = run(["dafny", "run", str(path)])
    if proc.timeout:
        status = "Timeout"
    else:
        status = status_by_exit_code.get(proc.exit_code, "Exception")
    return {
        "status": status,
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
|
src/eval_dlang.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
from pathlib import Path
|
4 |
+
from safe_subprocess import run
|
5 |
+
import sys
|
6 |
+
import re
|
7 |
+
|
8 |
+
ENABLE_SYNTAX_CHECK = False
|
9 |
+
|
10 |
+
def eval_script(path: Path):
    """Run a D file with ``rdmd -unittest`` and classify the outcome.

    Returns:
        dict: ``status`` is "Timeout", "OK" (exit code 0), "SyntaxError"
        (stderr contains "Error:") or "Exception"; ``exit_code``, ``stdout``
        and ``stderr`` come from the run.

    Raises:
        Exception: When stderr reports that D is not correctly installed.
    """
    proc = run(["rdmd", "-unittest", str(path)], timeout_seconds=15)
    if "might not be correctly installed" in proc.stderr:
        raise Exception("D is not correctly installed")

    if proc.timeout:
        status = "Timeout"
    elif proc.exit_code == 0:
        status = "OK"
    else:
        status = "SyntaxError" if "Error:" in proc.stderr else "Exception"

    return {
        "status": status,
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
|
30 |
+
|
31 |
+
DIR = "d-keep-code_davinci_001_temp_0.2"
|
32 |
+
def main():
    """Evaluate every file of the ``DIR`` dataset and print the results.

    Prints one ``Dlang,<name>,<status>`` line per file, followed by the count
    per status. With ``ENABLE_SYNTAX_CHECK`` set, files whose compile errors
    point into the unit-test part are reported as well.
    """
    directory = Path(Path(__file__).parent, "..", "datasets", DIR).resolve()

    count = {"OK": 0, "Timeout": 0, "Exception": 0, "SyntaxError": 0}
    for filename in os.listdir(directory):
        path = Path.joinpath(directory, filename)
        r = eval_script(path)
        status = r["status"]
        count[status] += 1

        if ENABLE_SYNTAX_CHECK and status == "SyntaxError":
            error_msgs = r["stderr"].split("\n")
            with open(path) as source_file:
                lines = source_file.readlines()
            unittest_line_start = lines.index("unittest\n")
            unittest_line_end = len(lines)
            for err_msg_line in error_msgs:
                # NOTE(review): the [2:-1] slice looks like a leftover from
                # parsing the repr of a bytes value -- confirm it is still
                # wanted now that stderr is handled as text.
                matched_parts = re.match(r"(\/?.*?\.[\w:]+\/.*.d)\(([0-9]+)\): Error: (.*)", err_msg_line[2:-1])
                if matched_parts is None:
                    # Not every stderr line is a "file(line): Error:" message.
                    continue
                _file, line_num = matched_parts[1], int(matched_parts[2])
                if unittest_line_start <= line_num and line_num <= unittest_line_end:
                    print("===============")
                    print(path, "contains error in unit test part")
                    print(error_msgs)
                    print("===============")

        filename = filename.split(".")[0]
        print(f"Dlang,{filename},{status}")

    print(DIR + ":" + str(count))


if __name__ == "__main__":
    main()
|
src/eval_elixir.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from sys import exit
|
3 |
+
import subprocess
|
4 |
+
from pathlib import Path
|
5 |
+
from generic_eval import main as gmain
|
6 |
+
|
7 |
+
|
8 |
+
def eval_script(path: Path):
    """Run an Elixir script and classify the outcome.

    Returns:
        dict: ``status`` is "OK" for exit code 0, "Timeout" after 5 seconds,
        and otherwise "AssertionError", "SyntaxError" or "Exception"
        depending on what the captured output mentions. ``exit_code`` is -1
        on timeout; ``stdout`` and ``stderr`` are decoded as UTF-8.
    """

    def as_text(raw):
        return "" if raw is None else raw.decode("utf-8")

    try:
        output = subprocess.run(["elixir", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        status, returncode, output = "Timeout", -1, exc
    else:
        returncode = output.returncode
        # Exit code 0 is taken to mean that everything went fine.
        if returncode == 0:
            status = "OK"
        else:
            dump = str(output)
            if "Assertion with == failed" in dump:
                status = "AssertionError"
            elif "SyntaxError" in dump:
                status = "SyntaxError"
            else:
                status = "Exception"

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }


if __name__ == "__main__":
    gmain(eval_script, "Elixir", ".exs")
|
src/eval_fs.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run an F# script with ``dotnet fsi`` and classify the outcome.

    Returns:
        dict: ``status`` is "Timeout", "OK" (exit code 0) or "Exception";
        ``exit_code``, ``stdout`` and ``stderr`` come from the run.
    """
    proc = run(["dotnet", "fsi", "-d:DEBUG", str(path)])
    if proc.timeout:
        status = "Timeout"
    else:
        status = "OK" if proc.exit_code == 0 else "Exception"
    return {
        "status": status,
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
|
src/eval_go.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from sys import exit
|
3 |
+
import subprocess
|
4 |
+
from pathlib import Path
|
5 |
+
from generic_eval import main as gmain
|
6 |
+
|
7 |
+
|
8 |
+
def eval_script(path: Path):
    """Run ``go test`` on a file and classify the outcome from its stdout.

    Returns:
        dict: ``status`` is "SyntaxError" when stdout reports a failed setup
        or build, "Exception" when it contains "FAIL", "OK" otherwise, and
        "Timeout" after 30 seconds. On timeout ``exit_code``, ``stdout`` and
        ``stderr`` are ``None``.
    """
    try:
        proc = subprocess.run(
            ["go", "test", path],
            timeout=30,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.TimeoutExpired:
        return {
            "status": "Timeout",
            "exit_code": None,
            "stdout": None,
            "stderr": None,
        }

    stdout = proc.stdout.decode("utf-8", errors="ignore")
    stderr = proc.stderr.decode("utf-8", errors="ignore")

    if "[setup failed]" in stdout or "[build failed]" in stdout:
        status = "SyntaxError"
    else:
        status = "Exception" if "FAIL" in stdout else "OK"

    return {
        "status": status,
        "exit_code": proc.returncode,
        "stdout": stdout,
        "stderr": stderr,
    }


if __name__ == "__main__":
    gmain(eval_script, 'Go', '.go')
|
src/eval_hs.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run a Haskell file with ``runghc`` and classify the outcome.

    Returns:
        dict: ``status`` is "Timeout", "OK" (exit code 0), "SyntaxError" when
        stderr reports a syntax or parse error, and "Exception" for every
        other failure; ``exit_code``, ``stdout`` and ``stderr`` come from
        the run.
    """
    r = run(["runghc", str(path)])
    # "Syntax error" is the marker this evaluator looked for; "parse error"
    # is the wording GHC is expected to use -- TODO confirm against the
    # installed GHC version.
    syntax_markers = ("Syntax error", "parse error")
    stderr = r.stderr or ""

    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    elif any(marker in stderr for marker in syntax_markers):
        status = "SyntaxError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_java.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tempfile
|
3 |
+
from safe_subprocess import run
|
4 |
+
from pathlib import Path
|
5 |
+
from generic_eval import main
|
6 |
+
|
7 |
+
LANG_NAME = "Java"
|
8 |
+
LANG_EXT = ".java"
|
9 |
+
|
10 |
+
#Following files have problems:
|
11 |
+
#137,
|
12 |
+
#22: Any
|
13 |
+
#148: Elipsis
|
14 |
+
|
15 |
+
def eval_script(path: Path):
    """Compile a Java file with ``javac`` and run its ``Problem`` class.

    Returns:
        dict: "SyntaxError" with the compiler's output when ``javac`` exits
        non-zero; otherwise "Timeout", "OK" (exit code 0) or "Exception"
        with the output of the ``java`` run.
    """
    javatuples_jar = Path("/usr/multiple/javatuples-1.2.jar")
    env = dict(os.environ, CLASSPATH=f"{javatuples_jar}")

    # Every problem defines a class of the same name, so each compilation
    # writes its .class files into its own temporary directory.
    with tempfile.TemporaryDirectory() as class_dir:
        # javac reads the source as UTF-8.
        result = run(["javac", "-encoding", "UTF8", "-d", class_dir, path], env=env)

        if result.exit_code != 0:
            # Any compile error (a type error included) is reported under
            # the status name the other evaluators use.
            status = "SyntaxError"
        else:
            result = run(
                ["java", "-ea", "-cp", f"{class_dir}:{javatuples_jar}", "Problem"],
                env=env,
            )
            if result.timeout:
                status = "Timeout"
            else:
                status = "OK" if result.exit_code == 0 else "Exception"

    return {
        "status": status,
        "exit_code": result.exit_code,
        "stdout": result.stdout,
        "stderr": result.stderr,
    }


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
|
src/eval_javascript.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Run a JavaScript file with ``node`` and classify the outcome.

    Returns:
        dict: ``status`` is "OK" for exit code 0, "Timeout" after 5 seconds,
        and otherwise "AssertionError", "SyntaxError", "ReferenceError" or
        "Exception" depending on what the captured output mentions.
        ``exit_code`` is -1 on timeout; ``stdout`` and ``stderr`` are decoded
        as UTF-8.
    """

    def as_text(raw):
        return "" if raw is None else raw.decode("utf-8")

    # First marker found in the captured output decides the failure status.
    failure_markers = (
        ('ERR_ASSERTION', "AssertionError"),
        ('SyntaxError', "SyntaxError"),
        ('ReferenceError', "ReferenceError"),
    )

    try:
        output = subprocess.run(["node", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        status, returncode, output = "Timeout", -1, exc
    except subprocess.CalledProcessError as exc:
        status, returncode, output = "Exception", exc.returncode, exc
    else:
        returncode = output.returncode
        # Exit code 0 is taken to mean that everything went fine.
        if returncode == 0:
            status = "OK"
        else:
            dump = str(output)
            status = "Exception"
            for marker, marker_status in failure_markers:
                if marker in dump:
                    status = marker_status
                    break

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
+
def main():
    """Evaluate every file of the JavaScript dataset and print one CSV row each."""
    dataset = Path(
        Path(__file__).parent, "..", "datasets", "js-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for entry in os.listdir(dataset):
        outcome = eval_script(dataset / entry)
        # The part of the file name before the first dot identifies the problem.
        problem = entry.split(".")[0]
        print(f"JavaScript,{problem},{outcome['status']}")


if __name__ == "__main__":
    main()
|
src/eval_julia.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from safe_subprocess import run
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run a Julia file and classify the outcome.

    Returns:
        dict: ``status`` is "Timeout", "OK" (exit code 0), "Exception" when a
        failing run left stderr empty, and "SyntaxError" otherwise;
        ``exit_code``, ``stdout`` and ``stderr`` come from the run.
    """
    proc = run(["julia", str(path)], timeout_seconds=5)
    if proc.timeout:
        status = "Timeout"
    elif proc.exit_code == 0:
        status = "OK"
    else:
        # TODO(arjun): I would like this to be reviewed more carefully by John.
        status = "SyntaxError" if len(proc.stderr) >= 1 else "Exception"

    return {
        "status": status,
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
|
src/eval_lean.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Check a Lean file and classify the outcome from what ``lean`` prints.

    The exit code is not used for classification: per the original author's
    note, lean's return code is always 1.

    Returns:
        dict: ``status`` is "AssertionError" when the output reports a failed
        ``rfl`` tactic, "OK" when lean printed nothing, "SyntaxError" for any
        other output, and "Timeout" after 5 seconds. ``exit_code`` is -1 on
        timeout; ``stdout`` and ``stderr`` are returned as text.
    """

    def as_text(raw):
        return "" if raw is None else raw.decode("utf-8", errors="replace")

    try:
        output = subprocess.run(["lean", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        status, returncode, output = "Timeout", -1, exc
    else:
        returncode = output.returncode
        # Classify on the text lean printed, not on the repr of the
        # CompletedProcess, which is never empty.
        report = as_text(output.stdout) + as_text(output.stderr)
        if "error: tactic 'rfl' failed" in report:
            status = "AssertionError"
        elif not report.strip():
            status = "OK"
        else:
            status = "SyntaxError"

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": as_text(output.stdout),
        "stderr": as_text(output.stderr),
    }
|
src/eval_lua.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run a Lua file and classify the outcome.

    Returns:
        dict: ``status`` is "Timeout", "OK" (exit code 0) or "Exception";
        ``exit_code``, ``stdout`` and ``stderr`` come from the run.
    """
    proc = run(["lua", str(path)])
    if proc.timeout:
        status = "Timeout"
    else:
        status = "OK" if proc.exit_code == 0 else "Exception"
    return {
        "status": status,
        "exit_code": proc.exit_code,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
    }
|
src/eval_luau.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Analyse a Luau script with ``luau-analyze`` and, if clean, run it.

    The script is only executed with ``luau`` when the analysis step exits
    with code 0. A failed analysis is reported as ``"SyntaxError"`` when
    that word appears in the analyser's stderr, and as ``"TypeError"``
    otherwise. A timeout in either step yields ``"Timeout"``.

    Args:
        path: Location of the Luau script.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr``; the last three come from the most recent command run.
    """
    target = str(path)

    proc = run(["luau-analyze", target])
    if proc.timeout:
        verdict = "Timeout"
    elif proc.exit_code != 0:
        # The analyser rejected the file; tell syntax and type problems apart.
        verdict = "SyntaxError" if "SyntaxError" in proc.stderr else "TypeError"
    else:
        # Analysis passed, so actually execute the script.
        proc = run(["luau", target])
        if proc.timeout:
            verdict = "Timeout"
        else:
            verdict = "OK" if proc.exit_code == 0 else "Exception"

    return dict(
        status=verdict,
        exit_code=proc.exit_code,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )
|
src/eval_matlab.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path):
    """Run a Matlab script through ``matlab.engine`` in a child Python process.

    The file at ``path`` is rewritten in place with a ``test();`` call
    prepended, then a small generated Python program loads the Matlab engine,
    adds the file's directory to the Matlab path and invokes the script by
    its stem.

    Args:
        path: ``pathlib.Path``-like object for the Matlab file (its ``stem``
            and ``parent`` attributes are used).

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr``. ``exit_code`` is synthesised from the status (``-1`` for
        a timeout, ``0`` for OK, ``1`` otherwise).
    """
    # Matlab insists that function definitions come last in a file, so the
    # call to the test function has to be placed in front of the existing text.
    with open(path, 'r') as src:
        original = src.read()
    with open(path, 'w') as dst:
        dst.write(f"test();\n{original}")

    filename = path.stem
    parent_dir = path.parent.absolute()

    # The engine exposes a script as an attribute of the engine object, so a
    # throwaway Python program is generated to make that call. Running it via
    # the subprocess helper also keeps the Matlab session out of this process.
    program= f"""
import matlab.engine
import io
import sys
out = io.StringIO()
err = io.StringIO()
eng = matlab.engine.start_matlab()
eng.addpath(r'{parent_dir}',nargout=0)
try:
    r = eng.{filename}(nargout=0, stdout=out,stderr=err)
    print(out.getvalue())
except matlab.engine.MatlabExecutionError as e:
    print(err.getvalue(), file=sys.stderr)
"""
    r = run(["python3", "-c", program], timeout_seconds=30)

    # Still somewhat brittle: success is inferred from an empty stderr.
    if r.timeout:
        status, exit_code = "Timeout", -1
    elif r.stderr == "":
        status, exit_code = "OK", 0
    else:
        status, exit_code = "Exception", 1

    return {
        "status": status,
        "exit_code": exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_ocaml.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run an OCaml script with ``ocaml`` and classify the outcome.

    A non-zero exit is refined by looking for known markers in stderr:
    ``Assert_failure`` gives ``"AssertionError"``, ``Syntax error`` gives
    ``"SyntaxError"``, and anything else is ``"Exception"``.

    Args:
        path: Location of the OCaml script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the runner's result.
    """
    proc = run(["ocaml", str(path)])

    if proc.timeout:
        verdict = "Timeout"
    elif proc.exit_code == 0:
        verdict = "OK"
    else:
        # First matching marker wins; the order mirrors the priority above.
        verdict = "Exception"
        for marker, label in (
            ("Assert_failure", "AssertionError"),
            ("Syntax error", "SyntaxError"),
        ):
            if marker in proc.stderr:
                verdict = label
                break

    return dict(
        status=verdict,
        exit_code=proc.exit_code,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )
|
src/eval_php.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
LANG_NAME = "PHP"
LANG_EXT = ".php"


def eval_script(path: Path):
    """Run a PHP script with ``php`` and classify the outcome.

    Status values:

    * ``"Timeout"`` -- the runner flagged a timeout;
    * ``"SyntaxError"`` -- the output contains ``PHP Parse error``;
    * ``"Exception"`` -- any other non-zero exit code;
    * ``"OK"`` -- exit code 0 without a parse error.

    Args:
        path: Location of the PHP script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the runner's result.
    """
    r = run(["php", path])
    if r.timeout:
        # Report timeouts explicitly, as the sibling evaluators do, instead
        # of folding them into "Exception".
        status = "Timeout"
    elif "PHP Parse error" in r.stdout or "PHP Parse error" in r.stderr:
        # NOTE(review): which stream carries this message presumably depends
        # on the PHP error configuration, so both are checked -- confirm.
        status = "SyntaxError"
    elif r.exit_code != 0:
        status = "Exception"
    else:
        status = "OK"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_pl.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run a Perl script with ``perl`` and classify the outcome.

    Apart from timeouts, a run counts as failed when the exit code is
    non-zero or when the text ``ERROR`` shows up in stdout or stderr.

    Args:
        path: Location of the Perl script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the runner's result.
    """
    proc = run(["perl", path])

    if proc.timeout:
        verdict = "Timeout"
    else:
        failed = (
            proc.exit_code != 0
            or "ERROR" in proc.stdout
            or "ERROR" in proc.stderr
        )
        verdict = "Exception" if failed else "OK"

    return dict(
        status=verdict,
        exit_code=proc.exit_code,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )
|
src/eval_python.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
def eval_script(path: Path):
    """Run a Python script with ``python3`` and classify the outcome.

    A non-zero exit is reported as ``"SyntaxError"`` when that word appears
    in stderr and as ``"Exception"`` otherwise.

    Args:
        path: Location of the Python script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the runner's result.
    """
    completed = run(["python3", str(path)])

    if completed.timeout:
        verdict = "Timeout"
    elif completed.exit_code != 0:
        verdict = "SyntaxError" if "SyntaxError" in completed.stderr else "Exception"
    else:
        verdict = "OK"

    return dict(
        status=verdict,
        exit_code=completed.exit_code,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
|
src/eval_r.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Run an R script with ``Rscript`` and classify the outcome.

    Status values:

    * ``"OK"`` -- exit code 0;
    * ``"SyntaxError"`` -- non-zero exit and ``unexpected`` appears in the
      captured output;
    * ``"AssertionError"`` -- non-zero exit with an empty stderr;
    * ``"Exception"`` -- any other non-zero exit;
    * ``"Timeout"`` -- the script did not finish within 5 seconds.

    Args:
        path: Location of the R script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code`` (``-1`` on timeout),
        ``stdout`` and ``stderr``. The two streams are returned undecoded, as
        captured by ``subprocess`` (bytes; presumably ``None`` is possible on
        a timeout -- callers should handle both).
    """
    try:
        output = subprocess.run(["Rscript", str(path)], capture_output=True, timeout=5)
    except subprocess.TimeoutExpired as exc:
        return {
            "status": "Timeout",
            "exit_code": -1,
            "stdout": exc.stdout,
            "stderr": exc.stderr,
        }

    # Look at the captured streams directly. Matching against the repr of the
    # CompletedProcess would also match text in the script's own path.
    if output.returncode == 0:
        status = "OK"
    elif b"unexpected" in output.stdout or b"unexpected" in output.stderr:
        status = "SyntaxError"
    elif output.stderr == b"":
        status = "AssertionError"
    else:
        status = "Exception"

    return {
        "status": status,
        "exit_code": output.returncode,
        "stdout": output.stdout,
        "stderr": output.stderr
    }
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
def main():
    """Evaluate every entry of the bundled R dataset directory.

    For each directory entry a CSV-style line ``R,<name>,<status>`` is
    printed, where ``<name>`` is the entry name up to its first dot.
    """
    dataset_dir = Path(
        Path(__file__).parent, "..", "datasets", "R-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for entry in os.listdir(dataset_dir):
        outcome = eval_script(dataset_dir.joinpath(entry))
        problem = entry.partition(".")[0]
        print(f"R,{problem},{outcome['status']}")


if __name__ == "__main__":
    main()
|
src/eval_racket.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Evaluates a generated Racket program (.rkt).
|
3 |
+
"""
|
4 |
+
import os
|
5 |
+
from pathlib import Path
|
6 |
+
from safe_subprocess import run
|
7 |
+
from libeval import run_without_exn
|
8 |
+
|
9 |
+
|
10 |
+
def eval_script(path: Path):
    """Run a Racket program with ``racket`` and classify the outcome.

    Status values:

    * ``"Timeout"`` -- the runner flagged a timeout;
    * ``"SyntaxError"`` -- the run failed and stderr mentions ``read-syntax``;
    * ``"Exception"`` -- the run failed in any other way;
    * ``"OK"`` -- exit code 0 and nothing on stderr.

    Args:
        path: Location of the ``.rkt`` program to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr``, or ``None`` when the run could not be evaluated because
        the ``rackunit`` collection is not installed (a message is printed
        in that case).
    """
    result = run(["racket", str(path)])

    # The message is matched as two fragments so the check does not depend on
    # the exact indentation of its continuation line.
    rackunit_missing = (
        "standard-module-name-resolver: collection not found" in result.stderr
        and "for module path: rackunit" in result.stderr
    )
    if rackunit_missing:
        print(f"Failed to run evaluation for {path}: rackunit is not installed")
        return None

    if result.timeout:
        # Report timeouts explicitly, as the sibling evaluators do.
        status = "Timeout"
    elif len(result.stderr) > 0 or result.exit_code != 0:
        # rackunit produces exit code 0 even if tests fail, so any stderr
        # output counts as a failure too.
        if "read-syntax" in result.stderr:
            status = "SyntaxError"
        else:
            status = "Exception"
    else:
        status = "OK"

    return {
        "status": status,
        "exit_code": result.exit_code,
        "stdout": result.stdout,
        "stderr": result.stderr,
    }
|
35 |
+
|
36 |
+
|
37 |
+
def main():
    """Evaluate every entry of the bundled Racket dataset directory.

    For each directory entry a CSV-style line ``Racket,<name>,<status>`` is
    printed, where ``<name>`` is the entry name up to its first dot. Entries
    for which ``eval_script`` returns ``None`` are skipped.
    """
    directory = Path(
        Path(__file__).parent, "..", "datasets", "racket-keep-code_davinci_001_temp_0.2"
    ).resolve()

    for filename in os.listdir(directory):
        r = eval_script(Path.joinpath(directory, filename))
        if r is None:
            # eval_script could not evaluate this file and has already said
            # why; indexing None here would raise a TypeError.
            continue
        filename = filename.split(".")[0]
        print(f"Racket,{filename},{r['status']}")


if __name__ == "__main__":
    main()
|
src/eval_ruby.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from sys import exit
|
3 |
+
import subprocess
|
4 |
+
from pathlib import Path
|
5 |
+
from generic_eval import main as gmain
|
6 |
+
|
7 |
+
def eval_script(path: Path):
    """Run a Ruby script with ``ruby`` and classify the outcome.

    Status values:

    * ``"OK"`` -- exit code 0;
    * ``"Timeout"`` -- the script did not finish within 5 seconds;
    * ``"Exception"`` -- non-zero exit with nothing on stderr (treated as
      failed tests);
    * ``"SyntaxError"`` -- non-zero exit with any stderr output.

    Args:
        path: Location of the Ruby script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code`` (``-1`` on timeout),
        ``stdout`` and ``stderr``. The two streams are returned undecoded, as
        captured by ``subprocess`` (bytes; presumably ``None`` is possible on
        a timeout -- callers should handle both).
    """
    try:
        # check=True turns every non-zero exit into CalledProcessError, so
        # reaching the else branch already means the exit code was 0.
        output = subprocess.run(
            ["ruby", str(path)], check=True, capture_output=True, timeout=5
        )
    except subprocess.TimeoutExpired as exc:
        status = "Timeout"
        out = exc.stdout
        error = exc.stderr
        returncode = -1
    except subprocess.CalledProcessError as exc:
        returncode = exc.returncode
        out = exc.stdout
        error = exc.stderr
        # Failure without an error message is an Exception from failed tests;
        # everything that prints an error message is reported as SyntaxError.
        status = "SyntaxError" if error else "Exception"
    else:
        status = "OK"
        # Keep the streams straight: stdout goes to "stdout", stderr to "stderr".
        out = output.stdout
        error = output.stderr
        returncode = 0
    return {
        "status": status,
        "exit_code": returncode,
        "stdout": out,
        "stderr": error,
    }


if __name__ == "__main__":
    gmain(eval_script, 'Ruby', '.rb')
|
src/eval_rust.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
import tempfile
|
4 |
+
from pathlib import Path
|
5 |
+
from generic_eval import main
|
6 |
+
|
7 |
+
LANG_NAME = "Rust"
LANG_EXT = ".rs"


def eval_script(path: Path):
    """Compile a Rust file with ``rustc``, run the binary and classify the result.

    Status values:

    * ``"Timeout"`` -- compilation exceeded 15 seconds or the program
      exceeded 5 seconds;
    * ``"SyntaxError"`` -- ``rustc`` exited non-zero (any compile error,
      following the convention of the other evaluators);
    * ``"Exception"`` -- the program exited non-zero (for example a panic);
    * ``"OK"`` -- the program exited with code 0.

    The compiled binary is written next to the source file (same name without
    the extension) and removed again after the run.

    Args:
        path: Location of the Rust source file.

    Returns:
        A dict with the keys ``status``, ``exit_code`` (``-1`` on timeout),
        ``stdout`` and ``stderr`` (both decoded to ``str``).
    """
    source = Path(path).absolute()
    # The binary path is absolute so it is executed directly; a bare name
    # (from a relative source path) would be looked up on PATH instead.
    if source.suffix:
        binary = source.with_suffix("")
    else:
        # Never let the compiler output overwrite a suffix-less source file.
        binary = source.with_name(source.name + ".bin")

    try:
        build = subprocess.run(
            ["rustc", str(source), "-o", str(binary)], capture_output=True, timeout=15
        )
    except subprocess.TimeoutExpired:
        return {
            "status": "Timeout",
            "exit_code": -1,
            "stdout": "Compiler timeout",
            "stderr": "Compiler timeout",
        }

    if build.returncode != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
        returncode = build.returncode
        output = build
    else:
        try:
            # Assumes exit-code 0 is all okay
            output = subprocess.run([str(binary)], capture_output=True, timeout=5)
            returncode = output.returncode
            status = "OK" if returncode == 0 else "Exception"
        except subprocess.TimeoutExpired as exc:
            status = "Timeout"
            returncode = -1
            output = exc
        finally:
            # Remove the binary even if running it raised unexpectedly.
            if binary.exists():
                binary.unlink()

    # Decode tolerantly: program output is not guaranteed to be valid UTF-8.
    return {
        "status": status,
        "exit_code": returncode,
        "stdout": "" if output.stdout is None else output.stdout.decode("utf-8", errors="replace"),
        "stderr": "" if output.stderr is None else output.stderr.decode("utf-8", errors="replace"),
    }


if __name__ == "__main__":
    main(eval_script, LANG_NAME, LANG_EXT)
|
53 |
+
|
src/eval_scala.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import tempfile
|
3 |
+
from safe_subprocess import run
|
4 |
+
|
5 |
+
LANG_NAME = "Scala"
LANG_EXT = ".scala"


def eval_script(path: Path):
    """Compile a Scala file with ``scalac``, run it and classify the result.

    Status values:

    * ``"Timeout"`` -- the runner flagged a timeout for the compiler or for
      the program;
    * ``"SyntaxError"`` -- ``scalac`` exited non-zero (any compile error,
      following the convention of the other evaluators);
    * ``"OK"`` -- the program exited with code 0 and wrote nothing to stderr;
    * ``"Exception"`` -- any other program outcome.

    Args:
        path: Location of the Scala source file.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the last command that was run.
    """
    with tempfile.TemporaryDirectory() as outdir:
        # Each Scala file contains a class with the same name, so scalac
        # would emit the same .class file for every problem. Compiling each
        # problem into its own temporary directory keeps them apart.
        build = run(["scalac", "-d", outdir, path], timeout_seconds=45)
        if build.timeout:
            # A compiler that ran out of time is not a compile error.
            return {
                "status": "Timeout",
                "exit_code": build.exit_code,
                "stdout": build.stdout,
                "stderr": build.stderr,
            }
        if build.exit_code != 0:
            # Well, it's a compile error. May be a type error or
            # something. But, why break the set convention
            return {
                "status": "SyntaxError",
                "exit_code": build.exit_code,
                "stdout": build.stdout,
                "stderr": build.stderr,
            }
        # "Problem" is the name of the class we emit.
        r = run(["scala", "-cp", f"{outdir}", "Problem"])
        if r.timeout:
            status = "Timeout"
        elif r.exit_code == 0 and r.stderr == "":
            status = "OK"
        else:
            # Well, it's a panic
            status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_sh.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
LANG_NAME = "bash"
LANG_EXT = ".sh"


def eval_script(path: Path):
    """Run a shell script with ``bash`` and classify the outcome.

    A non-zero exit is reported as ``"SyntaxError"`` when stderr contains
    ``syntax error`` and as ``"Exception"`` otherwise.

    Args:
        path: Location of the shell script to run.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the runner's result.
    """
    # Output is captured whether the script succeeds, fails or does not parse.
    proc = run(["bash", path])

    if proc.timeout:
        verdict = "Timeout"
    elif proc.exit_code != 0:
        verdict = "SyntaxError" if "syntax error" in proc.stderr else "Exception"
    else:
        verdict = "OK"

    return dict(
        status=verdict,
        exit_code=proc.exit_code,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )
|
src/eval_swift.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import subprocess
|
2 |
+
from pathlib import Path
|
3 |
+
import os
|
4 |
+
from safe_subprocess import run
|
5 |
+
|
6 |
+
def eval_script(path: Path):
    """Compile a Swift file with ``swiftc``, run the binary and classify the result.

    Status values:

    * ``"Timeout"`` -- the runner flagged a timeout for the compiler
      (45 second limit) or for the program (5 second limit);
    * ``"SyntaxError"`` -- ``swiftc`` exited non-zero (any compile error,
      following the convention of the other evaluators);
    * ``"Exception"`` -- the program exited non-zero;
    * ``"OK"`` -- the program exited with code 0.

    The compiled binary is written next to the source file (same name without
    the extension) and removed again after the run.

    Args:
        path: Location of the Swift source file.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the last command that was run.
    """
    source = Path(path).absolute()
    # The binary path is absolute so it is executed directly; a bare name
    # (from a relative source path) would be looked up on PATH instead.
    if source.suffix:
        binary = str(source.with_suffix(""))
    else:
        # Never let the compiler output overwrite a suffix-less source file.
        binary = str(source.with_name(source.name + ".bin"))

    r = run(["swiftc", path, "-o", binary], timeout_seconds=45)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
    else:
        try:
            r = run([binary], timeout_seconds=5)
            if r.timeout:
                status = "Timeout"
            elif r.exit_code != 0:
                # Well, it's a panic
                status = "Exception"
            else:
                status = "OK"
        finally:
            # Remove the binary even if running it raised unexpectedly.
            if os.path.exists(binary):
                os.remove(binary)
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_ts.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
|
4 |
+
|
5 |
+
def eval_script(path: Path):
    """Compile a TypeScript file with ``tsc``, run it with ``node`` and classify the result.

    Status values:

    * ``"Timeout"`` -- the runner flagged a timeout for ``tsc`` or ``node``;
    * ``"SyntaxError"`` -- ``tsc`` exited non-zero, or ``node`` failed and its
      stderr contains ``SyntaxError``;
    * ``"AssertionError"`` -- ``node`` failed and stderr contains
      ``ERR_ASSERTION``;
    * ``"ReferenceError"`` -- ``node`` failed and stderr contains
      ``ReferenceError``;
    * ``"Exception"`` -- any other ``node`` failure;
    * ``"OK"`` -- ``node`` exited with code 0.

    Args:
        path: Location of the TypeScript source file.

    Returns:
        A dict with the keys ``status``, ``exit_code``, ``stdout`` and
        ``stderr`` taken from the last command that was run.
    """
    r = run(["tsc", "--target", "esnext", str(path)], timeout_seconds=15)
    if r.timeout:
        # A compiler that ran out of time is not a compile error.
        return {
            "status": "Timeout",
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }
    if r.exit_code != 0:
        return {
            "status": "SyntaxError",
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }

    # Swap only the file extension. A plain string replace would also rewrite
    # any ".ts" that happens to occur in a directory or file name.
    js_path = Path(path).with_suffix(".js")
    r = run(["node", str(js_path)], timeout_seconds=15)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    elif "ERR_ASSERTION" in r.stderr:
        status = "AssertionError"
    elif "SyntaxError" in r.stderr:
        status = "SyntaxError"
    elif "ReferenceError" in r.stderr:
        status = "ReferenceError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
|
src/eval_v.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from safe_subprocess import run
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
# return codes for coqc:
|
6 |
+
# 0: compilation goes through
|
7 |
+
# 1: some sort of error (nondescript)
|
8 |
+
|
9 |
+
def eval_script(path: Path):
    """Compile a Coq file with ``coqc`` and classify the outcome.

    Status values:

    * ``"OK"`` -- ``coqc`` exited with code 0;
    * ``"AssertionError"`` -- ``coqc`` failed and its output contains
      ``Unable to unify``;
    * ``"SyntaxError"`` -- any other ``coqc`` failure;
    * ``"Timeout"`` -- ``coqc`` did not finish within 5 seconds.

    Files with the extensions ``.vo``, ``.vok`` and ``.vos`` next to the
    source are removed afterwards, whatever the outcome.

    Args:
        path: Location of the Coq source file.

    Returns:
        A dict with the keys ``status``, ``exit_code`` (``-1`` on timeout),
        ``stdout`` and ``stderr`` (both decoded to ``str``).
    """
    cleanup_extensions = ['.vo', '.vok', '.vos']
    timed_out = False

    try:
        # sadly there seems to be no way to verify proofs in a coq file without compiling
        output = subprocess.run(["coqc", "-noglob", str(path)], capture_output=True, timeout=5)
        returncode = output.returncode
    except subprocess.TimeoutExpired as exc:
        timed_out = True
        output = exc
        returncode = -1
    finally:
        # Remove compiler artifacts on every path, not only after a success,
        # so failed or interrupted runs do not leave files behind.
        for ext in cleanup_extensions:
            file_to_remove = Path(path).with_suffix(ext)
            if file_to_remove.exists():
                file_to_remove.unlink()

    # Decode tolerantly: a stray non-UTF-8 byte must not crash the evaluator.
    stdout = "" if output.stdout is None else output.stdout.decode("utf-8", errors="replace")
    stderr = "" if output.stderr is None else output.stderr.decode("utf-8", errors="replace")

    if timed_out:
        status = "Timeout"
    elif returncode == 0:
        status = "OK"
    elif "Unable to unify" in stdout or "Unable to unify" in stderr:
        status = "AssertionError"
    else:
        status = "SyntaxError"

    return {
        "status": status,
        "exit_code": returncode,
        "stdout": stdout,
        "stderr": stderr,
    }
|