xhluca
committed on
Commit
·
b915816
1
Parent(s):
972a7b5
fix issues
Browse files
- demo.py +14 -3
- requirements.txt +2 -1
demo.py
CHANGED
@@ -399,8 +399,19 @@ def get_judgment_path(base_judgments_dir, benchmark, agent, judge, task_id):
|
|
399 |
return judgment_path
|
400 |
|
401 |
|
402 |
-
def list_benchmarks():
|
403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
|
406 |
def list_agents(base_traj_dir, benchmark):
|
@@ -477,7 +488,7 @@ with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
|
|
477 |
with gr.Column(scale=4):
|
478 |
benchmark_default = "WebArena"
|
479 |
benchmark_dd = gr.Dropdown(
|
480 |
-
label="Benchmark", choices=list_benchmarks(), value=benchmark_default
|
481 |
)
|
482 |
|
483 |
agents = list_agents(base_traj_dir, benchmark_default)
|
|
|
399 |
return judgment_path
|
400 |
|
401 |
|
402 |
+
def list_benchmarks(base_traj_dir):
|
403 |
+
benchmarks_all = list(benchmarks_dict.values())
|
404 |
+
# filter by the benchmarks that are in the base_traj_dir
|
405 |
+
benchmarks = []
|
406 |
+
for benchmark in benchmarks_all:
|
407 |
+
traj_dir = Path(base_traj_dir, benchmarks_inverse[benchmark])
|
408 |
+
traj_dir = traj_dir.resolve()
|
409 |
+
if traj_dir.exists():
|
410 |
+
benchmarks.append(benchmark)
|
411 |
+
# sort the benchmarks
|
412 |
+
benchmarks.sort()
|
413 |
+
return benchmarks
|
414 |
+
|
415 |
|
416 |
|
417 |
def list_agents(base_traj_dir, benchmark):
|
|
|
488 |
with gr.Column(scale=4):
|
489 |
benchmark_default = "WebArena"
|
490 |
benchmark_dd = gr.Dropdown(
|
491 |
+
label="Benchmark", choices=list_benchmarks(base_traj_dir), value=benchmark_default
|
492 |
)
|
493 |
|
494 |
agents = list_agents(base_traj_dir, benchmark_default)
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
tqdm
|
2 |
orjson
|
3 |
Pillow
|
4 |
-
pyparsing
|
|
|
|
1 |
tqdm
|
2 |
orjson
|
3 |
Pillow
|
4 |
+
pyparsing
|
5 |
+
gradio
|