xhluca committed on
Commit
b915816
·
1 Parent(s): 972a7b5

fix issues

Browse files
Files changed (2) hide show
  1. demo.py +14 -3
  2. requirements.txt +2 -1
demo.py CHANGED
@@ -399,8 +399,19 @@ def get_judgment_path(base_judgments_dir, benchmark, agent, judge, task_id):
399
  return judgment_path
400
 
401
 
402
- def list_benchmarks():
403
- return list(benchmarks_dict.values())
 
 
 
 
 
 
 
 
 
 
 
404
 
405
 
406
  def list_agents(base_traj_dir, benchmark):
@@ -477,7 +488,7 @@ with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
477
  with gr.Column(scale=4):
478
  benchmark_default = "WebArena"
479
  benchmark_dd = gr.Dropdown(
480
- label="Benchmark", choices=list_benchmarks(), value=benchmark_default
481
  )
482
 
483
  agents = list_agents(base_traj_dir, benchmark_default)
 
399
  return judgment_path
400
 
401
 
402
+ def list_benchmarks(base_traj_dir):
403
+ benchmarks_all = list(benchmarks_dict.values())
404
+ # filter by the benchmarks that are in the base_traj_dir
405
+ benchmarks = []
406
+ for benchmark in benchmarks_all:
407
+ traj_dir = Path(base_traj_dir, benchmarks_inverse[benchmark])
408
+ traj_dir = traj_dir.resolve()
409
+ if traj_dir.exists():
410
+ benchmarks.append(benchmark)
411
+ # sort the benchmarks
412
+ benchmarks.sort()
413
+ return benchmarks
414
+
415
 
416
 
417
  def list_agents(base_traj_dir, benchmark):
 
488
  with gr.Column(scale=4):
489
  benchmark_default = "WebArena"
490
  benchmark_dd = gr.Dropdown(
491
+ label="Benchmark", choices=list_benchmarks(base_traj_dir), value=benchmark_default
492
  )
493
 
494
  agents = list_agents(base_traj_dir, benchmark_default)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  tqdm
2
  orjson
3
  Pillow
4
- pyparsing
 
 
1
  tqdm
2
  orjson
3
  Pillow
4
+ pyparsing
5
+ gradio