taesiri committed on
Commit
5b905b8
·
1 Parent(s): 05e4334
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -38,12 +38,13 @@ def load_data(files, model_type):
38
 
39
  # Load and label all data
40
  data = load_data(noncot_results, "Text Only")
 
41
  vision_data = load_data(vision_results, "Vision")
42
  cot_text_data = load_data(cot_text_results, "CoT Text Only")
43
  # cot_vision_data = load_data(cot_vision_results, "CoT Vision")
44
 
45
  # Combine all data into a single DataFrame
46
- all_data = pd.concat([data, vision_data, cot_text_data], ignore_index=True)
47
 
48
  all_model_names = all_data["Model Name"].unique()
49
  all_text_only_model_names = list(
@@ -414,7 +415,7 @@ def generate_heatmap_for_intersection_model(model_name):
414
 
415
  sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
416
 
417
- plt.close(fig) # Prevent it from showing immediately
418
  return fig
419
 
420
 
@@ -443,7 +444,7 @@ with gr.Blocks() as demo:
443
  heatmap_image_qwen = gr.Image(label="", show_label=False)
444
  leader_board.select(fn=load_heatmap_qwen, outputs=[heatmap_image_qwen])
445
 
446
- with gr.Tab("Vision Benchmark"):
447
  gr.Markdown("# Vision Benchmark Leaderboard")
448
  leader_board_vision = gr.Dataframe(
449
  vision_accuracy_df, headers=headers_with_icons
@@ -454,7 +455,7 @@ with gr.Blocks() as demo:
454
  fn=load_vision_heatmap, outputs=[heatmap_image_vision]
455
  )
456
 
457
- with gr.Tab("Text-only Benchmark (CoT)"):
458
  gr.Markdown("# Text-only Leaderboard (CoT)")
459
  cot_leader_board_text = gr.Dataframe(
460
  cot_text_accuracy_df, headers=headers_with_icons
@@ -499,7 +500,7 @@ with gr.Blocks() as demo:
499
  queue=True,
500
  )
501
 
502
- with gr.Tab("Constraint Text-only Results (CoT)"):
503
  gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
504
  included_models_cot = gr.CheckboxGroup(
505
  label="Models to include",
@@ -514,14 +515,14 @@ with gr.Blocks() as demo:
514
  constrained_leader_board_text_cot = gr.Dataframe()
515
  constrained_leader_board_plot_cot = gr.Plot()
516
 
517
- with gr.Tab("Majority Vote (Subset 1)"):
518
  gr.Markdown("## Majority Vote (Subset 1)")
519
  intersection_leader_board = gr.Dataframe(
520
  intersection_df_acc, headers=headers_with_icons
521
  )
522
  heatmap_image = gr.Plot(label="Model Heatmap")
523
 
524
- with gr.Tab("Text-only Benchmark (deprecated)"):
525
  gr.Markdown("# Text-only Leaderboard")
526
  leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
527
  gr.Markdown("## Heatmap")
 
38
 
39
  # Load and label all data
40
  data = load_data(noncot_results, "Text Only")
41
+ data_qwen = load_data(noncot_results_qwen, "Text Only")
42
  vision_data = load_data(vision_results, "Vision")
43
  cot_text_data = load_data(cot_text_results, "CoT Text Only")
44
  # cot_vision_data = load_data(cot_vision_results, "CoT Vision")
45
 
46
  # Combine all data into a single DataFrame
47
+ all_data = pd.concat([data_qwen, vision_data, cot_text_data], ignore_index=True)
48
 
49
  all_model_names = all_data["Model Name"].unique()
50
  all_text_only_model_names = list(
 
415
 
416
  sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
417
 
418
+ plt.close(fig)
419
  return fig
420
 
421
 
 
444
  heatmap_image_qwen = gr.Image(label="", show_label=False)
445
  leader_board.select(fn=load_heatmap_qwen, outputs=[heatmap_image_qwen])
446
 
447
+ with gr.Tab("Vision Benchmark", visible=False):
448
  gr.Markdown("# Vision Benchmark Leaderboard")
449
  leader_board_vision = gr.Dataframe(
450
  vision_accuracy_df, headers=headers_with_icons
 
455
  fn=load_vision_heatmap, outputs=[heatmap_image_vision]
456
  )
457
 
458
+ with gr.Tab("Text-only Benchmark (CoT)", visible=False):
459
  gr.Markdown("# Text-only Leaderboard (CoT)")
460
  cot_leader_board_text = gr.Dataframe(
461
  cot_text_accuracy_df, headers=headers_with_icons
 
500
  queue=True,
501
  )
502
 
503
+ with gr.Tab("Constraint Text-only Results (CoT)", visible=False):
504
  gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
505
  included_models_cot = gr.CheckboxGroup(
506
  label="Models to include",
 
515
  constrained_leader_board_text_cot = gr.Dataframe()
516
  constrained_leader_board_plot_cot = gr.Plot()
517
 
518
+ with gr.Tab("Majority Vote (Subset 1)", visible=False):
519
  gr.Markdown("## Majority Vote (Subset 1)")
520
  intersection_leader_board = gr.Dataframe(
521
  intersection_df_acc, headers=headers_with_icons
522
  )
523
  heatmap_image = gr.Plot(label="Model Heatmap")
524
 
525
+ with gr.Tab("Text-only Benchmark (deprecated)", visible=False):
526
  gr.Markdown("# Text-only Leaderboard")
527
  leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
528
  gr.Markdown("## Heatmap")