apsys committed on
Commit
caa4d36
·
1 Parent(s): 5ce0e8b

categories rename

Browse files
app.py CHANGED
@@ -577,6 +577,22 @@ def update_visualization(selected_models, selected_category, selected_metric, ve
577
  # Create Gradio app
578
  demo = gr.Blocks(css=custom_css, theme=custom_theme)
579
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
  with demo:
581
  gr.HTML(TITLE)
582
  # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
@@ -628,12 +644,14 @@ with demo:
628
  # Create tabs for each category
629
  with gr.Tabs(elem_classes="category-tabs") as category_tabs:
630
  # First tab for average metrics across all categories
631
- with gr.TabItem("Overall Performance", elem_id="overall-tab"):
632
  leaderboard = init_leaderboard(LEADERBOARD_DF)
633
 
634
- # Create a tab for each category
635
  for category in CATEGORIES:
636
- with gr.TabItem(f"{category}", elem_id=f"category-{category.lower().replace(' ', '-')}-tab"):
 
 
637
  category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
638
  category_leaderboard = init_leaderboard(category_df)
639
 
@@ -752,12 +770,12 @@ with demo:
752
  interactive=True
753
  )
754
  with gr.Column():
755
- # Add Overall Performance to categories
756
- viz_categories = ["Overall Performance"] + CATEGORIES
757
  category_selector = gr.Dropdown(
758
- choices=viz_categories,
759
  label="Select Category",
760
- value=viz_categories[0],
761
  interactive=True
762
  )
763
  metric_selector = gr.Dropdown(
@@ -772,7 +790,7 @@ with demo:
772
  # Update visualization when any selector changes
773
  for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
774
  control.change(
775
- fn=update_visualization,
776
  inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
777
  outputs=plot_output
778
  )
 
577
  # Create Gradio app
578
  demo = gr.Blocks(css=custom_css, theme=custom_theme)
579
 
580
+ # Mapping from original category names to display names
581
+ CATEGORY_DISPLAY_MAP = {
582
+ "Criminal, Violent, and Terrorist Activity": "Crime & Violence",
583
+ "Manipulation, Deception, and Misinformation": "Misinformation",
584
+ "Creative Content Involving Illicit Themes": "Illicit Creative",
585
+ "Sexual Content and Violence": "Sexual Content",
586
+ "Political Corruption and Legal Evasion": "Corruption & Legal Evasion",
587
+ "Labor Exploitation and Human Trafficking": "Labor Exploitation",
588
+ "Environmental and Industrial Harm": "Environmental & Industrial Harm",
589
+ "Animal Cruelty and Exploitation": "Animal Harm",
590
+ "Self–Harm and Suicidal Ideation": "Self-Harm",
591
+ "Safe Prompts": "Safe Prompts"
592
+ }
593
+ # Create reverse mapping for lookups
594
+ CATEGORY_REVERSE_MAP = {v: k for k, v in CATEGORY_DISPLAY_MAP.items()}
595
+
596
  with demo:
597
  gr.HTML(TITLE)
598
  # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
644
  # Create tabs for each category
645
  with gr.Tabs(elem_classes="category-tabs") as category_tabs:
646
  # First tab for average metrics across all categories
647
+ with gr.TabItem("All Results", elem_id="overall-tab"):
648
  leaderboard = init_leaderboard(LEADERBOARD_DF)
649
 
650
+ # Create a tab for each category using display names
651
  for category in CATEGORIES:
652
+ display_name = CATEGORY_DISPLAY_MAP.get(category, category)
653
+ elem_id = f"category-{display_name.lower().replace(' ', '-').replace('&', 'and')}-tab"
654
+ with gr.TabItem(display_name, elem_id=elem_id):
655
  category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
656
  category_leaderboard = init_leaderboard(category_df)
657
 
 
770
  interactive=True
771
  )
772
  with gr.Column():
773
+ # Prepend the "All Results" entry to the category choices, using display names
774
+ viz_categories_display = ["All Results"] + [CATEGORY_DISPLAY_MAP.get(cat, cat) for cat in CATEGORIES]
775
  category_selector = gr.Dropdown(
776
+ choices=viz_categories_display,
777
  label="Select Category",
778
+ value=viz_categories_display[0],
779
  interactive=True
780
  )
781
  metric_selector = gr.Dropdown(
 
790
  # Update visualization when any selector changes
791
  for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
792
  control.change(
793
+ fn=lambda sm, sc, s_metric, v: update_visualization(sm, CATEGORY_REVERSE_MAP.get(sc, sc), s_metric, v),
794
  inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
795
  outputs=plot_output
796
  )
logs/guardbench_20250424_132721_81730502.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:27:22,432 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:27:22,600 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_132735_5fe7accd.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:27:35,986 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:27:36,096 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_133606_bfae8d28.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:36:07,557 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:36:07,754 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_133744_d3ca5956.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:37:44,999 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:37:45,167 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_133847_cf6f7f0f.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:38:48,713 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:38:48,956 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_133952_920f4d61.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-04-24 13:39:53,041 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:39:53,311 - __main__ - INFO - Loaded leaderboard with 0 entries
logs/guardbench_20250424_134044_2df3fadc.log ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-24 13:40:44,934 - __main__ - INFO - Initializing leaderboard data...
2
+ 2025-04-24 13:40:45,088 - __main__ - INFO - Loaded leaderboard with 0 entries
3
+ 2025-04-24 13:40:45,155 - __main__ - INFO - Available columns in LEADERBOARD_DF: ['model_name', 'model_type', 'guard_model_type', 'integral_score', 'macro_accuracy', 'macro_recall', 'micro_avg_error_ratio', 'micro_avg_runtime_ms', 'total_evals_count']
4
+ 2025-04-24 13:40:45,158 - __main__ - WARNING - Initializing empty leaderboard
5
+ 2025-04-24 13:40:45,672 - __main__ - WARNING - Initializing empty leaderboard
6
+ 2025-04-24 13:40:45,758 - __main__ - WARNING - Initializing empty leaderboard
7
+ 2025-04-24 13:40:45,862 - __main__ - WARNING - Initializing empty leaderboard
8
+ 2025-04-24 13:40:45,950 - __main__ - WARNING - Initializing empty leaderboard
9
+ 2025-04-24 13:40:46,035 - __main__ - WARNING - Initializing empty leaderboard
10
+ 2025-04-24 13:40:46,122 - __main__ - WARNING - Initializing empty leaderboard
11
+ 2025-04-24 13:40:46,389 - __main__ - WARNING - Initializing empty leaderboard
12
+ 2025-04-24 13:40:46,473 - __main__ - WARNING - Initializing empty leaderboard
13
+ 2025-04-24 13:40:46,593 - __main__ - WARNING - Initializing empty leaderboard
14
+ 2025-04-24 13:40:46,686 - __main__ - WARNING - Initializing empty leaderboard
15
+ 2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Adding job tentatively -- it will be properly scheduled when the scheduler starts
16
+ 2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Added job "refresh_data" to job store "default"
17
+ 2025-04-24 13:40:46,870 - apscheduler.scheduler - INFO - Scheduler started