categories rename
Browse files
- app.py +26 -8
- logs/guardbench_20250424_132721_81730502.log +2 -0
- logs/guardbench_20250424_132735_5fe7accd.log +2 -0
- logs/guardbench_20250424_133606_bfae8d28.log +2 -0
- logs/guardbench_20250424_133744_d3ca5956.log +2 -0
- logs/guardbench_20250424_133847_cf6f7f0f.log +2 -0
- logs/guardbench_20250424_133952_920f4d61.log +2 -0
- logs/guardbench_20250424_134044_2df3fadc.log +17 -0
app.py
CHANGED
@@ -577,6 +577,22 @@ def update_visualization(selected_models, selected_category, selected_metric, ve
|
|
577 |
# Create Gradio app
|
578 |
demo = gr.Blocks(css=custom_css, theme=custom_theme)
|
579 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
580 |
with demo:
|
581 |
gr.HTML(TITLE)
|
582 |
# gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
@@ -628,12 +644,14 @@ with demo:
|
|
628 |
# Create tabs for each category
|
629 |
with gr.Tabs(elem_classes="category-tabs") as category_tabs:
|
630 |
# First tab for average metrics across all categories
|
631 |
-
with gr.TabItem("
|
632 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
633 |
|
634 |
-
# Create a tab for each category
|
635 |
for category in CATEGORIES:
|
636 |
-
|
|
|
|
|
637 |
category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
|
638 |
category_leaderboard = init_leaderboard(category_df)
|
639 |
|
@@ -752,12 +770,12 @@ with demo:
|
|
752 |
interactive=True
|
753 |
)
|
754 |
with gr.Column():
|
755 |
-
# Add Overall Performance to categories
|
756 |
-
|
757 |
category_selector = gr.Dropdown(
|
758 |
-
choices=
|
759 |
label="Select Category",
|
760 |
-
value=
|
761 |
interactive=True
|
762 |
)
|
763 |
metric_selector = gr.Dropdown(
|
@@ -772,7 +790,7 @@ with demo:
|
|
772 |
# Update visualization when any selector changes
|
773 |
for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
|
774 |
control.change(
|
775 |
-
fn=update_visualization,
|
776 |
inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
|
777 |
outputs=plot_output
|
778 |
)
|
|
|
577 |
# Create Gradio app
|
578 |
demo = gr.Blocks(css=custom_css, theme=custom_theme)
|
579 |
|
580 |
+
# Mapping from original category names to display names
|
581 |
+
CATEGORY_DISPLAY_MAP = {
|
582 |
+
"Criminal, Violent, and Terrorist Activity": "Crime & Violence",
|
583 |
+
"Manipulation, Deception, and Misinformation": "Misinformation",
|
584 |
+
"Creative Content Involving Illicit Themes": "Illicit Creative",
|
585 |
+
"Sexual Content and Violence": "Sexual Content",
|
586 |
+
"Political Corruption and Legal Evasion": "Corruption & Legal Evasion",
|
587 |
+
"Labor Exploitation and Human Trafficking": "Labor Exploitation",
|
588 |
+
"Environmental and Industrial Harm": "Environmental & Industrial Harm",
|
589 |
+
"Animal Cruelty and Exploitation": "Animal Harm",
|
590 |
+
"Self–Harm and Suicidal Ideation": "Self-Harm",
|
591 |
+
"Safe Prompts": "Safe Prompts"
|
592 |
+
}
|
593 |
+
# Create reverse mapping (display name -> original category name) so dropdown selections can be translated back
|
594 |
+
CATEGORY_REVERSE_MAP = {v: k for k, v in CATEGORY_DISPLAY_MAP.items()}
|
595 |
+
|
596 |
with demo:
|
597 |
gr.HTML(TITLE)
|
598 |
# gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
644 |
# Create tabs for each category
|
645 |
with gr.Tabs(elem_classes="category-tabs") as category_tabs:
|
646 |
# First tab for average metrics across all categories
|
647 |
+
with gr.TabItem("All Results", elem_id="overall-tab"):
|
648 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
649 |
|
650 |
+
# Create a tab for each category using display names
|
651 |
for category in CATEGORIES:
|
652 |
+
display_name = CATEGORY_DISPLAY_MAP.get(category, category)
|
653 |
+
elem_id = f"category-{display_name.lower().replace(' ', '-').replace('&', 'and')}-tab"
|
654 |
+
with gr.TabItem(display_name, elem_id=elem_id):
|
655 |
category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
|
656 |
category_leaderboard = init_leaderboard(category_df)
|
657 |
|
|
|
770 |
interactive=True
|
771 |
)
|
772 |
with gr.Column():
|
773 |
+
# Prepend the "All Results" option to the category choices, using display names
|
774 |
+
viz_categories_display = ["All Results"] + [CATEGORY_DISPLAY_MAP.get(cat, cat) for cat in CATEGORIES]
|
775 |
category_selector = gr.Dropdown(
|
776 |
+
choices=viz_categories_display,
|
777 |
label="Select Category",
|
778 |
+
value=viz_categories_display[0],
|
779 |
interactive=True
|
780 |
)
|
781 |
metric_selector = gr.Dropdown(
|
|
|
790 |
# Update visualization when any selector changes
|
791 |
for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
|
792 |
control.change(
|
793 |
+
fn=lambda sm, sc, s_metric, v: update_visualization(sm, CATEGORY_REVERSE_MAP.get(sc, sc), s_metric, v),
|
794 |
inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
|
795 |
outputs=plot_output
|
796 |
)
|
logs/guardbench_20250424_132721_81730502.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:27:22,432 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:27:22,600 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_132735_5fe7accd.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:27:35,986 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:27:36,096 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133606_bfae8d28.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:36:07,557 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:36:07,754 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133744_d3ca5956.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:37:44,999 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:37:45,167 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133847_cf6f7f0f.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:38:48,713 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:38:48,956 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133952_920f4d61.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:39:53,041 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:39:53,311 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_134044_2df3fadc.log
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-04-24 13:40:44,934 - __main__ - INFO - Initializing leaderboard data...
|
2 |
+
2025-04-24 13:40:45,088 - __main__ - INFO - Loaded leaderboard with 0 entries
|
3 |
+
2025-04-24 13:40:45,155 - __main__ - INFO - Available columns in LEADERBOARD_DF: ['model_name', 'model_type', 'guard_model_type', 'integral_score', 'macro_accuracy', 'macro_recall', 'micro_avg_error_ratio', 'micro_avg_runtime_ms', 'total_evals_count']
|
4 |
+
2025-04-24 13:40:45,158 - __main__ - WARNING - Initializing empty leaderboard
|
5 |
+
2025-04-24 13:40:45,672 - __main__ - WARNING - Initializing empty leaderboard
|
6 |
+
2025-04-24 13:40:45,758 - __main__ - WARNING - Initializing empty leaderboard
|
7 |
+
2025-04-24 13:40:45,862 - __main__ - WARNING - Initializing empty leaderboard
|
8 |
+
2025-04-24 13:40:45,950 - __main__ - WARNING - Initializing empty leaderboard
|
9 |
+
2025-04-24 13:40:46,035 - __main__ - WARNING - Initializing empty leaderboard
|
10 |
+
2025-04-24 13:40:46,122 - __main__ - WARNING - Initializing empty leaderboard
|
11 |
+
2025-04-24 13:40:46,389 - __main__ - WARNING - Initializing empty leaderboard
|
12 |
+
2025-04-24 13:40:46,473 - __main__ - WARNING - Initializing empty leaderboard
|
13 |
+
2025-04-24 13:40:46,593 - __main__ - WARNING - Initializing empty leaderboard
|
14 |
+
2025-04-24 13:40:46,686 - __main__ - WARNING - Initializing empty leaderboard
|
15 |
+
2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Adding job tentatively -- it will be properly scheduled when the scheduler starts
|
16 |
+
2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Added job "refresh_data" to job store "default"
|
17 |
+
2025-04-24 13:40:46,870 - apscheduler.scheduler - INFO - Scheduler started
|