style theme
- app.py +4 -2
- src/about.py +5 -25
app.py
CHANGED
@@ -731,6 +731,8 @@ with demo:
                 inputs=[column_selector],
                 outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
             )
+            with gr.Row():
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("Visualize", elem_id="guardbench-viz-tab", id=1):
             with gr.Row():
@@ -781,8 +783,8 @@ with demo:
                 outputs=[model_selector]
             )
 
-        with gr.TabItem("About", elem_id="guardbench-about-tab", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        # with gr.TabItem("About", elem_id="guardbench-about-tab", id=2):
+        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("Submit", elem_id="guardbench-submit-tab", id=3):
             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
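For orientation, here is a minimal runnable sketch of the tab layout this commit ends up with, assuming the usual surroundings of the hunks (a `demo = gr.Blocks()` container, a `gr.Tabs` group, and the text constants imported from `src.about`); the tab names, placeholder components, and import path outside the diff are assumptions. The effect of the change is that `LLM_BENCHMARKS_TEXT` now renders in a `gr.Row` under the leaderboard instead of in a dedicated About tab.

import gradio as gr

# Assumed import path; the diff only shows that these constants live in src/about.py.
from src.about import LLM_BENCHMARKS_TEXT, EVALUATION_QUEUE_TEXT

with gr.Blocks() as demo:
    with gr.Tabs() as category_tabs:
        with gr.TabItem("Leaderboard", id=0):  # tab name assumed
            gr.Dataframe()  # stand-in for the real leaderboard component
            # Added by this commit: benchmark description shown under the leaderboard
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("Visualize", elem_id="guardbench-viz-tab", id=1):
            with gr.Row():
                gr.Markdown("Visualization goes here")  # placeholder

        # Removed (commented out) by this commit: the dedicated About tab
        # with gr.TabItem("About", elem_id="guardbench-about-tab", id=2):
        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("Submit", elem_id="guardbench-submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()
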
src/about.py
CHANGED
@@ -12,7 +12,7 @@ INTRODUCTION_TEXT = """
 ## Introduction
 
 GuardBench is a comprehensive benchmark for evaluating the safety guardrails of large language models (LLMs).
-This leaderboard tracks model performance across various safety categories, including harmful content detection,
+This leaderboard tracks model performance across various safety categories, including harmful content detection,
 jailbreak resistance, and more.
 
 Models are evaluated on their ability to properly refuse harmful requests and detect problematic content
@@ -20,33 +20,13 @@ across multiple categories and test scenarios.
 """
 
 LLM_BENCHMARKS_TEXT = """
-## GuardBench
+## GuardBench evaluation methodology
 
-GuardBench
+GuardBench checks how well models handle safety challenges — from misinformation and self-harm to sexual content and corruption.
 
-
-- **Manipulation, Deception, and Misinformation**
-- **Creative Content Involving Illicit Themes**
-- **Sexual Content and Violence**
-- **Political Corruption and Legal Evasion**
-- **Labor Exploitation and Human Trafficking**
-- **Environmental and Industrial Harm**
-- **Animal Cruelty and Exploitation**
-- **Self-Harm and Suicidal Ideation**
-- **Safe Prompts**
+Models are tested with regular and adversarial prompts to see if they can avoid saying harmful things.
 
-
-- **Default Prompts**: Standard harmful requests
-- **Jailbreaked Prompts**: Adversarial attempts to bypass safety measures
-- **Default Answers**: Evaluation of harmful content in responses
-- **Jailbreaked Answers**: Evaluation of harmful content in responses to jailbreak attempts
-
-Metrics include:
-- **F1 Binary**: Harmonic mean of precision and recall
-- **Recall Binary**: Proportion of actual harmful content correctly identified
-- **Precision Binary**: Proportion of identified harmful content that was actually harmful
-- **Error Ratio**: Rate of errors during evaluation
-- **Average Runtime**: Average processing time in milliseconds
+We track how accurate they are, how often they make mistakes, and how fast they respond.
 """
 
 EVALUATION_QUEUE_TEXT = """
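As a quick way to preview the rewritten copy, a small sketch that renders the new `INTRODUCTION_TEXT` and `LLM_BENCHMARKS_TEXT` strings on their own; the `src.about` import path assumes the app is launched from the repository root, and this preview script is not part of the repo.

import gradio as gr

# Assumed import path (src/about.py, launched from the repository root).
from src.about import INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT

# Standalone preview of the markdown copy changed in this commit.
with gr.Blocks() as preview:
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    preview.launch()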