mode
Browse files
- app.py +11 -2
- src/display/utils.py +25 -25
app.py
CHANGED
@@ -13,7 +13,7 @@ import plotly.graph_objects as go
|
|
13 |
from apscheduler.schedulers.background import BackgroundScheduler
|
14 |
import numpy as np
|
15 |
from gradio.themes.utils import fonts, colors
|
16 |
-
from dataclasses import fields,
|
17 |
|
18 |
from src.about import (
|
19 |
CITATION_BUTTON_LABEL,
|
@@ -148,7 +148,16 @@ custom_theme = gr.themes.Default(
|
|
148 |
block_border_color_dark="#333333", # Cooler Grey
|
149 |
)
|
150 |
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
def update_column_choices(df):
|
153 |
"""Update column choices based on what's actually in the dataframe"""
|
154 |
if df is None or df.empty:
|
|
|
13 |
from apscheduler.schedulers.background import BackgroundScheduler
|
14 |
import numpy as np
|
15 |
from gradio.themes.utils import fonts, colors
|
16 |
+
from dataclasses import fields, dataclass
|
17 |
|
18 |
from src.about import (
|
19 |
CITATION_BUTTON_LABEL,
|
|
|
148 |
block_border_color_dark="#333333", # Cooler Grey
|
149 |
)
|
150 |
|
151 |
+
@dataclass
|
152 |
+
class ColumnInfo:
|
153 |
+
"""Information about a column in the leaderboard."""
|
154 |
+
name: str
|
155 |
+
display_name: str
|
156 |
+
type: str = "text"
|
157 |
+
hidden: bool = False
|
158 |
+
never_hidden: bool = False
|
159 |
+
displayed_by_default: bool = True
|
160 |
+
|
161 |
def update_column_choices(df):
|
162 |
"""Update column choices based on what's actually in the dataframe"""
|
163 |
if df is None or df.empty:
|
src/display/utils.py
CHANGED
@@ -109,7 +109,7 @@ class GuardBenchColumn:
|
|
109 |
))
|
110 |
submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
111 |
name="submission_date",
|
112 |
-
display_name="
|
113 |
displayed_by_default=False
|
114 |
))
|
115 |
version: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
@@ -146,37 +146,37 @@ class GuardBenchColumn:
|
|
146 |
# Default prompts metrics
|
147 |
default_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
148 |
name="default_prompts_f1_binary",
|
149 |
-
display_name="
|
150 |
type="number",
|
151 |
displayed_by_default=False
|
152 |
))
|
153 |
default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
154 |
name="default_prompts_f1",
|
155 |
-
display_name="
|
156 |
type="number",
|
157 |
displayed_by_default=False
|
158 |
))
|
159 |
default_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
160 |
name="default_prompts_recall_binary",
|
161 |
-
display_name="
|
162 |
type="number",
|
163 |
displayed_by_default=False
|
164 |
))
|
165 |
default_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
166 |
name="default_prompts_precision_binary",
|
167 |
-
display_name="
|
168 |
type="number",
|
169 |
displayed_by_default=False
|
170 |
))
|
171 |
default_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
172 |
name="default_prompts_error_ratio",
|
173 |
-
display_name="
|
174 |
type="number",
|
175 |
displayed_by_default=False
|
176 |
))
|
177 |
default_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
178 |
name="default_prompts_avg_runtime_ms",
|
179 |
-
display_name="
|
180 |
type="number",
|
181 |
displayed_by_default=False
|
182 |
))
|
@@ -184,37 +184,37 @@ class GuardBenchColumn:
|
|
184 |
# Jailbreaked prompts metrics
|
185 |
jailbreaked_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
186 |
name="jailbreaked_prompts_f1_binary",
|
187 |
-
display_name="
|
188 |
type="number",
|
189 |
displayed_by_default=False
|
190 |
))
|
191 |
jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
192 |
name="jailbreaked_prompts_f1",
|
193 |
-
display_name="
|
194 |
type="number",
|
195 |
displayed_by_default=False
|
196 |
))
|
197 |
jailbreaked_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
198 |
name="jailbreaked_prompts_recall_binary",
|
199 |
-
display_name="
|
200 |
type="number",
|
201 |
displayed_by_default=False
|
202 |
))
|
203 |
jailbreaked_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
204 |
name="jailbreaked_prompts_precision_binary",
|
205 |
-
display_name="
|
206 |
type="number",
|
207 |
displayed_by_default=False
|
208 |
))
|
209 |
jailbreaked_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
210 |
name="jailbreaked_prompts_error_ratio",
|
211 |
-
display_name="
|
212 |
type="number",
|
213 |
displayed_by_default=False
|
214 |
))
|
215 |
jailbreaked_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
216 |
name="jailbreaked_prompts_avg_runtime_ms",
|
217 |
-
display_name="
|
218 |
type="number",
|
219 |
displayed_by_default=False
|
220 |
))
|
@@ -222,37 +222,37 @@ class GuardBenchColumn:
|
|
222 |
# Default answers metrics
|
223 |
default_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
224 |
name="default_answers_f1_binary",
|
225 |
-
display_name="
|
226 |
type="number",
|
227 |
displayed_by_default=False
|
228 |
))
|
229 |
default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
230 |
name="default_answers_f1",
|
231 |
-
display_name="
|
232 |
type="number",
|
233 |
displayed_by_default=False
|
234 |
))
|
235 |
default_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
236 |
name="default_answers_recall_binary",
|
237 |
-
display_name="
|
238 |
type="number",
|
239 |
displayed_by_default=False
|
240 |
))
|
241 |
default_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
242 |
name="default_answers_precision_binary",
|
243 |
-
display_name="
|
244 |
type="number",
|
245 |
displayed_by_default=False
|
246 |
))
|
247 |
default_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
248 |
name="default_answers_error_ratio",
|
249 |
-
display_name="
|
250 |
type="number",
|
251 |
displayed_by_default=False
|
252 |
))
|
253 |
default_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
254 |
name="default_answers_avg_runtime_ms",
|
255 |
-
display_name="
|
256 |
type="number",
|
257 |
displayed_by_default=False
|
258 |
))
|
@@ -260,37 +260,37 @@ class GuardBenchColumn:
|
|
260 |
# Jailbreaked answers metrics
|
261 |
jailbreaked_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
262 |
name="jailbreaked_answers_f1_binary",
|
263 |
-
display_name="
|
264 |
type="number",
|
265 |
displayed_by_default=False
|
266 |
))
|
267 |
jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
268 |
name="jailbreaked_answers_f1",
|
269 |
-
display_name="
|
270 |
type="number",
|
271 |
displayed_by_default=False
|
272 |
))
|
273 |
jailbreaked_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
274 |
name="jailbreaked_answers_recall_binary",
|
275 |
-
display_name="
|
276 |
type="number",
|
277 |
displayed_by_default=False
|
278 |
))
|
279 |
jailbreaked_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
280 |
name="jailbreaked_answers_precision_binary",
|
281 |
-
display_name="
|
282 |
type="number",
|
283 |
displayed_by_default=False
|
284 |
))
|
285 |
jailbreaked_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
286 |
name="jailbreaked_answers_error_ratio",
|
287 |
-
display_name="
|
288 |
type="number",
|
289 |
displayed_by_default=False
|
290 |
))
|
291 |
jailbreaked_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
292 |
name="jailbreaked_answers_avg_runtime_ms",
|
293 |
-
display_name="
|
294 |
type="number",
|
295 |
displayed_by_default=False
|
296 |
))
|
|
|
109 |
))
|
110 |
submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
111 |
name="submission_date",
|
112 |
+
display_name="Submission_Date",
|
113 |
displayed_by_default=False
|
114 |
))
|
115 |
version: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
|
|
146 |
# Default prompts metrics
|
147 |
default_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
148 |
name="default_prompts_f1_binary",
|
149 |
+
display_name="Default_Prompts_F1_Binary",
|
150 |
type="number",
|
151 |
displayed_by_default=False
|
152 |
))
|
153 |
default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
154 |
name="default_prompts_f1",
|
155 |
+
display_name="Default_Prompts_F1",
|
156 |
type="number",
|
157 |
displayed_by_default=False
|
158 |
))
|
159 |
default_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
160 |
name="default_prompts_recall_binary",
|
161 |
+
display_name="Default_Prompts_Recall",
|
162 |
type="number",
|
163 |
displayed_by_default=False
|
164 |
))
|
165 |
default_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
166 |
name="default_prompts_precision_binary",
|
167 |
+
display_name="Default_Prompts_Precision",
|
168 |
type="number",
|
169 |
displayed_by_default=False
|
170 |
))
|
171 |
default_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
172 |
name="default_prompts_error_ratio",
|
173 |
+
display_name="Default_Prompts_Error_Ratio",
|
174 |
type="number",
|
175 |
displayed_by_default=False
|
176 |
))
|
177 |
default_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
178 |
name="default_prompts_avg_runtime_ms",
|
179 |
+
display_name="Default_Prompts_Avg_Runtime_ms",
|
180 |
type="number",
|
181 |
displayed_by_default=False
|
182 |
))
|
|
|
184 |
# Jailbreaked prompts metrics
|
185 |
jailbreaked_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
186 |
name="jailbreaked_prompts_f1_binary",
|
187 |
+
display_name="Jailbreaked_Prompts_F1_Binary",
|
188 |
type="number",
|
189 |
displayed_by_default=False
|
190 |
))
|
191 |
jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
192 |
name="jailbreaked_prompts_f1",
|
193 |
+
display_name="Jailbreaked_Prompts_F1",
|
194 |
type="number",
|
195 |
displayed_by_default=False
|
196 |
))
|
197 |
jailbreaked_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
198 |
name="jailbreaked_prompts_recall_binary",
|
199 |
+
display_name="Jailbreaked_Prompts_Recall",
|
200 |
type="number",
|
201 |
displayed_by_default=False
|
202 |
))
|
203 |
jailbreaked_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
204 |
name="jailbreaked_prompts_precision_binary",
|
205 |
+
display_name="Jailbreaked_Prompts_Precision",
|
206 |
type="number",
|
207 |
displayed_by_default=False
|
208 |
))
|
209 |
jailbreaked_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
210 |
name="jailbreaked_prompts_error_ratio",
|
211 |
+
display_name="Jailbreaked_Prompts_Error_Ratio",
|
212 |
type="number",
|
213 |
displayed_by_default=False
|
214 |
))
|
215 |
jailbreaked_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
216 |
name="jailbreaked_prompts_avg_runtime_ms",
|
217 |
+
display_name="Jailbreaked_Prompts_Avg_Runtime_ms",
|
218 |
type="number",
|
219 |
displayed_by_default=False
|
220 |
))
|
|
|
222 |
# Default answers metrics
|
223 |
default_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
224 |
name="default_answers_f1_binary",
|
225 |
+
display_name="Default_Answers_F1_Binary",
|
226 |
type="number",
|
227 |
displayed_by_default=False
|
228 |
))
|
229 |
default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
230 |
name="default_answers_f1",
|
231 |
+
display_name="Default_Answers_F1",
|
232 |
type="number",
|
233 |
displayed_by_default=False
|
234 |
))
|
235 |
default_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
236 |
name="default_answers_recall_binary",
|
237 |
+
display_name="Default_Answers_Recall",
|
238 |
type="number",
|
239 |
displayed_by_default=False
|
240 |
))
|
241 |
default_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
242 |
name="default_answers_precision_binary",
|
243 |
+
display_name="Default_Answers_Precision",
|
244 |
type="number",
|
245 |
displayed_by_default=False
|
246 |
))
|
247 |
default_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
248 |
name="default_answers_error_ratio",
|
249 |
+
display_name="Default_Answers_Error_Ratio",
|
250 |
type="number",
|
251 |
displayed_by_default=False
|
252 |
))
|
253 |
default_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
254 |
name="default_answers_avg_runtime_ms",
|
255 |
+
display_name="Default_Answers_Avg_Runtime_ms",
|
256 |
type="number",
|
257 |
displayed_by_default=False
|
258 |
))
|
|
|
260 |
# Jailbreaked answers metrics
|
261 |
jailbreaked_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
262 |
name="jailbreaked_answers_f1_binary",
|
263 |
+
display_name="Jailbreaked_Answers_F1_Binary",
|
264 |
type="number",
|
265 |
displayed_by_default=False
|
266 |
))
|
267 |
jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
268 |
name="jailbreaked_answers_f1",
|
269 |
+
display_name="Jailbreaked_Answers_F1",
|
270 |
type="number",
|
271 |
displayed_by_default=False
|
272 |
))
|
273 |
jailbreaked_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
274 |
name="jailbreaked_answers_recall_binary",
|
275 |
+
display_name="Jailbreaked_Answers_Recall",
|
276 |
type="number",
|
277 |
displayed_by_default=False
|
278 |
))
|
279 |
jailbreaked_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
280 |
name="jailbreaked_answers_precision_binary",
|
281 |
+
display_name="Jailbreaked_Answers_Precision",
|
282 |
type="number",
|
283 |
displayed_by_default=False
|
284 |
))
|
285 |
jailbreaked_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
286 |
name="jailbreaked_answers_error_ratio",
|
287 |
+
display_name="Jailbreaked_Answers_Error_Ratio",
|
288 |
type="number",
|
289 |
displayed_by_default=False
|
290 |
))
|
291 |
jailbreaked_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
|
292 |
name="jailbreaked_answers_avg_runtime_ms",
|
293 |
+
display_name="Jailbreaked_Answers_Avg_Runtime_ms",
|
294 |
type="number",
|
295 |
displayed_by_default=False
|
296 |
))
|