apsys committed on
Commit
7012feb
·
1 Parent(s): 4bcb70a
Files changed (2) hide show
  1. app.py +11 -2
  2. src/display/utils.py +25 -25
app.py CHANGED
@@ -13,7 +13,7 @@ import plotly.graph_objects as go
13
  from apscheduler.schedulers.background import BackgroundScheduler
14
  import numpy as np
15
  from gradio.themes.utils import fonts, colors
16
- from dataclasses import fields, ColumnInfo
17
 
18
  from src.about import (
19
  CITATION_BUTTON_LABEL,
@@ -148,7 +148,16 @@ custom_theme = gr.themes.Default(
148
  block_border_color_dark="#333333", # Cooler Grey
149
  )
150
 
151
- # Define the update_column_choices function before initializing the leaderboard components
 
 
 
 
 
 
 
 
 
152
  def update_column_choices(df):
153
  """Update column choices based on what's actually in the dataframe"""
154
  if df is None or df.empty:
 
13
  from apscheduler.schedulers.background import BackgroundScheduler
14
  import numpy as np
15
  from gradio.themes.utils import fonts, colors
16
+ from dataclasses import fields, dataclass
17
 
18
  from src.about import (
19
  CITATION_BUTTON_LABEL,
 
148
  block_border_color_dark="#333333", # Cooler Grey
149
  )
150
 
151
+ @dataclass
152
+ class ColumnInfo:
153
+ """Information about a column in the leaderboard."""
154
+ name: str
155
+ display_name: str
156
+ type: str = "text"
157
+ hidden: bool = False
158
+ never_hidden: bool = False
159
+ displayed_by_default: bool = True
160
+
161
  def update_column_choices(df):
162
  """Update column choices based on what's actually in the dataframe"""
163
  if df is None or df.empty:
src/display/utils.py CHANGED
@@ -109,7 +109,7 @@ class GuardBenchColumn:
109
  ))
110
  submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
111
  name="submission_date",
112
- display_name="Submission Date",
113
  displayed_by_default=False
114
  ))
115
  version: ColumnInfo = field(default_factory=lambda: ColumnInfo(
@@ -146,37 +146,37 @@ class GuardBenchColumn:
146
  # Default prompts metrics
147
  default_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
148
  name="default_prompts_f1_binary",
149
- display_name="Default Prompts F1 Binary",
150
  type="number",
151
  displayed_by_default=False
152
  ))
153
  default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
154
  name="default_prompts_f1",
155
- display_name="Default Prompts F1",
156
  type="number",
157
  displayed_by_default=False
158
  ))
159
  default_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
160
  name="default_prompts_recall_binary",
161
- display_name="Default Prompts Recall",
162
  type="number",
163
  displayed_by_default=False
164
  ))
165
  default_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
166
  name="default_prompts_precision_binary",
167
- display_name="Default Prompts Precision",
168
  type="number",
169
  displayed_by_default=False
170
  ))
171
  default_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
172
  name="default_prompts_error_ratio",
173
- display_name="Default Prompts Error Ratio",
174
  type="number",
175
  displayed_by_default=False
176
  ))
177
  default_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
178
  name="default_prompts_avg_runtime_ms",
179
- display_name="Default Prompts Avg Runtime (ms)",
180
  type="number",
181
  displayed_by_default=False
182
  ))
@@ -184,37 +184,37 @@ class GuardBenchColumn:
184
  # Jailbreaked prompts metrics
185
  jailbreaked_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
186
  name="jailbreaked_prompts_f1_binary",
187
- display_name="Jailbreaked Prompts F1 Binary",
188
  type="number",
189
  displayed_by_default=False
190
  ))
191
  jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
192
  name="jailbreaked_prompts_f1",
193
- display_name="Jailbreaked Prompts F1",
194
  type="number",
195
  displayed_by_default=False
196
  ))
197
  jailbreaked_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
198
  name="jailbreaked_prompts_recall_binary",
199
- display_name="Jailbreaked Prompts Recall",
200
  type="number",
201
  displayed_by_default=False
202
  ))
203
  jailbreaked_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
204
  name="jailbreaked_prompts_precision_binary",
205
- display_name="Jailbreaked Prompts Precision",
206
  type="number",
207
  displayed_by_default=False
208
  ))
209
  jailbreaked_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
210
  name="jailbreaked_prompts_error_ratio",
211
- display_name="Jailbreaked Prompts Error Ratio",
212
  type="number",
213
  displayed_by_default=False
214
  ))
215
  jailbreaked_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
216
  name="jailbreaked_prompts_avg_runtime_ms",
217
- display_name="Jailbreaked Prompts Avg Runtime (ms)",
218
  type="number",
219
  displayed_by_default=False
220
  ))
@@ -222,37 +222,37 @@ class GuardBenchColumn:
222
  # Default answers metrics
223
  default_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
224
  name="default_answers_f1_binary",
225
- display_name="Default Answers F1 Binary",
226
  type="number",
227
  displayed_by_default=False
228
  ))
229
  default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
230
  name="default_answers_f1",
231
- display_name="Default Answers F1",
232
  type="number",
233
  displayed_by_default=False
234
  ))
235
  default_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
236
  name="default_answers_recall_binary",
237
- display_name="Default Answers Recall",
238
  type="number",
239
  displayed_by_default=False
240
  ))
241
  default_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
242
  name="default_answers_precision_binary",
243
- display_name="Default Answers Precision",
244
  type="number",
245
  displayed_by_default=False
246
  ))
247
  default_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
248
  name="default_answers_error_ratio",
249
- display_name="Default Answers Error Ratio",
250
  type="number",
251
  displayed_by_default=False
252
  ))
253
  default_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
254
  name="default_answers_avg_runtime_ms",
255
- display_name="Default Answers Avg Runtime (ms)",
256
  type="number",
257
  displayed_by_default=False
258
  ))
@@ -260,37 +260,37 @@ class GuardBenchColumn:
260
  # Jailbreaked answers metrics
261
  jailbreaked_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
262
  name="jailbreaked_answers_f1_binary",
263
- display_name="Jailbreaked Answers F1 Binary",
264
  type="number",
265
  displayed_by_default=False
266
  ))
267
  jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
268
  name="jailbreaked_answers_f1",
269
- display_name="Jailbreaked Answers F1",
270
  type="number",
271
  displayed_by_default=False
272
  ))
273
  jailbreaked_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
274
  name="jailbreaked_answers_recall_binary",
275
- display_name="Jailbreaked Answers Recall",
276
  type="number",
277
  displayed_by_default=False
278
  ))
279
  jailbreaked_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
280
  name="jailbreaked_answers_precision_binary",
281
- display_name="Jailbreaked Answers Precision",
282
  type="number",
283
  displayed_by_default=False
284
  ))
285
  jailbreaked_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
286
  name="jailbreaked_answers_error_ratio",
287
- display_name="Jailbreaked Answers Error Ratio",
288
  type="number",
289
  displayed_by_default=False
290
  ))
291
  jailbreaked_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
292
  name="jailbreaked_answers_avg_runtime_ms",
293
- display_name="Jailbreaked Answers Avg Runtime (ms)",
294
  type="number",
295
  displayed_by_default=False
296
  ))
 
109
  ))
110
  submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
111
  name="submission_date",
112
+ display_name="Submission_Date",
113
  displayed_by_default=False
114
  ))
115
  version: ColumnInfo = field(default_factory=lambda: ColumnInfo(
 
146
  # Default prompts metrics
147
  default_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
148
  name="default_prompts_f1_binary",
149
+ display_name="Default_Prompts_F1_Binary",
150
  type="number",
151
  displayed_by_default=False
152
  ))
153
  default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
154
  name="default_prompts_f1",
155
+ display_name="Default_Prompts_F1",
156
  type="number",
157
  displayed_by_default=False
158
  ))
159
  default_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
160
  name="default_prompts_recall_binary",
161
+ display_name="Default_Prompts_Recall",
162
  type="number",
163
  displayed_by_default=False
164
  ))
165
  default_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
166
  name="default_prompts_precision_binary",
167
+ display_name="Default_Prompts_Precision",
168
  type="number",
169
  displayed_by_default=False
170
  ))
171
  default_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
172
  name="default_prompts_error_ratio",
173
+ display_name="Default_Prompts_Error_Ratio",
174
  type="number",
175
  displayed_by_default=False
176
  ))
177
  default_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
178
  name="default_prompts_avg_runtime_ms",
179
+ display_name="Default_Prompts_Avg_Runtime_ms",
180
  type="number",
181
  displayed_by_default=False
182
  ))
 
184
  # Jailbreaked prompts metrics
185
  jailbreaked_prompts_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
186
  name="jailbreaked_prompts_f1_binary",
187
+ display_name="Jailbreaked_Prompts_F1_Binary",
188
  type="number",
189
  displayed_by_default=False
190
  ))
191
  jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
192
  name="jailbreaked_prompts_f1",
193
+ display_name="Jailbreaked_Prompts_F1",
194
  type="number",
195
  displayed_by_default=False
196
  ))
197
  jailbreaked_prompts_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
198
  name="jailbreaked_prompts_recall_binary",
199
+ display_name="Jailbreaked_Prompts_Recall",
200
  type="number",
201
  displayed_by_default=False
202
  ))
203
  jailbreaked_prompts_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
204
  name="jailbreaked_prompts_precision_binary",
205
+ display_name="Jailbreaked_Prompts_Precision",
206
  type="number",
207
  displayed_by_default=False
208
  ))
209
  jailbreaked_prompts_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
210
  name="jailbreaked_prompts_error_ratio",
211
+ display_name="Jailbreaked_Prompts_Error_Ratio",
212
  type="number",
213
  displayed_by_default=False
214
  ))
215
  jailbreaked_prompts_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
216
  name="jailbreaked_prompts_avg_runtime_ms",
217
+ display_name="Jailbreaked_Prompts_Avg_Runtime_ms",
218
  type="number",
219
  displayed_by_default=False
220
  ))
 
222
  # Default answers metrics
223
  default_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
224
  name="default_answers_f1_binary",
225
+ display_name="Default_Answers_F1_Binary",
226
  type="number",
227
  displayed_by_default=False
228
  ))
229
  default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
230
  name="default_answers_f1",
231
+ display_name="Default_Answers_F1",
232
  type="number",
233
  displayed_by_default=False
234
  ))
235
  default_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
236
  name="default_answers_recall_binary",
237
+ display_name="Default_Answers_Recall",
238
  type="number",
239
  displayed_by_default=False
240
  ))
241
  default_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
242
  name="default_answers_precision_binary",
243
+ display_name="Default_Answers_Precision",
244
  type="number",
245
  displayed_by_default=False
246
  ))
247
  default_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
248
  name="default_answers_error_ratio",
249
+ display_name="Default_Answers_Error_Ratio",
250
  type="number",
251
  displayed_by_default=False
252
  ))
253
  default_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
254
  name="default_answers_avg_runtime_ms",
255
+ display_name="Default_Answers_Avg_Runtime_ms",
256
  type="number",
257
  displayed_by_default=False
258
  ))
 
260
  # Jailbreaked answers metrics
261
  jailbreaked_answers_f1_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
262
  name="jailbreaked_answers_f1_binary",
263
+ display_name="Jailbreaked_Answers_F1_Binary",
264
  type="number",
265
  displayed_by_default=False
266
  ))
267
  jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
268
  name="jailbreaked_answers_f1",
269
+ display_name="Jailbreaked_Answers_F1",
270
  type="number",
271
  displayed_by_default=False
272
  ))
273
  jailbreaked_answers_recall_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
274
  name="jailbreaked_answers_recall_binary",
275
+ display_name="Jailbreaked_Answers_Recall",
276
  type="number",
277
  displayed_by_default=False
278
  ))
279
  jailbreaked_answers_precision_binary: ColumnInfo = field(default_factory=lambda: ColumnInfo(
280
  name="jailbreaked_answers_precision_binary",
281
+ display_name="Jailbreaked_Answers_Precision",
282
  type="number",
283
  displayed_by_default=False
284
  ))
285
  jailbreaked_answers_error_ratio: ColumnInfo = field(default_factory=lambda: ColumnInfo(
286
  name="jailbreaked_answers_error_ratio",
287
+ display_name="Jailbreaked_Answers_Error_Ratio",
288
  type="number",
289
  displayed_by_default=False
290
  ))
291
  jailbreaked_answers_avg_runtime_ms: ColumnInfo = field(default_factory=lambda: ColumnInfo(
292
  name="jailbreaked_answers_avg_runtime_ms",
293
+ display_name="Jailbreaked_Answers_Avg_Runtime_ms",
294
  type="number",
295
  displayed_by_default=False
296
  ))