sbmaruf committed on
Commit
36b75ed
·
1 Parent(s): 07f4a1d

Update data; bugfix: two identical objects, overlapping text

Browse files
app.py CHANGED
@@ -201,7 +201,7 @@ def create_category_safety_heatmap(category_data, selected_models):
201
  ) * 100
202
 
203
  # Create subplots
204
- fig = make_subplots(rows=1, cols=2,
205
  subplot_titles=("Safe Response Rate", "Unsafe Response Rate"))
206
 
207
  # Add heatmaps
@@ -444,7 +444,7 @@ def create_attack_safety_heatmap(attack_data, selected_models):
444
  unsafe_data[i, j] = 100 - safety_data[i, j]
445
 
446
  # Create subplots
447
- fig = make_subplots(rows=1, cols=2,
448
  subplot_titles=("Safety Score", "Unsafe Response Rate"))
449
 
450
  # Add heatmaps
@@ -826,7 +826,7 @@ def main():
826
  """)
827
  with category_tabs[1]:
828
  st.subheader("Category Comparative Chart")
829
- st.plotly_chart(create_category_radar_chart(category_data, selected_models), use_container_width=True, key="category_comparative_chart")
830
  st.markdown("""
831
  This radar chart provides a visual comparison of model safety performance
832
  across different categories.
 
201
  ) * 100
202
 
203
  # Create subplots
204
+ fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.2,
205
  subplot_titles=("Safe Response Rate", "Unsafe Response Rate"))
206
 
207
  # Add heatmaps
 
444
  unsafe_data[i, j] = 100 - safety_data[i, j]
445
 
446
  # Create subplots
447
+ fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.2,
448
  subplot_titles=("Safety Score", "Unsafe Response Rate"))
449
 
450
  # Add heatmaps
 
826
  """)
827
  with category_tabs[1]:
828
  st.subheader("Category Comparative Chart")
829
+ st.plotly_chart(create_model_safety_by_category(category_data, selected_models), use_container_width=True, key="category_comparative_chart")
830
  st.markdown("""
831
  This radar chart provides a visual comparison of model safety performance
832
  across different categories.
data/leaderboard_data/category_breakdown.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "categories": {
3
  "Violence & Hate": {
 
4
  "Fanar": {
5
  "extremely unsafe": 0.09239130434782608,
6
  "moderately unsafe": 0.06521739130434782,
@@ -45,6 +46,7 @@
45
  }
46
  },
47
  "Sexual Content": {
 
48
  "Fanar": {
49
  "extremely unsafe": 0.16822429906542055,
50
  "moderately unsafe": 0.09345794392523364,
@@ -89,6 +91,7 @@
89
  }
90
  },
91
  "Controlled Substances": {
 
92
  "Fanar": {
93
  "extremely unsafe": 0.3979591836734694,
94
  "moderately unsafe": 0.1836734693877551,
@@ -132,7 +135,8 @@
132
  "slightly unsafe": 0.061224489795918366
133
  }
134
  },
135
- "Bias": {
 
136
  "Fanar": {
137
  "extremely unsafe": 0.14705882352941177,
138
  "moderately unsafe": 0.11764705882352941,
@@ -176,7 +180,8 @@
176
  "slightly unsafe": 0.17647058823529413
177
  }
178
  },
179
- "Suicide & Self Harm": {
 
180
  "Fanar": {
181
  "extremely unsafe": 0.5911330049261084,
182
  "moderately unsafe": 0.06896551724137931,
@@ -220,7 +225,8 @@
220
  "slightly unsafe": 0.12807881773399016
221
  }
222
  },
223
- "Cultural Alignment": {
 
224
  "Fanar": {
225
  "extremely unsafe": 0.5217391304347826,
226
  "moderately unsafe": 0.1956521739130435,
@@ -264,7 +270,8 @@
264
  "slightly unsafe": 0.08695652173913043
265
  }
266
  },
267
- "Guns & Illegal Weapons": {
 
268
  "Fanar": {
269
  "extremely unsafe": 0.014705882352941176,
270
  "moderately unsafe": 0.04411764705882353,
@@ -309,6 +316,7 @@
309
  }
310
  },
311
  "Criminal Planning": {
 
312
  "Fanar": {
313
  "extremely unsafe": 0.29508196721311475,
314
  "moderately unsafe": 0.21311475409836064,
 
1
  {
2
  "categories": {
3
  "Violence & Hate": {
4
+ "total": 184,
5
  "Fanar": {
6
  "extremely unsafe": 0.09239130434782608,
7
  "moderately unsafe": 0.06521739130434782,
 
46
  }
47
  },
48
  "Sexual Content": {
49
+ "total": 107,
50
  "Fanar": {
51
  "extremely unsafe": 0.16822429906542055,
52
  "moderately unsafe": 0.09345794392523364,
 
91
  }
92
  },
93
  "Controlled Substances": {
94
+ "total": 98,
95
  "Fanar": {
96
  "extremely unsafe": 0.3979591836734694,
97
  "moderately unsafe": 0.1836734693877551,
 
135
  "slightly unsafe": 0.061224489795918366
136
  }
137
  },
138
+ "Suicide & Self Harm": {
139
+ "total": 34,
140
  "Fanar": {
141
  "extremely unsafe": 0.14705882352941177,
142
  "moderately unsafe": 0.11764705882352941,
 
180
  "slightly unsafe": 0.17647058823529413
181
  }
182
  },
183
+ "Cultural Alignment": {
184
+ "total": 203,
185
  "Fanar": {
186
  "extremely unsafe": 0.5911330049261084,
187
  "moderately unsafe": 0.06896551724137931,
 
225
  "slightly unsafe": 0.12807881773399016
226
  }
227
  },
228
+ "Guns & Illegal Weapons": {
229
+ "total": 46,
230
  "Fanar": {
231
  "extremely unsafe": 0.5217391304347826,
232
  "moderately unsafe": 0.1956521739130435,
 
270
  "slightly unsafe": 0.08695652173913043
271
  }
272
  },
273
+ "Bias": {
274
+ "total": 68,
275
  "Fanar": {
276
  "extremely unsafe": 0.014705882352941176,
277
  "moderately unsafe": 0.04411764705882353,
 
316
  }
317
  },
318
  "Criminal Planning": {
319
+ "total": 61,
320
  "Fanar": {
321
  "extremely unsafe": 0.29508196721311475,
322
  "moderately unsafe": 0.21311475409836064,
data/leaderboard_data/subcategory.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/leaderboard_data/subcategory_ratio_Bias.json CHANGED
@@ -1 +1 @@
1
- [{"Category":"Bias","Safety Status":"extremely unsafe","Fanar":0.1470588235,"Jais 30B":0.4411764706,"GPT-4o":0.6764705882,"ALLaM 7B":0.2647058824,"Claude-3.7-Sonnet":0.2647058824,"CR-7B-Arabic":0.2941176471,"Mistral-Saba":0.4117647059},{"Category":"Bias","Safety Status":"moderately unsafe","Fanar":0.1176470588,"Jais 30B":0.1470588235,"GPT-4o":0.0588235294,"ALLaM 7B":0.0882352941,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.1764705882,"Mistral-Saba":0.0882352941},{"Category":"Bias","Safety Status":"safe","Fanar":0.5588235294,"Jais 30B":0.3235294118,"GPT-4o":0.2352941176,"ALLaM 7B":0.5294117647,"Claude-3.7-Sonnet":0.6176470588,"CR-7B-Arabic":0.3235294118,"Mistral-Saba":0.3235294118},{"Category":"Bias","Safety Status":"slightly unsafe","Fanar":0.1764705882,"Jais 30B":0.0882352941,"GPT-4o":0.0294117647,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.2058823529,"Mistral-Saba":0.1764705882}]
 
1
+ [{"Category":"Bias","Safety Status":"extremely unsafe","Fanar":0.0147058824,"Jais 30B":0.0441176471,"GPT-4o":0.0294117647,"ALLaM 7B":0.0588235294,"Claude-3.7-Sonnet":0.0147058824,"CR-7B-Arabic":0.0588235294,"Mistral-Saba":0.0441176471},{"Category":"Bias","Safety Status":"moderately unsafe","Fanar":0.0441176471,"Jais 30B":0.0882352941,"GPT-4o":0.0882352941,"ALLaM 7B":0.1323529412,"Claude-3.7-Sonnet":0.0882352941,"CR-7B-Arabic":0.1176470588,"Mistral-Saba":0.1470588235},{"Category":"Bias","Safety Status":"safe","Fanar":0.7205882353,"Jais 30B":0.75,"GPT-4o":0.7205882353,"ALLaM 7B":0.6911764706,"Claude-3.7-Sonnet":0.75,"CR-7B-Arabic":0.5441176471,"Mistral-Saba":0.6029411765},{"Category":"Bias","Safety Status":"slightly unsafe","Fanar":0.2205882353,"Jais 30B":0.1176470588,"GPT-4o":0.1617647059,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.1470588235,"CR-7B-Arabic":0.2794117647,"Mistral-Saba":0.2058823529}]
data/leaderboard_data/subcategory_ratio_Cultural_Alignment.json CHANGED
@@ -1 +1 @@
1
- [{"Category":"Cultural Alignment","Safety Status":"extremely unsafe","Fanar":0.5217391304,"Jais 30B":0.6086956522,"GPT-4o":0.7173913043,"ALLaM 7B":0.5,"Claude-3.7-Sonnet":0.3695652174,"CR-7B-Arabic":0.652173913,"Mistral-Saba":0.6956521739},{"Category":"Cultural Alignment","Safety Status":"moderately unsafe","Fanar":0.1956521739,"Jais 30B":0.1956521739,"GPT-4o":0.152173913,"ALLaM 7B":0.1956521739,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.2173913043,"Mistral-Saba":0.1956521739},{"Category":"Cultural Alignment","Safety Status":"safe","Fanar":0.2391304348,"Jais 30B":0.152173913,"GPT-4o":0.0869565217,"ALLaM 7B":0.2391304348,"Claude-3.7-Sonnet":0.4565217391,"CR-7B-Arabic":0.0217391304,"Mistral-Saba":0.0217391304},{"Category":"Cultural Alignment","Safety Status":"slightly unsafe","Fanar":0.0434782609,"Jais 30B":0.0434782609,"GPT-4o":0.0434782609,"ALLaM 7B":0.0652173913,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.1086956522,"Mistral-Saba":0.0869565217}]
 
1
+ [{"Category":"Cultural Alignment","Safety Status":"extremely unsafe","Fanar":0.5911330049,"Jais 30B":0.4236453202,"GPT-4o":0.3004926108,"ALLaM 7B":0.275862069,"Claude-3.7-Sonnet":0.157635468,"CR-7B-Arabic":0.3793103448,"Mistral-Saba":0.2955665025},{"Category":"Cultural Alignment","Safety Status":"moderately unsafe","Fanar":0.0689655172,"Jais 30B":0.0541871921,"GPT-4o":0.0591133005,"ALLaM 7B":0.118226601,"Claude-3.7-Sonnet":0.0443349754,"CR-7B-Arabic":0.0738916256,"Mistral-Saba":0.0541871921},{"Category":"Cultural Alignment","Safety Status":"safe","Fanar":0.3103448276,"Jais 30B":0.4285714286,"GPT-4o":0.5714285714,"ALLaM 7B":0.5369458128,"Claude-3.7-Sonnet":0.6600985222,"CR-7B-Arabic":0.4433497537,"Mistral-Saba":0.5221674877},{"Category":"Cultural Alignment","Safety Status":"slightly unsafe","Fanar":0.0295566502,"Jais 30B":0.0935960591,"GPT-4o":0.0689655172,"ALLaM 7B":0.0689655172,"Claude-3.7-Sonnet":0.1379310345,"CR-7B-Arabic":0.1034482759,"Mistral-Saba":0.1280788177}]
data/leaderboard_data/subcategory_ratio_Guns_&_Illegal_Weapons.json CHANGED
@@ -1 +1 @@
1
- [{"Category":"Guns & Illegal Weapons","Safety Status":"extremely unsafe","Fanar":0.0147058824,"Jais 30B":0.0441176471,"GPT-4o":0.0294117647,"ALLaM 7B":0.0588235294,"Claude-3.7-Sonnet":0.0147058824,"CR-7B-Arabic":0.0588235294,"Mistral-Saba":0.0441176471},{"Category":"Guns & Illegal Weapons","Safety Status":"moderately unsafe","Fanar":0.0441176471,"Jais 30B":0.0882352941,"GPT-4o":0.0882352941,"ALLaM 7B":0.1323529412,"Claude-3.7-Sonnet":0.0882352941,"CR-7B-Arabic":0.1176470588,"Mistral-Saba":0.1470588235},{"Category":"Guns & Illegal Weapons","Safety Status":"safe","Fanar":0.7205882353,"Jais 30B":0.75,"GPT-4o":0.7205882353,"ALLaM 7B":0.6911764706,"Claude-3.7-Sonnet":0.75,"CR-7B-Arabic":0.5441176471,"Mistral-Saba":0.6029411765},{"Category":"Guns & Illegal Weapons","Safety Status":"slightly unsafe","Fanar":0.2205882353,"Jais 30B":0.1176470588,"GPT-4o":0.1617647059,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.1470588235,"CR-7B-Arabic":0.2794117647,"Mistral-Saba":0.2058823529}]
 
1
+ [{"Category":"Guns & Illegal Weapons","Safety Status":"extremely unsafe","Fanar":0.5217391304,"Jais 30B":0.6086956522,"GPT-4o":0.7173913043,"ALLaM 7B":0.5,"Claude-3.7-Sonnet":0.3695652174,"CR-7B-Arabic":0.652173913,"Mistral-Saba":0.6956521739},{"Category":"Guns & Illegal Weapons","Safety Status":"moderately unsafe","Fanar":0.1956521739,"Jais 30B":0.1956521739,"GPT-4o":0.152173913,"ALLaM 7B":0.1956521739,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.2173913043,"Mistral-Saba":0.1956521739},{"Category":"Guns & Illegal Weapons","Safety Status":"safe","Fanar":0.2391304348,"Jais 30B":0.152173913,"GPT-4o":0.0869565217,"ALLaM 7B":0.2391304348,"Claude-3.7-Sonnet":0.4565217391,"CR-7B-Arabic":0.0217391304,"Mistral-Saba":0.0217391304},{"Category":"Guns & Illegal Weapons","Safety Status":"slightly unsafe","Fanar":0.0434782609,"Jais 30B":0.0434782609,"GPT-4o":0.0434782609,"ALLaM 7B":0.0652173913,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.1086956522,"Mistral-Saba":0.0869565217}]
data/leaderboard_data/subcategory_ratio_Suicide_&_Self_Harm.json CHANGED
@@ -1 +1 @@
1
- [{"Category":"Suicide & Self Harm","Safety Status":"extremely unsafe","Fanar":0.5911330049,"Jais 30B":0.4236453202,"GPT-4o":0.3004926108,"ALLaM 7B":0.275862069,"Claude-3.7-Sonnet":0.157635468,"CR-7B-Arabic":0.3793103448,"Mistral-Saba":0.2955665025},{"Category":"Suicide & Self Harm","Safety Status":"moderately unsafe","Fanar":0.0689655172,"Jais 30B":0.0541871921,"GPT-4o":0.0591133005,"ALLaM 7B":0.118226601,"Claude-3.7-Sonnet":0.0443349754,"CR-7B-Arabic":0.0738916256,"Mistral-Saba":0.0541871921},{"Category":"Suicide & Self Harm","Safety Status":"safe","Fanar":0.3103448276,"Jais 30B":0.4285714286,"GPT-4o":0.5714285714,"ALLaM 7B":0.5369458128,"Claude-3.7-Sonnet":0.6600985222,"CR-7B-Arabic":0.4433497537,"Mistral-Saba":0.5221674877},{"Category":"Suicide & Self Harm","Safety Status":"slightly unsafe","Fanar":0.0295566502,"Jais 30B":0.0935960591,"GPT-4o":0.0689655172,"ALLaM 7B":0.0689655172,"Claude-3.7-Sonnet":0.1379310345,"CR-7B-Arabic":0.1034482759,"Mistral-Saba":0.1280788177}]
 
1
+ [{"Category":"Suicide & Self Harm","Safety Status":"extremely unsafe","Fanar":0.1470588235,"Jais 30B":0.4411764706,"GPT-4o":0.6764705882,"ALLaM 7B":0.2647058824,"Claude-3.7-Sonnet":0.2647058824,"CR-7B-Arabic":0.2941176471,"Mistral-Saba":0.4117647059},{"Category":"Suicide & Self Harm","Safety Status":"moderately unsafe","Fanar":0.1176470588,"Jais 30B":0.1470588235,"GPT-4o":0.0588235294,"ALLaM 7B":0.0882352941,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.1764705882,"Mistral-Saba":0.0882352941},{"Category":"Suicide & Self Harm","Safety Status":"safe","Fanar":0.5588235294,"Jais 30B":0.3235294118,"GPT-4o":0.2352941176,"ALLaM 7B":0.5294117647,"Claude-3.7-Sonnet":0.6176470588,"CR-7B-Arabic":0.3235294118,"Mistral-Saba":0.3235294118},{"Category":"Suicide & Self Harm","Safety Status":"slightly unsafe","Fanar":0.1764705882,"Jais 30B":0.0882352941,"GPT-4o":0.0294117647,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.2058823529,"Mistral-Saba":0.1764705882}]