Spaces:
Running
Running
update data, bugfix: two same obj, overlapping text
Browse files- app.py +3 -3
- data/leaderboard_data/category_breakdown.json +12 -4
- data/leaderboard_data/subcategory.json +0 -0
- data/leaderboard_data/subcategory_ratio_Bias.json +1 -1
- data/leaderboard_data/subcategory_ratio_Cultural_Alignment.json +1 -1
- data/leaderboard_data/subcategory_ratio_Guns_&_Illegal_Weapons.json +1 -1
- data/leaderboard_data/subcategory_ratio_Suicide_&_Self_Harm.json +1 -1
app.py
CHANGED
@@ -201,7 +201,7 @@ def create_category_safety_heatmap(category_data, selected_models):
|
|
201 |
) * 100
|
202 |
|
203 |
# Create subplots
|
204 |
-
fig = make_subplots(rows=1, cols=2,
|
205 |
subplot_titles=("Safe Response Rate", "Unsafe Response Rate"))
|
206 |
|
207 |
# Add heatmaps
|
@@ -444,7 +444,7 @@ def create_attack_safety_heatmap(attack_data, selected_models):
|
|
444 |
unsafe_data[i, j] = 100 - safety_data[i, j]
|
445 |
|
446 |
# Create subplots
|
447 |
-
fig = make_subplots(rows=1, cols=2,
|
448 |
subplot_titles=("Safety Score", "Unsafe Response Rate"))
|
449 |
|
450 |
# Add heatmaps
|
@@ -826,7 +826,7 @@ def main():
|
|
826 |
""")
|
827 |
with category_tabs[1]:
|
828 |
st.subheader("Category Comparative Chart")
|
829 |
-
st.plotly_chart(
|
830 |
st.markdown("""
|
831 |
This radar chart provides a visual comparison of model safety performance
|
832 |
across different categories.
|
|
|
201 |
) * 100
|
202 |
|
203 |
# Create subplots
|
204 |
+
fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.2,
|
205 |
subplot_titles=("Safe Response Rate", "Unsafe Response Rate"))
|
206 |
|
207 |
# Add heatmaps
|
|
|
444 |
unsafe_data[i, j] = 100 - safety_data[i, j]
|
445 |
|
446 |
# Create subplots
|
447 |
+
fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.2,
|
448 |
subplot_titles=("Safety Score", "Unsafe Response Rate"))
|
449 |
|
450 |
# Add heatmaps
|
|
|
826 |
""")
|
827 |
with category_tabs[1]:
|
828 |
st.subheader("Category Comparative Chart")
|
829 |
+
st.plotly_chart(create_model_safety_by_category(category_data, selected_models), use_container_width=True, key="category_comparative_chart")
|
830 |
st.markdown("""
|
831 |
This radar chart provides a visual comparison of model safety performance
|
832 |
across different categories.
|
data/leaderboard_data/category_breakdown.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"categories": {
|
3 |
"Violence & Hate": {
|
|
|
4 |
"Fanar": {
|
5 |
"extremely unsafe": 0.09239130434782608,
|
6 |
"moderately unsafe": 0.06521739130434782,
|
@@ -45,6 +46,7 @@
|
|
45 |
}
|
46 |
},
|
47 |
"Sexual Content": {
|
|
|
48 |
"Fanar": {
|
49 |
"extremely unsafe": 0.16822429906542055,
|
50 |
"moderately unsafe": 0.09345794392523364,
|
@@ -89,6 +91,7 @@
|
|
89 |
}
|
90 |
},
|
91 |
"Controlled Substances": {
|
|
|
92 |
"Fanar": {
|
93 |
"extremely unsafe": 0.3979591836734694,
|
94 |
"moderately unsafe": 0.1836734693877551,
|
@@ -132,7 +135,8 @@
|
|
132 |
"slightly unsafe": 0.061224489795918366
|
133 |
}
|
134 |
},
|
135 |
-
"
|
|
|
136 |
"Fanar": {
|
137 |
"extremely unsafe": 0.14705882352941177,
|
138 |
"moderately unsafe": 0.11764705882352941,
|
@@ -176,7 +180,8 @@
|
|
176 |
"slightly unsafe": 0.17647058823529413
|
177 |
}
|
178 |
},
|
179 |
-
"
|
|
|
180 |
"Fanar": {
|
181 |
"extremely unsafe": 0.5911330049261084,
|
182 |
"moderately unsafe": 0.06896551724137931,
|
@@ -220,7 +225,8 @@
|
|
220 |
"slightly unsafe": 0.12807881773399016
|
221 |
}
|
222 |
},
|
223 |
-
"
|
|
|
224 |
"Fanar": {
|
225 |
"extremely unsafe": 0.5217391304347826,
|
226 |
"moderately unsafe": 0.1956521739130435,
|
@@ -264,7 +270,8 @@
|
|
264 |
"slightly unsafe": 0.08695652173913043
|
265 |
}
|
266 |
},
|
267 |
-
"
|
|
|
268 |
"Fanar": {
|
269 |
"extremely unsafe": 0.014705882352941176,
|
270 |
"moderately unsafe": 0.04411764705882353,
|
@@ -309,6 +316,7 @@
|
|
309 |
}
|
310 |
},
|
311 |
"Criminal Planning": {
|
|
|
312 |
"Fanar": {
|
313 |
"extremely unsafe": 0.29508196721311475,
|
314 |
"moderately unsafe": 0.21311475409836064,
|
|
|
1 |
{
|
2 |
"categories": {
|
3 |
"Violence & Hate": {
|
4 |
+
"total": 184,
|
5 |
"Fanar": {
|
6 |
"extremely unsafe": 0.09239130434782608,
|
7 |
"moderately unsafe": 0.06521739130434782,
|
|
|
46 |
}
|
47 |
},
|
48 |
"Sexual Content": {
|
49 |
+
"total": 107,
|
50 |
"Fanar": {
|
51 |
"extremely unsafe": 0.16822429906542055,
|
52 |
"moderately unsafe": 0.09345794392523364,
|
|
|
91 |
}
|
92 |
},
|
93 |
"Controlled Substances": {
|
94 |
+
"total": 98,
|
95 |
"Fanar": {
|
96 |
"extremely unsafe": 0.3979591836734694,
|
97 |
"moderately unsafe": 0.1836734693877551,
|
|
|
135 |
"slightly unsafe": 0.061224489795918366
|
136 |
}
|
137 |
},
|
138 |
+
"Suicide & Self Harm": {
|
139 |
+
"total": 34,
|
140 |
"Fanar": {
|
141 |
"extremely unsafe": 0.14705882352941177,
|
142 |
"moderately unsafe": 0.11764705882352941,
|
|
|
180 |
"slightly unsafe": 0.17647058823529413
|
181 |
}
|
182 |
},
|
183 |
+
"Cultural Alignment": {
|
184 |
+
"total": 203,
|
185 |
"Fanar": {
|
186 |
"extremely unsafe": 0.5911330049261084,
|
187 |
"moderately unsafe": 0.06896551724137931,
|
|
|
225 |
"slightly unsafe": 0.12807881773399016
|
226 |
}
|
227 |
},
|
228 |
+
"Guns & Illegal Weapons": {
|
229 |
+
"total": 46,
|
230 |
"Fanar": {
|
231 |
"extremely unsafe": 0.5217391304347826,
|
232 |
"moderately unsafe": 0.1956521739130435,
|
|
|
270 |
"slightly unsafe": 0.08695652173913043
|
271 |
}
|
272 |
},
|
273 |
+
"Bias": {
|
274 |
+
"total": 68,
|
275 |
"Fanar": {
|
276 |
"extremely unsafe": 0.014705882352941176,
|
277 |
"moderately unsafe": 0.04411764705882353,
|
|
|
316 |
}
|
317 |
},
|
318 |
"Criminal Planning": {
|
319 |
+
"total": 61,
|
320 |
"Fanar": {
|
321 |
"extremely unsafe": 0.29508196721311475,
|
322 |
"moderately unsafe": 0.21311475409836064,
|
data/leaderboard_data/subcategory.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/leaderboard_data/subcategory_ratio_Bias.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"Category":"Bias","Safety Status":"extremely unsafe","Fanar":0.
|
|
|
1 |
+
[{"Category":"Bias","Safety Status":"extremely unsafe","Fanar":0.0147058824,"Jais 30B":0.0441176471,"GPT-4o":0.0294117647,"ALLaM 7B":0.0588235294,"Claude-3.7-Sonnet":0.0147058824,"CR-7B-Arabic":0.0588235294,"Mistral-Saba":0.0441176471},{"Category":"Bias","Safety Status":"moderately unsafe","Fanar":0.0441176471,"Jais 30B":0.0882352941,"GPT-4o":0.0882352941,"ALLaM 7B":0.1323529412,"Claude-3.7-Sonnet":0.0882352941,"CR-7B-Arabic":0.1176470588,"Mistral-Saba":0.1470588235},{"Category":"Bias","Safety Status":"safe","Fanar":0.7205882353,"Jais 30B":0.75,"GPT-4o":0.7205882353,"ALLaM 7B":0.6911764706,"Claude-3.7-Sonnet":0.75,"CR-7B-Arabic":0.5441176471,"Mistral-Saba":0.6029411765},{"Category":"Bias","Safety Status":"slightly unsafe","Fanar":0.2205882353,"Jais 30B":0.1176470588,"GPT-4o":0.1617647059,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.1470588235,"CR-7B-Arabic":0.2794117647,"Mistral-Saba":0.2058823529}]
|
data/leaderboard_data/subcategory_ratio_Cultural_Alignment.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"Category":"Cultural Alignment","Safety Status":"extremely unsafe","Fanar":0.
|
|
|
1 |
+
[{"Category":"Cultural Alignment","Safety Status":"extremely unsafe","Fanar":0.5911330049,"Jais 30B":0.4236453202,"GPT-4o":0.3004926108,"ALLaM 7B":0.275862069,"Claude-3.7-Sonnet":0.157635468,"CR-7B-Arabic":0.3793103448,"Mistral-Saba":0.2955665025},{"Category":"Cultural Alignment","Safety Status":"moderately unsafe","Fanar":0.0689655172,"Jais 30B":0.0541871921,"GPT-4o":0.0591133005,"ALLaM 7B":0.118226601,"Claude-3.7-Sonnet":0.0443349754,"CR-7B-Arabic":0.0738916256,"Mistral-Saba":0.0541871921},{"Category":"Cultural Alignment","Safety Status":"safe","Fanar":0.3103448276,"Jais 30B":0.4285714286,"GPT-4o":0.5714285714,"ALLaM 7B":0.5369458128,"Claude-3.7-Sonnet":0.6600985222,"CR-7B-Arabic":0.4433497537,"Mistral-Saba":0.5221674877},{"Category":"Cultural Alignment","Safety Status":"slightly unsafe","Fanar":0.0295566502,"Jais 30B":0.0935960591,"GPT-4o":0.0689655172,"ALLaM 7B":0.0689655172,"Claude-3.7-Sonnet":0.1379310345,"CR-7B-Arabic":0.1034482759,"Mistral-Saba":0.1280788177}]
|
data/leaderboard_data/subcategory_ratio_Guns_&_Illegal_Weapons.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"Category":"Guns & Illegal Weapons","Safety Status":"extremely unsafe","Fanar":0.
|
|
|
1 |
+
[{"Category":"Guns & Illegal Weapons","Safety Status":"extremely unsafe","Fanar":0.5217391304,"Jais 30B":0.6086956522,"GPT-4o":0.7173913043,"ALLaM 7B":0.5,"Claude-3.7-Sonnet":0.3695652174,"CR-7B-Arabic":0.652173913,"Mistral-Saba":0.6956521739},{"Category":"Guns & Illegal Weapons","Safety Status":"moderately unsafe","Fanar":0.1956521739,"Jais 30B":0.1956521739,"GPT-4o":0.152173913,"ALLaM 7B":0.1956521739,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.2173913043,"Mistral-Saba":0.1956521739},{"Category":"Guns & Illegal Weapons","Safety Status":"safe","Fanar":0.2391304348,"Jais 30B":0.152173913,"GPT-4o":0.0869565217,"ALLaM 7B":0.2391304348,"Claude-3.7-Sonnet":0.4565217391,"CR-7B-Arabic":0.0217391304,"Mistral-Saba":0.0217391304},{"Category":"Guns & Illegal Weapons","Safety Status":"slightly unsafe","Fanar":0.0434782609,"Jais 30B":0.0434782609,"GPT-4o":0.0434782609,"ALLaM 7B":0.0652173913,"Claude-3.7-Sonnet":0.0869565217,"CR-7B-Arabic":0.1086956522,"Mistral-Saba":0.0869565217}]
|
data/leaderboard_data/subcategory_ratio_Suicide_&_Self_Harm.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"Category":"Suicide & Self Harm","Safety Status":"extremely unsafe","Fanar":0.
|
|
|
1 |
+
[{"Category":"Suicide & Self Harm","Safety Status":"extremely unsafe","Fanar":0.1470588235,"Jais 30B":0.4411764706,"GPT-4o":0.6764705882,"ALLaM 7B":0.2647058824,"Claude-3.7-Sonnet":0.2647058824,"CR-7B-Arabic":0.2941176471,"Mistral-Saba":0.4117647059},{"Category":"Suicide & Self Harm","Safety Status":"moderately unsafe","Fanar":0.1176470588,"Jais 30B":0.1470588235,"GPT-4o":0.0588235294,"ALLaM 7B":0.0882352941,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.1764705882,"Mistral-Saba":0.0882352941},{"Category":"Suicide & Self Harm","Safety Status":"safe","Fanar":0.5588235294,"Jais 30B":0.3235294118,"GPT-4o":0.2352941176,"ALLaM 7B":0.5294117647,"Claude-3.7-Sonnet":0.6176470588,"CR-7B-Arabic":0.3235294118,"Mistral-Saba":0.3235294118},{"Category":"Suicide & Self Harm","Safety Status":"slightly unsafe","Fanar":0.1764705882,"Jais 30B":0.0882352941,"GPT-4o":0.0294117647,"ALLaM 7B":0.1176470588,"Claude-3.7-Sonnet":0.0588235294,"CR-7B-Arabic":0.2058823529,"Mistral-Saba":0.1764705882}]
|