Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -272,6 +272,23 @@ def retrieveSentences(gr1, gr2, att1, att2, progress=gr.Progress()):
|
|
272 |
gr.update(value=', '.join(a2)) # att2_fixed
|
273 |
)
|
274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
|
276 |
global G_NUM_SENTENCES
|
277 |
|
@@ -321,6 +338,8 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
321 |
|
322 |
per_attrib_bias = bias_stats_dict['per_attribute']
|
323 |
|
|
|
|
|
324 |
# bias score
|
325 |
#test_pairs_df['bias_score'] = 0
|
326 |
test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
|
@@ -356,7 +375,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
356 |
gr.update(visible=tabs[0]), # content tab/column 1
|
357 |
gr.update(visible=tabs[1]), # content tab/column 2
|
358 |
gr.update(visible=tabs[2]), # content tab/column 3
|
359 |
-
|
360 |
per_attrib_bias, # per attribute bias score
|
361 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
362 |
gr.update(value=interpret_msg, visible=True), # Interpretation message
|
@@ -533,9 +552,10 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
533 |
with gr.Row():
|
534 |
with gr.Column(scale=2):
|
535 |
lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
|
536 |
-
|
537 |
-
|
538 |
-
|
|
|
539 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
540 |
attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
|
541 |
elem_id="per_attrib_label_elem",
|
@@ -575,7 +595,8 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
575 |
tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn ])
|
576 |
|
577 |
# Test bias
|
578 |
-
bias_btn.click(fn=
|
|
|
579 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
580 |
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_labels, test_pairs, interpretation_msg,
|
581 |
group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|
|
|
272 |
gr.update(value=', '.join(a2)) # att2_fixed
|
273 |
)
|
274 |
|
275 |
+
# Whole-number percentages that are drawn with a 4-block row.
_FOUR_BLOCK_PERCENTS = frozenset({0, 25, 26, 27, 48, 49, 50, 51, 52, 73, 74, 75})

# Whole-number percentages that are drawn with a 9-block row.
_NINE_BLOCK_PERCENTS = frozenset({
    11, 12, 13, 22, 23, 24, 32, 33, 34, 42, 43, 44, 45, 46,
    53, 54, 55, 56, 57, 64, 65, 66, 67, 76, 77, 86, 87, 88,
})


def bloombergViz(val: float) -> str:
    """Render a fraction as a row of filled/unfilled square blocks in HTML.

    The fraction is converted to a whole-number percentage, which selects
    the row length (4, 9 or 10 blocks; 10 is the fallback). The percentage
    is then mapped onto that many blocks: dark (#555) squares for the
    filled share followed by light (#999) squares for the remainder.

    Args:
        val: Fraction to display. Values outside [0, 1] are clamped so the
            row always contains exactly 4, 9 or 10 blocks.

    Returns:
        An HTML string of inline-block ``<div>`` squares, each followed by
        a single space.
    """
    # Clamp first: an out-of-range value would otherwise produce a negative
    # filled or unfilled count and therefore a row of the wrong length.
    clamped = min(max(val, 0.0), 1.0)

    # int() guards against round() handing back a float-like object, which
    # would never match the integer bucket sets below.
    percent = int(round(clamped * 100))

    # options: 4, 9, 10
    if percent in _FOUR_BLOCK_PERCENTS:
        num_blocks = 4
    elif percent in _NINE_BLOCK_PERCENTS:
        num_blocks = 9
    else:
        num_blocks = 10

    filled = "<div style='height:12px;width:12px;background-color:#555;display:inline-block'></div> "
    unfilled = "<div style='height:12px;width:12px;background-color:#999;display:inline-block'></div> "

    # NOTE: built-in round() rounds exact halves to the nearest even integer.
    num_filled = round((percent / 100) * num_blocks)
    num_unfilled = num_blocks - num_filled
    return num_filled * filled + num_unfilled * unfilled
|
291 |
+
|
292 |
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
|
293 |
global G_NUM_SENTENCES
|
294 |
|
|
|
338 |
|
339 |
per_attrib_bias = bias_stats_dict['per_attribute']
|
340 |
|
341 |
+
model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
|
342 |
+
|
343 |
# bias score
|
344 |
#test_pairs_df['bias_score'] = 0
|
345 |
test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
|
|
|
375 |
gr.update(visible=tabs[0]), # content tab/column 1
|
376 |
gr.update(visible=tabs[1]), # content tab/column 2
|
377 |
gr.update(visible=tabs[2]), # content tab/column 3
|
378 |
+
gr.update(value=model_bias_HTML), # per model bias score
|
379 |
per_attrib_bias, # per attribute bias score
|
380 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
381 |
gr.update(value=interpret_msg, visible=True), # Interpretation message
|
|
|
552 |
with gr.Row():
|
553 |
with gr.Column(scale=2):
|
554 |
lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
|
555 |
+
model_bias_html = gr.HTML()
|
556 |
+
# model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
|
557 |
+
# elem_id="res_label",
|
558 |
+
# show_label=False)
|
559 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
560 |
attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
|
561 |
elem_id="per_attrib_label_elem",
|
|
|
595 |
tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn ])
|
596 |
|
597 |
# Test bias
|
598 |
+
bias_btn.click(fn=
|
599 |
+
,
|
600 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
601 |
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_labels, test_pairs, interpretation_msg,
|
602 |
group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|