Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -289,6 +289,11 @@ def bloombergViz(val):
|
|
289 |
numUnFilled = numblocks - numFilled
|
290 |
return numFilled * filled + numUnFilled * unfilled
|
291 |
|
|
|
|
|
|
|
|
|
|
|
292 |
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
|
293 |
global G_NUM_SENTENCES
|
294 |
|
@@ -340,6 +345,10 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
340 |
|
341 |
model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
|
342 |
|
|
|
|
|
|
|
|
|
343 |
# bias score
|
344 |
#test_pairs_df['bias_score'] = 0
|
345 |
test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
|
@@ -376,7 +385,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
376 |
gr.update(visible=tabs[1]), # content tab/column 2
|
377 |
gr.update(visible=tabs[2]), # content tab/column 3
|
378 |
gr.update(value=model_bias_HTML), # per model bias score
|
379 |
-
|
380 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
381 |
gr.update(value=interpret_msg, visible=True), # Interpretation message
|
382 |
gr.update(value=', '.join(g1)), # gr1_fixed
|
@@ -557,11 +566,12 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
557 |
# elem_id="res_label",
|
558 |
# show_label=False)
|
559 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
560 |
-
attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
|
|
565 |
with gr.Column(scale=1):
|
566 |
interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20<a>", visible=False)
|
567 |
save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
|
@@ -597,7 +607,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
597 |
# Test bias
|
598 |
bias_btn.click(fn=startBiasTest,
|
599 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
600 |
-
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_html,
|
601 |
group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|
602 |
)
|
603 |
|
|
|
289 |
numUnFilled = numblocks - numFilled
|
290 |
return numFilled * filled + numUnFilled * unfilled
|
291 |
|
292 |
+
def att_bloombergViz(att, val):
    """Render one attribute's bias score as a labeled Bloomberg-style bar.

    Wraps the bar produced by bloombergViz(val) in a <div> headed by the
    attribute name and the score shown as a whole-number percentage.
    """
    bar = bloombergViz(val)
    return f"<div>{att}: {round(val*100)}%<br>{bar}</div>"
|
296 |
+
|
297 |
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
|
298 |
global G_NUM_SENTENCES
|
299 |
|
|
|
345 |
|
346 |
model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
|
347 |
|
348 |
+
# Build the per-attribute bias visualization HTML.
per_attrib_bias_HTML = ""
# NOTE(review): assumes bias_stats_dict['per_attribute'] yields
# (attribute, score) pairs — e.g. a list of 2-tuples. If it is a plain
# dict, iterate .items() instead; confirm where bias_stats_dict is built.
for att, score in bias_stats_dict['per_attribute']:
    # Fix: the original accumulated into `per_attrib_bias`, which is never
    # initialized (NameError) — the initialized/consumed variable is
    # `per_attrib_bias_HTML` (later passed to gr.update(value=...)).
    per_attrib_bias_HTML += att_bloombergViz(att, score)
|
351 |
+
|
352 |
# bias score
|
353 |
#test_pairs_df['bias_score'] = 0
|
354 |
test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
|
|
|
385 |
gr.update(visible=tabs[1]), # content tab/column 2
|
386 |
gr.update(visible=tabs[2]), # content tab/column 3
|
387 |
gr.update(value=model_bias_HTML), # per model bias score
|
388 |
+
gr.update(value=per_attrib_bias_HTML), # per attribute bias score
|
389 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
390 |
gr.update(value=interpret_msg, visible=True), # Interpretation message
|
391 |
gr.update(value=', '.join(g1)), # gr1_fixed
|
|
|
566 |
# elem_id="res_label",
|
567 |
# show_label=False)
|
568 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
569 |
+
# attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
|
570 |
+
# elem_id="per_attrib_label_elem",
|
571 |
+
# container=True,
|
572 |
+
# min_width=900,
|
573 |
+
# show_label=False)
|
574 |
+
attribute_bias_html = gr.HTML()
|
575 |
with gr.Column(scale=1):
|
576 |
interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20<a>", visible=False)
|
577 |
save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
|
|
|
607 |
# Test bias
|
608 |
bias_btn.click(fn=startBiasTest,
|
609 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
610 |
+
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_html, attribute_bias_html, test_pairs, interpretation_msg,
|
611 |
group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|
612 |
)
|
613 |
|