Update app.py
app.py
CHANGED
@@ -315,6 +315,9 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
     print(f"Bias spec dict: {bias_spec}")
     g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
 
+    attributes_g1 = bias_spec['attributes']['attribute 1']
+    attributes_g2 = bias_spec['attributes']['attribute 2']
+
     # 2. convert to templates
     test_sentences_df['Template'] = test_sentences_df.apply(bt_mgr.sentence_to_template, axis=1)
     print(f"Data with template: {test_sentences_df}")
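The two new lookups read per-attribute term lists straight out of bias_spec. A minimal sketch of the dictionary shape they assume (only the 'attributes' / 'attribute 1' / 'attribute 2' keys are visible in this change; the other keys and values are illustrative placeholders):

```python
# Hypothetical bias_spec shape; only bias_spec['attributes']['attribute 1'/'attribute 2']
# is confirmed by the change above, the rest is a placeholder for illustration.
bias_spec = {
    "social_groups": {
        "group 1": ["he", "him", "his"],
        "group 2": ["she", "her", "hers"],
    },
    "attributes": {
        "attribute 1": ["doctor", "engineer", "scientist"],
        "attribute 2": ["nurse", "teacher", "librarian"],
    },
}

attributes_g1 = bias_spec["attributes"]["attribute 1"]  # terms tied to group 1 stereotypes
attributes_g2 = bias_spec["attributes"]["attribute 2"]  # terms tied to group 2 stereotypes
```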
@@ -346,13 +349,23 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
 
     model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
 
-
+    per_attrib_bias_HTML_stereo = ""
+    num_atts = 0
+    for att, score in attrib_by_score.items():
+        if att in attributes_g1:
+            per_attrib_bias_HTML_stereo += att_bloombergViz(att, score)
+            num_atts += 1
+            if num_atts >= 8:
+                break
+
+    per_attrib_bias_HTML_antistereo = ""
     num_atts = 0
     for att, score in attrib_by_score.items():
-
-
-
-
+        if att in attributes_g2:
+            per_attrib_bias_HTML_antistereo += att_bloombergViz(att, score)
+            num_atts += 1
+            if num_atts >= 8:
+                break
 
     # bias score
     #test_pairs_df['bias_score'] = 0
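Both loops concatenate HTML returned by att_bloombergViz(att, score), which is defined elsewhere in app.py and is not part of this change. A hedged sketch of what such a per-attribute renderer might look like (only the call signature is taken from the diff; the markup is an assumption):

```python
# Hypothetical per-attribute renderer; the real att_bloombergViz lives elsewhere
# in app.py, so this bar-style HTML is purely illustrative.
def att_bloombergViz(att: str, score: float) -> str:
    pct = max(0.0, min(1.0, score)) * 100  # clamp the score into [0, 1] for the bar width
    return (
        f"<div style='margin:4px 0'>"
        f"<span style='display:inline-block;width:140px'>{att}</span>"
        f"<span style='display:inline-block;width:{pct:.0f}%;height:12px;background:#d9534f'></span>"
        f" {score:.2f}"
        f"</div>"
    )
```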
@@ -390,7 +403,8 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
                 gr.update(visible=tabs[1]), # content tab/column 2
                 gr.update(visible=tabs[2]), # content tab/column 3
                 gr.update(value=model_bias_HTML), # per model bias score
-                gr.update(value=
+                gr.update(value=per_attrib_bias_HTML_stereo), # per attribute bias score stereotyped
+                gr.update(value=per_attrib_bias_HTML_antistereo), # per attribute bias score antistereotyped
                 gr.update(value=score_templates_df, visible=True), # Pairs with scores
                 gr.update(value=interpret_msg, visible=True), # Interpretation message
                 gr.update(value=', '.join(g1)), # gr1_fixed
@@ -576,7 +590,13 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
                 # container=True,
                 # min_width=900,
                 # show_label=False)
-
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("Group 1 stereotypes")
+                    attribute_bias_html_stereo = gr.HTML()
+                with gr.Column():
+                    gr.Markdown("Group 2 stereotypes")
+                    attribute_bias_html_antistereo = gr.HTML()
             with gr.Column(scale=1):
                 interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20<a>", visible=False)
                 save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
@@ -612,8 +632,8 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
     # Test bias
     bias_btn.click(fn=startBiasTest,
                    inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
-                   outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_html,
-                            group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
+                   outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_html, attribute_bias_html_stereo, attribute_bias_html_antistereo,
+                            test_pairs, interpretation_msg, group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
                    )
 
     # top breadcrumbs
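Gradio routes the values returned by startBiasTest to the components in outputs= strictly by position, so the two new gr.update(...) entries in the return tuple and the two new components (attribute_bias_html_stereo, attribute_bias_html_antistereo) in the click wiring have to stay in matching order. A self-contained sketch of that contract, using placeholder names rather than the app's real components:

```python
import gradio as gr

def run_test():
    # the n-th returned value is applied to the n-th component listed in outputs=
    return (
        gr.update(value="<b>overall model bias</b>"),  # -> model_bias_html
        gr.update(value="group 1 attribute bars"),     # -> stereo_html
        gr.update(value="group 2 attribute bars"),     # -> antistereo_html
    )

with gr.Blocks() as demo:
    btn = gr.Button("Test bias")
    model_bias_html = gr.HTML()
    with gr.Row():
        with gr.Column():
            gr.Markdown("Group 1 stereotypes")
            stereo_html = gr.HTML()
        with gr.Column():
            gr.Markdown("Group 2 stereotypes")
            antistereo_html = gr.HTML()
    btn.click(fn=run_test, outputs=[model_bias_html, stereo_html, antistereo_html])

demo.launch()
```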