bias-test-gpt-breadcrumbs

Sleeping

App Files Files Community

rjiang12 committed on Jun 30, 2023

Commit

a4f417f

1 Parent(s): 66a14f2

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -5

app.py CHANGED Viewed

@@ -272,6 +272,23 @@ def retrieveSentences(gr1, gr2, att1, att2, progress=gr.Progress()):
             gr.update(value=', '.join(a2))  # att2_fixed
         )
 def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
     global G_NUM_SENTENCES
@@ -321,6 +338,8 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
     per_attrib_bias = bias_stats_dict['per_attribute']
     # bias score
     #test_pairs_df['bias_score'] = 0
     test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
@@ -356,7 +375,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
             gr.update(visible=tabs[0]), # content tab/column 1
             gr.update(visible=tabs[1]), # content tab/column 2
             gr.update(visible=tabs[2]), # content tab/column 3
-            model_bias_dict, # per model bias score
             per_attrib_bias, # per attribute bias score
             gr.update(value=score_templates_df, visible=True), # Pairs with scores
             gr.update(value=interpret_msg, visible=True), # Interpretation message
@@ -533,9 +552,10 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
         with gr.Row():
             with gr.Column(scale=2):
                 lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
-                model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
-                                            elem_id="res_label",
-                                            show_label=False)
                 lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
                 attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
                                                 elem_id="per_attrib_label_elem",
@@ -575,7 +595,8 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
                            tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn ])
     # Test bias
-    bias_btn.click(fn=startBiasTest,
                    inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
                    outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_labels, test_pairs, interpretation_msg,
                             group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]

             gr.update(value=', '.join(a2))  # att2_fixed
         )
def bloombergViz(val):
    """Render a proportion as a row of filled and unfilled HTML squares.

    Args:
        val: Proportion to display, nominally between 0 and 1. Values
            outside that range are clamped to it so the bar never holds
            more squares than the selected grid size.

    Returns:
        A string of inline-block ``<div>`` squares: the filled ones first,
        then the unfilled ones. The total is 4, 9 or 10 squares.
    """
    # Clamp first: an out-of-range value would otherwise produce more filled
    # (or unfilled) squares than the grid is meant to contain.
    bounded = min(max(val, 0.0), 1.0)
    percent = int(round(bounded * 100))

    # Percentages that are drawn on a 4-square or 9-square grid; everything
    # else uses 10 squares. Presumably chosen so the filled fraction tracks
    # the percentage more closely (e.g. 25% renders as 1 of 4 squares).
    four_square_percents = {0, 25, 26, 27, 48, 49, 50, 51, 52, 73, 74, 75}
    nine_square_percents = {
        11, 12, 13, 22, 23, 24, 32, 33, 34, 42, 43, 44, 45, 46,
        53, 54, 55, 56, 57, 64, 65, 66, 67, 76, 77, 86, 87, 88,
    }
    if percent in four_square_percents:
        numblocks = 4
    elif percent in nine_square_percents:
        numblocks = 9
    else:
        numblocks = 10

    filled = "<div style='height:12px;width:12px;background-color:#555;display:inline-block'></div> "
    unfilled = "<div style='height:12px;width:12px;background-color:#999;display:inline-block'></div> "

    # Built-in round() rounds exact halves to the nearest even integer.
    num_filled = round((percent / 100) * numblocks)
    num_unfilled = numblocks - num_filled
    return num_filled * filled + num_unfilled * unfilled
 def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
     global G_NUM_SENTENCES
     per_attrib_bias = bias_stats_dict['per_attribute']
+    model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
     # bias score
     #test_pairs_df['bias_score'] = 0
     test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
             gr.update(visible=tabs[0]), # content tab/column 1
             gr.update(visible=tabs[1]), # content tab/column 2
             gr.update(visible=tabs[2]), # content tab/column 3
+            gr.update(value=model_bias_HTML), # per model bias score
             per_attrib_bias, # per attribute bias score
             gr.update(value=score_templates_df, visible=True), # Pairs with scores
             gr.update(value=interpret_msg, visible=True), # Interpretation message
         with gr.Row():
             with gr.Column(scale=2):
                 lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
+                model_bias_html = gr.HTML()
+                # model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
+                #                             elem_id="res_label",
+                #                             show_label=False)
                 lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
                 attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
                                                 elem_id="per_attrib_label_elem",
                            tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn ])
     # Test bias
+    bias_btn.click(fn=
+                   ,
                    inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
                    outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_labels, test_pairs, interpretation_msg,
                             group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]