Spaces:

society-ethics
/

DiffusionClustering

Runtime error

App Files Files Community

yjernite committed on Feb 16, 2023

Commit

32115b5

1 Parent(s): 3a47783

single table

Browse files

Files changed (1) hide show

app.py +148 -45

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -8,7 +9,7 @@ pd.options.plotting.backend = "plotly"
 TITLE = "Diffusion Faces Cluster Explorer"
 clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
 clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
-clusters_48 =  pd.read_json("clusters/professions_to_clusters_48.json")
 clusters_by_size = {
     12: clusters_12,
@@ -16,6 +17,11 @@ clusters_by_size = {
     48: clusters_48,
 }
 prompts = pd.read_csv("promptsadjectives.csv")
 # m_adjectives = prompts['Masc-adj'].tolist()[:10]
 #  f_adjectives = prompts['Fem-adj'].tolist()[:10]
@@ -31,12 +37,15 @@ models = {
 df_models = {
     "All Models": "All",
-    "Stable Diffusion 1.4" : "SD_14",
     "Stable Diffusion 2": "SD_2",
     "Dall-E 2": "DallE",
 }
 def make_profession_plot(num_clusters, prof_name):
     pre_pandas = dict(
         [
             (
@@ -44,12 +53,12 @@ def make_profession_plot(num_clusters, prof_name):
                 dict(
                     (
                         f"Cluster {k}",
-                        clusters_by_size[num_clusters][mod_name][prof_name][
                             "cluster_proportions"
                         ][k],
                     )
                     for k, v in sorted(
-                        clusters_by_size[num_clusters]["All"][prof_name][
                             "cluster_proportions"
                         ].items(),
                         key=lambda x: x[1],
@@ -65,12 +74,95 @@ def make_profession_plot(num_clusters, prof_name):
     prof_plot = df.plot(kind="bar", barmode="group")
     return prof_plot
-def make_profession_table(num_clusters, prof_names, mod_name):
     cl_df = clusters_by_size[num_clusters]
-    clusters_df = cl_df[df_models[mod_name]].apply(pd.Series).loc[prof_names]['cluster_proportions'].apply(pd.Series).reset_index().rename(columns={"index":"profession"}).round(1)
-    labor_df = cl_df[df_models[mod_name]].apply(pd.Series).loc[prof_names]['labor_fm'].apply(pd.Series).rename(columns={0:"female", 1:"male"}).reset_index().rename(columns={"index":"profession"}).round(1)
-    return clusters_df.style.background_gradient(cmap='coolwarm').to_html(), labor_df.style.background_gradient(cmap='coolwarm').to_html()
 with gr.Blocks() as demo:
@@ -86,36 +178,52 @@ with gr.Blocks() as demo:
                     value=12,
                     label="How many clusters do you want to use to represent identities?",
                 )
-                model_choices = gr.Dropdown(["All Models", "Stable Diffusion 1.4", "Stable Diffusion 2", "Dall-E 2"], value="All Models", label="Which models do you want to compare?", interactive= True)
-                profession_choices_1 = gr.Dropdown(professions, value=["CEO", "social worker"], label= "Which professions do you want to compare?", multiselect=True, interactive=True)
-            with gr.Column(scale=3):
-                gr.Markdown("")
-                order = gr.Dropdown(
-                    ["entropy", "cluster/sum of clusters"],
-                    value="entropy",
-                    label="Order rows by:",
                     interactive=True,
                 )
                 with gr.Row():
                     table = gr.HTML(
                         label="Profession assignment per cluster", wrap=True
                     )
-                with gr.Row():
-                    labor_table = gr.HTML(
-                        label="Labor Bureau Statistics per profession", wrap=True
-                    )
-                num_clusters.change(
-                    make_profession_table,
-                    [num_clusters, profession_choices_1,model_choices],
-                    [table, labor_table],
-                    queue=False,
-                )
-                demo.load(
                     make_profession_table,
-                    [num_clusters, profession_choices_1,model_choices],
-                    [table, labor_table],
                     queue=False,
                 )
     #        with gr.Accordion("Tag Frequencies", open=False):
@@ -128,26 +236,21 @@ with gr.Blocks() as demo:
             )
         with gr.Row():
             with gr.Column():
-                profession_choice = gr.Dropdown(
-                    choices=professions, label="Select profession:"
                 )
-                # profession_choice.change(
-                #     make_profession_table,
-                #     [num_clusters, profession_choices_1,model_choices],
-                #     [table, labor_table],
-                #     queue=False,
-                # )
             with gr.Column():
                 plot = gr.Plot(
-                    label=f"Makeup of the cluster assignments for profession {profession_choice}"
                 )
-                #profession_choice.change(
-                #    make_profession_plot,
-                #    [num_clusters, profession_choice],
-                #    plot,
-                #    queue=False,
-               # )
         with gr.Row():
             gr.Markdown("TODO: show examplars for cluster")

 import gradio as gr
+import json
 import numpy as np
 import pandas as pd
 TITLE = "Diffusion Faces Cluster Explorer"
 clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
 clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
+clusters_48 = pd.read_json("clusters/professions_to_clusters_48.json")
 clusters_by_size = {
     12: clusters_12,
     48: clusters_48,
 }
# Plain-dict view of the per-size cluster files, keyed by number of clusters
# (12, 24, 48). Loaded with `json` rather than pandas so that nested values
# can be indexed directly as dicts/lists.
clusters_dicts = {}
for num_cl in [12, 24, 48]:
    # Use a context manager so each file handle is closed as soon as the
    # JSON has been parsed, instead of being left for the garbage collector.
    with open(
        f"clusters/professions_to_clusters_{num_cl}.json", encoding="utf-8"
    ) as cluster_file:
        clusters_dicts[num_cl] = json.load(cluster_file)
 prompts = pd.read_csv("promptsadjectives.csv")
 # m_adjectives = prompts['Masc-adj'].tolist()[:10]
 #  f_adjectives = prompts['Fem-adj'].tolist()[:10]
 df_models = {
     "All Models": "All",
+    "Stable Diffusion 1.4": "SD_14",
     "Stable Diffusion 2": "SD_2",
     "Dall-E 2": "DallE",
 }
 def make_profession_plot(num_clusters, prof_name):
+    print("-------------")
+    print(num_clusters, prof_name)
     pre_pandas = dict(
         [
             (
                 dict(
                     (
                         f"Cluster {k}",
+                        clusters_dicts[num_clusters][mod_name][prof_name][
                             "cluster_proportions"
                         ][k],
                     )
                     for k, v in sorted(
+                        clusters_dicts[num_clusters]["All"][prof_name][
                             "cluster_proportions"
                         ].items(),
                         key=lambda x: x[1],
     prof_plot = df.plot(kind="bar", barmode="group")
     return prof_plot
def make_profession_table(num_clusters, prof_names, mod_name, max_cols=8):
    """Render an HTML table comparing cluster assignments across professions.

    One row is produced per profession in ``prof_names``. Each row holds the
    profession name, its ``"entropy"`` value, the first entry of its
    ``"labor_fm"`` value (shown as "Labor Women"), an empty spacer column,
    and one "Cluster k" column per retained cluster.

    Clusters are ranked by the sum of their proportions over the selected
    professions; only the ``max_cols`` highest-ranked clusters are kept, and
    of those only the ones whose sum is strictly positive are shown.

    Args:
        num_clusters: Key into the module-level ``clusters_dicts``; cluster
            ids ``0 .. num_clusters - 1`` are looked up as string keys.
        prof_names: Iterable of profession names to include, in row order.
        mod_name: Model display name; translated through the module-level
            ``df_models`` mapping to the key used in the cluster data.
        max_cols: Maximum number of cluster columns to consider.

    Returns:
        The styled table as an HTML string.

    Raises:
        KeyError: If ``num_clusters``, ``mod_name``, a profession name, or a
            cluster id is missing from the loaded data.
    """
    # Resolve the per-model statistics once instead of on every access.
    model_stats = clusters_dicts[num_clusters][df_models[mod_name]]

    professions_list_clusters = [
        (prof_name, model_stats[prof_name]["cluster_proportions"])
        for prof_name in prof_names
    ]

    # Rank clusters by their total share across the selected professions and
    # keep only the top `max_cols` of them.
    totals = sorted(
        (
            (
                k,
                sum(
                    prof_clusters[str(k)]
                    for _, prof_clusters in professions_list_clusters
                ),
            )
            for k in range(num_clusters)
        ),
        key=lambda x: x[1],
        reverse=True,
    )[:max_cols]
    # Clusters that none of the selected professions fall into add no
    # information, so they are dropped from the table.
    shown_clusters = [k for k, total in totals if total > 0]

    rows = [
        {
            "Profession": prof_name,
            "Entropy": model_stats[prof_name]["entropy"],
            "Labor Women": model_stats[prof_name]["labor_fm"][0],
            # Empty-named column acting as a visual spacer before the
            # per-cluster columns.
            "": "",
            **{f"Cluster {k}": prof_clusters[str(k)] for k in shown_clusters},
        }
        for prof_name, prof_clusters in professions_list_clusters
    ]
    clusters_df = pd.DataFrame(rows)

    # A single colour scale fixed to 0-100 is shared by all cells
    # (axis=None) -- presumably the values are percentages; confirm against
    # the data files.
    return (
        clusters_df.style.background_gradient(
            axis=None, vmin=0, vmax=100, cmap="YlGnBu"
        )
        .format(precision=1)
        .to_html()
    )
def make_profession_table_df(num_clusters, prof_names, mod_name):
    """Build the cluster-proportion and labor tables from the pandas data.

    Args:
        num_clusters: Key into the module-level ``clusters_by_size`` mapping.
        prof_names: Profession names used to select rows with ``.loc``.
        mod_name: Model display name; translated through the module-level
            ``df_models`` mapping to a column of the selected frame.

    Returns:
        A 2-tuple ``(clusters_styled, labor_html)``. The first element is a
        pandas ``Styler`` (not rendered to HTML); the second is an HTML
        string.
    """
    model_column = clusters_by_size[num_clusters][df_models[mod_name]]
    # Each cell of the model column is expanded into its own set of columns,
    # then restricted to the requested professions.
    selected = model_column.apply(pd.Series).loc[prof_names]

    proportions = selected["cluster_proportions"].apply(pd.Series)
    proportions = proportions.reset_index()
    proportions = proportions.rename(columns={"index": "profession"})
    clusters_df = proportions.round(1)

    labor = selected["labor_fm"].apply(pd.Series)
    labor = labor.rename(columns={0: "woman", 1: "male"})
    labor = labor.reset_index()
    labor = labor.rename(columns={"index": "profession"})
    labor_df = labor.round(1)

    clusters_styled = clusters_df.style.background_gradient(cmap="YlGnBu")
    clusters_styled = clusters_styled.format(precision=1)
    labor_html = labor_df.style.background_gradient(cmap="coolwarm").to_html()
    return clusters_styled, labor_html
+#    return clusters_df.style.background_gradient(axis=None, vmin=0, vmax=100, cmap="YlGnBu").format(precision=1), labor_df.style.background_gradient(cmap='coolwarm').to_html()
 with gr.Blocks() as demo:
                     value=12,
                     label="How many clusters do you want to use to represent identities?",
                 )
+                model_choices = gr.Dropdown(
+                    [
+                        "All Models",
+                        "Stable Diffusion 1.4",
+                        "Stable Diffusion 2",
+                        "Dall-E 2",
+                    ],
+                    value="All Models",
+                    label="Which models do you want to compare?",
+                    interactive=True,
+                )
+                profession_choices_overview = gr.Dropdown(
+                    professions,
+                    value=["CEO", "social worker"],
+                    label="Which professions do you want to compare?",
+                    multiselect=True,
                     interactive=True,
                 )
+            with gr.Column(scale=3):
+                #                gr.Markdown("")
+                #                order = gr.Dropdown(
+                #                    ["entropy", "cluster/sum of clusters"],
+                #                    value="entropy",
+                #                    label="Order rows by:",
+                #                    interactive=True,
+                #                )
                 with gr.Row():
                     table = gr.HTML(
                         label="Profession assignment per cluster", wrap=True
                     )
+                # with gr.Row():
+                #    labor_table = gr.HTML(
+                #        label="Labor Bureau Statistics per profession", wrap=True
+                #    )
+                profession_choices_overview.change(
                     make_profession_table,
+                    [num_clusters, profession_choices_overview, model_choices],
+                    table,
                     queue=False,
                 )
+                # demo.load(
+                #    make_profession_table,
+                #    [num_clusters, profession_choices_1, model_choices],
+                #    [table, labor_table],
+                #    queue=False,
+                # )
     #        with gr.Accordion("Tag Frequencies", open=False):
             )
         with gr.Row():
             with gr.Column():
+                profession_choice_focus = gr.Dropdown(
+                    choices=professions,
+                    value="social worker",
+                    label="Select profession:",
                 )
             with gr.Column():
                 plot = gr.Plot(
+                    label=f"Makeup of the cluster assignments for profession {profession_choice_focus}"
+                )
+                profession_choice_focus.change(
+                    make_profession_plot,
+                    [num_clusters, profession_choice_focus],
+                    plot,
+                    queue=False,
                 )
         with gr.Row():
             gr.Markdown("TODO: show examplars for cluster")