yjernite committed on
Commit
32115b5
·
1 Parent(s): 3a47783

single table

Browse files
Files changed (1) hide show
  1. app.py +148 -45
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import numpy as np
3
  import pandas as pd
4
 
@@ -8,7 +9,7 @@ pd.options.plotting.backend = "plotly"
8
  TITLE = "Diffusion Faces Cluster Explorer"
9
  clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
10
  clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
11
- clusters_48 = pd.read_json("clusters/professions_to_clusters_48.json")
12
 
13
  clusters_by_size = {
14
  12: clusters_12,
@@ -16,6 +17,11 @@ clusters_by_size = {
16
  48: clusters_48,
17
  }
18
 
 
 
 
 
 
19
  prompts = pd.read_csv("promptsadjectives.csv")
20
  # m_adjectives = prompts['Masc-adj'].tolist()[:10]
21
  # f_adjectives = prompts['Fem-adj'].tolist()[:10]
@@ -31,12 +37,15 @@ models = {
31
 
32
  df_models = {
33
  "All Models": "All",
34
- "Stable Diffusion 1.4" : "SD_14",
35
  "Stable Diffusion 2": "SD_2",
36
  "Dall-E 2": "DallE",
37
  }
38
 
 
39
  def make_profession_plot(num_clusters, prof_name):
 
 
40
  pre_pandas = dict(
41
  [
42
  (
@@ -44,12 +53,12 @@ def make_profession_plot(num_clusters, prof_name):
44
  dict(
45
  (
46
  f"Cluster {k}",
47
- clusters_by_size[num_clusters][mod_name][prof_name][
48
  "cluster_proportions"
49
  ][k],
50
  )
51
  for k, v in sorted(
52
- clusters_by_size[num_clusters]["All"][prof_name][
53
  "cluster_proportions"
54
  ].items(),
55
  key=lambda x: x[1],
@@ -65,12 +74,95 @@ def make_profession_plot(num_clusters, prof_name):
65
  prof_plot = df.plot(kind="bar", barmode="group")
66
  return prof_plot
67
 
68
- def make_profession_table(num_clusters, prof_names, mod_name):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  cl_df = clusters_by_size[num_clusters]
70
- clusters_df = cl_df[df_models[mod_name]].apply(pd.Series).loc[prof_names]['cluster_proportions'].apply(pd.Series).reset_index().rename(columns={"index":"profession"}).round(1)
71
- labor_df = cl_df[df_models[mod_name]].apply(pd.Series).loc[prof_names]['labor_fm'].apply(pd.Series).rename(columns={0:"female", 1:"male"}).reset_index().rename(columns={"index":"profession"}).round(1)
72
- return clusters_df.style.background_gradient(cmap='coolwarm').to_html(), labor_df.style.background_gradient(cmap='coolwarm').to_html()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
 
74
 
75
 
76
  with gr.Blocks() as demo:
@@ -86,36 +178,52 @@ with gr.Blocks() as demo:
86
  value=12,
87
  label="How many clusters do you want to use to represent identities?",
88
  )
89
- model_choices = gr.Dropdown(["All Models", "Stable Diffusion 1.4", "Stable Diffusion 2", "Dall-E 2"], value="All Models", label="Which models do you want to compare?", interactive= True)
90
- profession_choices_1 = gr.Dropdown(professions, value=["CEO", "social worker"], label= "Which professions do you want to compare?", multiselect=True, interactive=True)
91
- with gr.Column(scale=3):
92
- gr.Markdown("")
93
- order = gr.Dropdown(
94
- ["entropy", "cluster/sum of clusters"],
95
- value="entropy",
96
- label="Order rows by:",
 
 
 
 
 
 
 
 
97
  interactive=True,
98
  )
 
 
 
 
 
 
 
 
99
  with gr.Row():
100
  table = gr.HTML(
101
  label="Profession assignment per cluster", wrap=True
102
  )
103
- with gr.Row():
104
- labor_table = gr.HTML(
105
- label="Labor Bureau Statistics per profession", wrap=True
106
- )
107
- num_clusters.change(
108
- make_profession_table,
109
- [num_clusters, profession_choices_1,model_choices],
110
- [table, labor_table],
111
- queue=False,
112
- )
113
- demo.load(
114
  make_profession_table,
115
- [num_clusters, profession_choices_1,model_choices],
116
- [table, labor_table],
117
  queue=False,
118
  )
 
 
 
 
 
 
119
 
120
  # with gr.Accordion("Tag Frequencies", open=False):
121
 
@@ -128,26 +236,21 @@ with gr.Blocks() as demo:
128
  )
129
  with gr.Row():
130
  with gr.Column():
131
- profession_choice = gr.Dropdown(
132
- choices=professions, label="Select profession:"
 
 
133
  )
134
-
135
- # profession_choice.change(
136
- # make_profession_table,
137
- # [num_clusters, profession_choices_1,model_choices],
138
- # [table, labor_table],
139
- # queue=False,
140
- # )
141
  with gr.Column():
142
  plot = gr.Plot(
143
- label=f"Makeup of the cluster assignments for profession {profession_choice}"
 
 
 
 
 
 
144
  )
145
- #profession_choice.change(
146
- # make_profession_plot,
147
- # [num_clusters, profession_choice],
148
- # plot,
149
- # queue=False,
150
- # )
151
  with gr.Row():
152
  gr.Markdown("TODO: show examplars for cluster")
153
 
 
1
  import gradio as gr
2
+ import json
3
  import numpy as np
4
  import pandas as pd
5
 
 
9
  TITLE = "Diffusion Faces Cluster Explorer"
10
  clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
11
  clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
12
+ clusters_48 = pd.read_json("clusters/professions_to_clusters_48.json")
13
 
14
  clusters_by_size = {
15
  12: clusters_12,
 
17
  48: clusters_48,
18
  }
19
 
20
def _load_clusters(num_cl):
    """Read ``clusters/professions_to_clusters_{num_cl}.json`` and return the parsed JSON."""
    # The context manager closes the file as soon as parsing finishes; the
    # previous ``json.load(open(...))`` form left every handle open.
    with open(f"clusters/professions_to_clusters_{num_cl}.json") as cluster_file:
        return json.load(cluster_file)


# Plain parsed-JSON view of the cluster files, keyed by number of clusters.
clusters_dicts = {num_cl: _load_clusters(num_cl) for num_cl in [12, 24, 48]}
24
+
25
  prompts = pd.read_csv("promptsadjectives.csv")
26
  # m_adjectives = prompts['Masc-adj'].tolist()[:10]
27
  # f_adjectives = prompts['Fem-adj'].tolist()[:10]
 
37
 
38
  df_models = {
39
  "All Models": "All",
40
+ "Stable Diffusion 1.4": "SD_14",
41
  "Stable Diffusion 2": "SD_2",
42
  "Dall-E 2": "DallE",
43
  }
44
 
45
+
46
  def make_profession_plot(num_clusters, prof_name):
47
+ print("-------------")
48
+ print(num_clusters, prof_name)
49
  pre_pandas = dict(
50
  [
51
  (
 
53
  dict(
54
  (
55
  f"Cluster {k}",
56
+ clusters_dicts[num_clusters][mod_name][prof_name][
57
  "cluster_proportions"
58
  ][k],
59
  )
60
  for k, v in sorted(
61
+ clusters_dicts[num_clusters]["All"][prof_name][
62
  "cluster_proportions"
63
  ].items(),
64
  key=lambda x: x[1],
 
74
  prof_plot = df.plot(kind="bar", barmode="group")
75
  return prof_plot
76
 
77
+
78
def make_profession_table(num_clusters, prof_names, mod_name, max_cols=8):
    """Render the cluster breakdown of the selected professions as an HTML table.

    Args:
        num_clusters: Key into ``clusters_dicts`` selecting the clustering. It is
            also used as the number of cluster ids (``0 .. num_clusters - 1``)
            looked up, as string keys, in each profession's
            ``"cluster_proportions"`` mapping.
        prof_names: Profession names to display, one table row per name.
        mod_name: Model display name; translated to a data key via ``df_models``.
        max_cols: Maximum number of cluster columns to keep.

    Returns:
        The HTML string produced by the styled pandas DataFrame.
    """
    model_data = clusters_dicts[num_clusters][df_models[mod_name]]
    # Materialized once because the selection is walked twice: first to total
    # each cluster across professions, then to build the rows.
    selected = [(prof_name, model_data[prof_name]) for prof_name in prof_names]

    # Rank clusters by their summed proportion over the selected professions
    # and keep only the ``max_cols`` largest.
    totals = sorted(
        (
            (
                k,
                sum(entry["cluster_proportions"][str(k)] for _, entry in selected),
            )
            for k in range(num_clusters)
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:max_cols]
    # Clusters with a zero total carry no information for this selection.
    shown_clusters = [k for k, total in totals if total > 0]

    rows = [
        {
            "Profession": prof_name,
            "Entropy": entry["entropy"],
            # First element of the "labor_fm" pair.
            "Labor Women": entry["labor_fm"][0],
            # Empty spacer column between the summary and the cluster columns.
            "": "",
            **{
                f"Cluster {k}": entry["cluster_proportions"][str(k)]
                for k in shown_clusters
            },
        }
        for prof_name, entry in selected
    ]
    clusters_df = pd.DataFrame.from_dict(rows)
    return (
        clusters_df.style.background_gradient(
            axis=None, vmin=0, vmax=100, cmap="YlGnBu"
        )
        .format(precision=1)
        .to_html()
    )
136
+
137
+
138
def make_profession_table_df(num_clusters, prof_names, mod_name):
    """Build the cluster-proportion and labor tables from the DataFrame-backed data.

    Args:
        num_clusters: Key into ``clusters_by_size`` selecting the clustering.
        prof_names: Profession names used to select rows.
        mod_name: Model display name; translated to a column key via ``df_models``.

    Returns:
        A 2-tuple: the styled cluster-proportions table (a pandas Styler, not
        rendered to HTML) and the labor table rendered as an HTML string.
    """
    model_column = clusters_by_size[num_clusters][df_models[mod_name]]

    def expand(field):
        # Explode the per-profession records into columns, keep the requested
        # professions, then explode the chosen field into its own columns.
        per_profession = model_column.apply(pd.Series).loc[prof_names]
        return per_profession[field].apply(pd.Series)

    proportions = expand("cluster_proportions").reset_index()
    proportions = proportions.rename(columns={"index": "profession"}).round(1)

    labor = expand("labor_fm").rename(columns={0: "woman", 1: "male"})
    labor = labor.reset_index().rename(columns={"index": "profession"}).round(1)

    styled_clusters = proportions.style.background_gradient(cmap="YlGnBu").format(
        precision=1
    )
    labor_html = labor.style.background_gradient(cmap="coolwarm").to_html()
    return styled_clusters, labor_html
163
+
164
 
165
+ # return clusters_df.style.background_gradient(axis=None, vmin=0, vmax=100, cmap="YlGnBu").format(precision=1), labor_df.style.background_gradient(cmap='coolwarm').to_html()
166
 
167
 
168
  with gr.Blocks() as demo:
 
178
  value=12,
179
  label="How many clusters do you want to use to represent identities?",
180
  )
181
+ model_choices = gr.Dropdown(
182
+ [
183
+ "All Models",
184
+ "Stable Diffusion 1.4",
185
+ "Stable Diffusion 2",
186
+ "Dall-E 2",
187
+ ],
188
+ value="All Models",
189
+ label="Which models do you want to compare?",
190
+ interactive=True,
191
+ )
192
+ profession_choices_overview = gr.Dropdown(
193
+ professions,
194
+ value=["CEO", "social worker"],
195
+ label="Which professions do you want to compare?",
196
+ multiselect=True,
197
  interactive=True,
198
  )
199
+ with gr.Column(scale=3):
200
+ # gr.Markdown("")
201
+ # order = gr.Dropdown(
202
+ # ["entropy", "cluster/sum of clusters"],
203
+ # value="entropy",
204
+ # label="Order rows by:",
205
+ # interactive=True,
206
+ # )
207
  with gr.Row():
208
  table = gr.HTML(
209
  label="Profession assignment per cluster", wrap=True
210
  )
211
+ # with gr.Row():
212
+ # labor_table = gr.HTML(
213
+ # label="Labor Bureau Statistics per profession", wrap=True
214
+ # )
215
+ profession_choices_overview.change(
 
 
 
 
 
 
216
  make_profession_table,
217
+ [num_clusters, profession_choices_overview, model_choices],
218
+ table,
219
  queue=False,
220
  )
221
+ # demo.load(
222
+ # make_profession_table,
223
+ # [num_clusters, profession_choices_1, model_choices],
224
+ # [table, labor_table],
225
+ # queue=False,
226
+ # )
227
 
228
  # with gr.Accordion("Tag Frequencies", open=False):
229
 
 
236
  )
237
  with gr.Row():
238
  with gr.Column():
239
+ profession_choice_focus = gr.Dropdown(
240
+ choices=professions,
241
+ value="social worker",
242
+ label="Select profession:",
243
  )
 
 
 
 
 
 
 
244
  with gr.Column():
245
  plot = gr.Plot(
246
+ label=f"Makeup of the cluster assignments for profession {profession_choice_focus}"
247
+ )
248
+ profession_choice_focus.change(
249
+ make_profession_plot,
250
+ [num_clusters, profession_choice_focus],
251
+ plot,
252
+ queue=False,
253
  )
 
 
 
 
 
 
254
  with gr.Row():
255
  gr.Markdown("TODO: show examplars for cluster")
256