NanLi2021 commited on
Commit
c3279e7
·
1 Parent(s): bc7ad4c
.gitignore ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Datafiles
2
+ **/outputs/
3
+ **/data/
4
+ note.md
5
+
6
+
7
+ # Byte-compiled / optimized / DLL files
8
+ .idea/
9
+ __pycache__/
10
+ *.py[cod]
11
+ *$py.class
12
+
13
+ # C extensions
14
+ *.so
15
+
16
+ # Distribution / packaging
17
+ .Python
18
+ build/
19
+ develop-eggs/
20
+ dist/
21
+ downloads/
22
+ eggs/
23
+ .eggs/
24
+ lib/
25
+ lib64/
26
+ parts/
27
+ sdist/
28
+ var/
29
+ wheels/
30
+ pip-wheel-metadata/
31
+ share/python-wheels/
32
+ *.egg-info/
33
+ .installed.cfg
34
+ *.egg
35
+ MANIFEST
36
+
37
+ # PyInstaller
38
+ # Usually these files are written by a python script from a template
39
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
40
+ *.manifest
41
+ *.spec
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ *.py,cover
58
+ .hypothesis/
59
+ .pytest_cache/
60
+
61
+ # Translations
62
+ *.mo
63
+ *.pot
64
+
65
+ # Django stuff:
66
+ *.log
67
+ local_settings.py
68
+ db.sqlite3
69
+ db.sqlite3-journal
70
+
71
+ # Flask stuff:
72
+ instance/
73
+ .webassets-cache
74
+
75
+ # Scrapy stuff:
76
+ .scrapy
77
+
78
+ # Sphinx documentation
79
+ docs/_build/
80
+
81
+ # PyBuilder
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ .python-version
93
+
94
+ # pipenv
95
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
97
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
98
+ # install all needed dependencies.
99
+ #Pipfile.lock
100
+
101
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102
+ __pypackages__/
103
+
104
+ # Celery stuff
105
+ celerybeat-schedule
106
+ celerybeat.pid
107
+
108
+ # SageMath parsed files
109
+ *.sage.py
110
+
111
+ # Environments
112
+ .env
113
+ .venv
114
+ env/
115
+ venv/
116
+ ENV/
117
+ env.bak/
118
+ venv.bak/
119
+
120
+ # Spyder project settings
121
+ .spyderproject
122
+ .spyproject
123
+
124
+ # Rope project settings
125
+ .ropeproject
126
+
127
+ # mkdocs documentation
128
+ /site
129
+
130
+ # mypy
131
+ .mypy_cache/
132
+ .dmypy.json
133
+ dmypy.json
134
+
135
+ # Pyre type checker
136
+ .pyre/
app.py CHANGED
@@ -1,7 +1,431 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
1
+ import sys
2
+
3
+ from pathlib import Path
4
+ import string
5
+ import random
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+
10
  import gradio as gr
11
+ import pandas as pd
12
+ from scipy.special import softmax
13
+ import numpy as np
14
+ import seaborn as sns
15
+ import matplotlib.pyplot as plt
16
+ import hydra
17
+ from omegaconf import open_dict, DictConfig
18
+ import matplotlib.pyplot as plt
19
+ import matplotlib
20
+ from matplotlib.patches import Patch
21
+ sns.set()
22
+ sns.set_style("darkgrid")
23
+
24
+ from utils.data import *
25
+ from utils.metrics import *
26
+
27
+
28
+
29
def user_interface(Ufile, Pfile, Sfile=None, job_meta_file=None, user_meta_file=None, user_groups=None):
    """Build the end-user Gradio demo for envy/inferiority monitoring.

    Loads the score matrices into a `Data` object, defines plotting
    closures over it, and wires them into a `gr.Blocks` layout.

    Args:
        Ufile, Pfile, Sfile: pickle paths for the U (utility), P
            (recommendation) and S (score) matrices; S falls back to U
            inside `Data` when Sfile is None.
        job_meta_file, user_meta_file: optional pickled metadata.
        user_groups: optional list of user-group column names (unused by
            the plots here; forwarded to `Data`).

    Returns:
        The assembled (un-launched) `gr.Blocks` demo.
    """
    recdata = Data(Ufile, Pfile, Sfile, job_meta_file, user_meta_file, user_groups)

    def calculate_user_item_metrics(res, S, U, k=10):
        """Compute per-user/per-item metrics for top-k recommendations.

        Results are cached by the callers in `recdata.lookup_dict[k]`.
        """
        # get rec
        m, n = res.shape
        if not torch.is_tensor(res):
            res = torch.from_numpy(res)
        if not torch.is_tensor(U):
            U = torch.from_numpy(U)
        _, rec = torch.topk(res, k, dim=1)
        rec_onehot = slow_onehot(rec, res)
        # rec_onehot = F.one_hot(rec, num_classes=n).sum(1).float()
        # NOTE(review): bare except used to fall back to .cpu() when the
        # tensor lives on a GPU device — consider narrowing the exception.
        try:
            rec_per_job = rec_onehot.sum(axis=0).numpy()
        except:
            rec_per_job = rec_onehot.sum(axis=0).cpu().numpy()
        rec = rec.cpu()
        S = S.cpu()
        # envy
        envy = expected_envy_torch_vec(U, rec_onehot, k=1).numpy()

        # competitors for each rec job
        competitors = get_competitors(rec_per_job, rec)

        # rank
        better_competitors = get_num_better_competitors(rec, S)

        # scores per job for later zoom in scores
        scores = get_scores_per_job(rec, S)

        return {'rec': rec, 'envy': envy, 'competitors': competitors, 'ranks': better_competitors, 'scores_job': scores}

    def plot_user_envy(user=0, k=2):
        """Histogram + KDE of all users' total envy, with this user's bin
        highlighted in red. Returns a matplotlib figure."""
        plt.close('all')
        user = int(user)
        # metrics are cached per k and invalidated by recdata.update()
        if k in recdata.lookup_dict:
            ret_dict = recdata.lookup_dict[k]
        else:
            ret_dict = calculate_user_item_metrics(recdata.P_sub, recdata.S_sub, recdata.U_sub, k=k)
            recdata.lookup_dict[k] = ret_dict
        # user's recommended jobs
        users_rec = ret_dict['rec'][user].numpy()
        # Plot
        fig, ax1 = plt.subplots(figsize=(10, 5))
        # fig.tight_layout()
        fig.subplots_adjust(bottom=0.2)

        envy = ret_dict['envy'].sum(-1)
        envy_user = envy[user]
        # plot envy histogram
        n, bins, patches = ax1.hist(envy, bins=50, color='grey', alpha=0.5)
        ax1.set_yscale('symlog')
        sns.kdeplot(envy, color='grey', bw_adjust=0.3, cut=0, ax=ax1)
        # mark this user's envy
        # index of the bin that contains this user's envy
        idx = np.digitize(envy_user, bins)
        # print(envy_user, idx)
        patches[idx-1].set_fc('r')
        ax1.legend(handles=[Patch(facecolor='r', edgecolor='r', alpha=0.5,
                                  label='Your envy group')])
        ax1.set_xlabel('Envy')
        ax1.set_ylabel('Number of users (log scale)')

        return fig

    def plot_user_scores(user=0, k=2):
        """Scatter of all candidates' scores for each of the user's k
        recommended jobs, with the user's own score/rank circled in red.
        Returns a matplotlib figure."""
        user = int(user)
        if k in recdata.lookup_dict:
            ret_dict = recdata.lookup_dict[k]
        else:
            ret_dict = calculate_user_item_metrics(recdata.P_sub, recdata.S_sub, recdata.U_sub, k=k)
            recdata.lookup_dict[k] = ret_dict
        users_rec = ret_dict['rec'][user].numpy()
        scores = ret_dict['scores_job']

        # scores = [softmax(np.array(scores[jb])*0.5) for jb in users_rec]
        scores = [scores[jb] for jb in users_rec]

        rank_xs = [list(range(1, len(s)+1)) for s in scores]
        # ranks are 0-based in ret_dict; display as 1-based
        my_ranks = [1+int(i) for i in ret_dict['ranks'][user]]
        # my scores are the scores of the recommended jobs with rank
        # my_scores = [scores[i][j] for i, j in enumerate(my_ranks)]
        my_scores = [recdata.S_sub[user, job_id].item() for job_id in users_rec]
        # my_scores_log = np.log(np.array(my_scores).astype(float))
        ys = np.arange(len(users_rec))
        # user's recommended jobs
        # long-format frame is cached per (user, k) since explode() is slow
        if (user, k) in recdata.user_temp_data:
            df = recdata.user_temp_data[(user, k)]
        else:
            df = pd.DataFrame({'x': rank_xs, 's': scores, 'y': ys})
            df = df.explode(list('xs'))
            recdata.user_temp_data[(user, k)] = df

        # df['log_scores'] = np.log(df['s'].values.astype(float))
        fig, ax = plt.subplots(figsize=(10, 5))
        # fig.tight_layout()
        fig.subplots_adjust(bottom=0.3)

        def sub_cmap(cmap, vmin, vmax):
            # restrict a colormap to the [vmin, vmax] sub-range
            return lambda v: cmap(vmin + (vmax - vmin) * v)

        # palette=matplotlib.cm.get_cmap('Greens').reversed()
        # palette = sub_cmap(palette,0.2, 0.8)

        sns.scatterplot(data=df, x="y", y="s", ax=ax, alpha=0.6,
                        legend=False, s=100, hue='y', palette="summer") #monotone color palette
        sns.scatterplot(y=my_scores, x=range(k), ax=ax,
                        alpha=0.8, s=200, ec='r', fc='none', label='Your rank')
        # add ranking of this user's score for each job
        # find score gaps
        gaps = np.diff(np.sort(scores[0])).mean()
        for i, (y, x) in enumerate(zip(my_scores, range(k))):
            ax.text(x-0.3, y+gaps, my_ranks[i], color='r', fontsize=15)
        # add notation for 'rank'
        # ax.text(-0.8, 1.12, 'Your rank', color='r', fontsize=12)
        ax.set_xticks(range(k))
        # shorten the job title
        titles = [recdata.job_metadata[jb] for jb in users_rec]
        titles = [t[:20] + '...' if len(t) > 20 else t for t in titles]
        ax.set_xticklabels(titles, rotation=30, ha='right')
        ax.set_xlabel('')
        ax.set_xlim(-1, k)
        # ax.grid(False)
        ax.set_ylabel('Score')
        # ax.set_ylim(-0.09, 1.2)
        ax.legend()
        return fig

    # demo = gr.Blocks(gr.themes.Base.from_hub('finlaymacklon/smooth_slate'))
    demo = gr.Blocks(gr.themes.Soft())
    with demo:
        # --- callbacks -------------------------------------------------
        def submit0(user, k):
            # show the envy-distribution plot
            fig = plot_user_envy(user, k)
            return {
                hist_plot: gr.update(value=fig, visible=True),
            }

        def submit2(user, k):
            # show the per-job score/rank plot
            bar = plot_user_scores(user, k)
            return {
                bar_plot2: gr.update(value=bar, visible=True)
            }

        def submit(user):
            # simulate new jobs arriving, then hide stale plots
            new_job_num = random.randint(1,6)
            # if new_job_num == 0, do nothing but clear the plots
            # NOTE(review): randint(1, 6) can never return 0, so the
            # branch below is always taken — confirm intent.
            if new_job_num > 0:
                print(f'adding {new_job_num} new jobs')
                recdata.update(new_user_num=0, new_job_num=new_job_num)
                recdata.tweak_P(user)

            return {
                hist_plot: gr.update(visible=False),
                bar_plot2: gr.update(visible=False)
            }

        # def submit_login(user):
        #     return {
        #         k: gr.update(visible=True),
        #         btn: gr.update(visible=True),
        #         btn0: gr.update(visible=True),
        #         btn2: gr.update(visible=True),
        #         pswd: gr.update(visible=False),
        #         lgbtn: gr.update(visible=False),
        #     }

        # layout
        gr.Markdown("## Job Recommendation Inferiority and Envy Monitor Demo")

        with gr.Row():
            with gr.Column(scale=1):
                user = gr.Textbox(label='User ID',default='0', placeholder='Enter a random integer user ID')
            # with gr.Column(scale=1):
            #     pswd = gr.Textbox(label='Password',default='********')
            # with gr.Column(scale=1):
            #     lgbtn = gr.Button("Login")
            # with gr.Row():
            with gr.Column(scale=1):
                k = gr.Slider(minimum=1, maximum=20,
                              default=4, step=1, label='Number of Jobs', visible=True)
            with gr.Column(scale=1):
                btn = gr.Button("Refresh to see new jobs", visible=True)

        with gr.Tab('Envy'):
            btn0 = gr.Button("User envy distribution", visible=True)
            hist_plot = gr.Plot(visible=False)

        with gr.Tab('Inferiority'):
            with gr.Row():
                # btn1 = gr.Button("User ranks for the recommended jobs")
                btn2 = gr.Button("User scores/ranks for the recommended jobs", visible=True)

            # bar_plot = gr.Plot()
            bar_plot2 = gr.Plot(visible=False)

        # lgbtn.click(submit_login, inputs=[user], outputs=[k, btn, btn0, btn2, pswd, lgbtn])
        btn.click(submit, inputs=[user], outputs=[hist_plot, bar_plot2])
        btn0.click(submit0, inputs=[user, k], outputs=[hist_plot])
        # btn1.click(submit1, inputs=[user, k], outputs=[bar_plot])
        btn2.click(submit2, inputs=[user, k], outputs=[bar_plot2])

    return demo
237
+
238
+
239
def developer_interface(Ufile, Pfile, Sfile=None, job_meta_file=None, user_meta_file=None, user_groups=None):
    """Build the developer-facing Gradio demo (aggregate fairness views).

    Same data inputs as `user_interface`, but plots population-level
    envy/inferiority distributions, group-wise scatter plots, and
    item-side exposure (histogram + Lorenz curve).

    Returns:
        The assembled (un-launched) `gr.Blocks` demo.
    """
    recdata = Data(Ufile, Pfile, Sfile, job_meta_file, user_meta_file, user_groups, sub_sample_size=500)

    def calculate_all_metrics(k, S_sub, U_sub, P_sub):
        """Compute envy/inferiority/utility and per-job exposure at k,
        with a per-k cache in `recdata.lookup_dict`."""
        print('calculating all metrics')
        if k in recdata.lookup_dict:
            print('Found in lookup dict')
            return recdata.lookup_dict[k]
        else:
            if not torch.is_tensor(P_sub):
                P_sub = torch.from_numpy(P_sub)
            envy, inferiority, utility = eiu_cut_off2(
                (S_sub, U_sub), P_sub, k=k, agg=False)
            envy = envy.sum(-1)
            inferiority = inferiority.sum(-1)

            _, rec = torch.topk(P_sub, k=k, dim=1)
            rec_onehot = slow_onehot(rec, P_sub)
            # NOTE(review): bare except used as a CPU/GPU fallback —
            # consider narrowing the exception type.
            try:
                rec_per_job = rec_onehot.sum(axis=0).numpy()
            except:
                rec_per_job = rec_onehot.sum(axis=0).cpu().numpy()
            rec = rec.cpu()
            metrics_at_k = {'rec': rec, 'envy': envy, 'inferiority': inferiority, 'utility': utility,
                            'rec_per_job': rec_per_job}
            print('Finished calculating all metrics')
            return metrics_at_k

    def plot_user_box(metrics_dict):
        """Side-by-side box plots of the envy and inferiority distributions."""
        print('plotting user box')
        plt.close('all')
        envy = metrics_dict['envy'].numpy()
        inferiority = metrics_dict['inferiority'].numpy()
        fig, (ax1, ax2) = plt.subplots(ncols=2)
        fig.tight_layout()
        ax1.boxplot(envy)
        ax1.set_ylabel('envy')
        ax1.set_title('Envy')
        ax1.set_xticks([])
        ax2.boxplot(inferiority)
        ax2.set_ylabel('inferiority')
        ax2.set_title('Inferiority')
        ax2.set_xticks([])
        return fig

    def plot_scatter(k, group=None):
        """Scatter of log(envy+1) vs inferiority per user, optionally
        colored by a user-metadata column (`group`)."""
        print('plotting scatter')
        plt.close('all')
        # the radio widget sends the literal string 'None' for "no group"
        if group == 'None':
            group = None
        if k in recdata.lookup_dict:
            metrics_dict = recdata.lookup_dict[k]
        else:
            metrics_dict = calculate_all_metrics(k, recdata.S_sub, recdata.U_sub, recdata.P_sub)
            recdata.lookup_dict[k] = metrics_dict

        data = {'log(envy+1)': np.log(metrics_dict['envy']+1),
                'inferiority': metrics_dict['inferiority']}
        data = pd.DataFrame(data)
        data = data.join(recdata.user_metadata)
        fig, ax = plt.subplots()
        sns.scatterplot(data=data, x='log(envy+1)', y='inferiority', hue=group, ax=ax)
        return fig

    def lorenz_curve(X, ax, label):
        """Draw the Lorenz curve of X on `ax` (plus the equality line).

        NOTE(review): `X.sort()` sorts the caller's array in place.
        """
        # ref: https://zhiyzuo.github.io/Plot-Lorenz/
        X.sort()
        X_lorenz = X.cumsum() / X.sum()
        X_lorenz = np.insert(X_lorenz, 0, 0)
        X_lorenz[0], X_lorenz[-1]

        ax.plot(np.arange(X_lorenz.size) / (X_lorenz.size - 1), X_lorenz, label=label)
        ## line plot of equality
        ax.plot([0, 1], [0, 1], linestyle='dashed', color='k')
        return ax

    def plot_item(rec_per_job):
        """Exposure histogram (top) and Lorenz curve (bottom) over jobs."""
        print('plotting item')
        plt.close('all')
        fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(10, 10))
        fig.tight_layout(pad=5.0)
        labels, counts = np.unique(rec_per_job, return_counts=True)
        ax1.bar(labels, counts, align='center')

        ax1.set_xlabel('Number of times a job is recommended')
        ax1.set_ylabel('Number of jobs')
        ax1.set_title('Distribution of job exposure')
        ax2 = lorenz_curve(rec_per_job, ax2,'')
        ax2.set_title('Lorenz Curve')
        return fig

    # build the interface
    demo = gr.Blocks(gr.themes.Soft())
    with demo:
        # callbacks
        def submit_u():
            # generate two random integers including 0 representing user num and job num
            user_num = np.random.randint(0, 5)
            job_num = np.random.randint(0, 5)

            if user_num > 0 or job_num > 0:
                recdata.update(user_num, job_num)

            return{
                info: gr.update(value='New {} users and {} jobs'.format(user_num, job_num),visible=True),
            }

        def submit1(k):
            metrics_dict = calculate_all_metrics(k, recdata.S_sub, recdata.U_sub, recdata.P_sub)
            return {
                user_box_plot: plot_user_box(metrics_dict),
                scatter_plot: plot_scatter(k),
                btn2: gr.update(visible=True)
            }

        def submit2():
            # reveal the group-selection radio
            return {
                radio: gr.update(visible=True)
            }

        def submit3(k):
            metrics_dict = calculate_all_metrics(k, recdata.S_sub, recdata.U_sub, recdata.P_sub)
            return {
                item_plots: plot_item(metrics_dict['rec_per_job'])
            }

        # layout
        gr.Markdown("## Envy & Inferiority Monitor for Developers Demo")
        # 1. accept k
        # NOTE(review): gr.inputs.Slider is the deprecated pre-3.x API
        # (user_interface uses gr.Slider) — confirm against the pinned
        # gradio version before unifying.
        with gr.Row():
            with gr.Column(scale=1):
                k = gr.inputs.Slider(minimum=1, maximum=min(30,len(
                    recdata.P[0])), default=1, step=1, label='Number of Jobs')
            with gr.Column(scale=1):
                btn = gr.Button('Refresh')
            with gr.Column(scale=1):
                info = gr.Textbox('', label='Updated info', visible=False)
        btn.click(submit_u, inputs=[], outputs=[info])

        with gr.Tab('User'):
            plt.close('all')
            btn1 = gr.Button('Visualize user-side fairness')
            user_box_plot = gr.Plot()
            scatter_plot = gr.Plot()

            btn2 = gr.Button('Visualize intra-group fairness', visible=False)

            radio = gr.Radio(choices=user_groups, value=user_groups[0] if len(user_groups) > 0 else "",
                             interactive=True, label="User group", visible=False)

            btn1.click(submit1, inputs=[k], outputs=[
                       user_box_plot, scatter_plot, btn2])
            btn2.click(submit2, inputs=[], outputs=[radio])
            radio.change(fn=plot_scatter, inputs=[
                         k, radio], outputs=[scatter_plot])

        with gr.Tab('Item'):
            plt.close('all')
            btn3 = gr.Button('Visualize item-side fairness')
            item_plots = gr.Plot()
            btn3.click(submit3, inputs=[k], outputs=[item_plots])

    return demo
406
+
407
+
408
@hydra.main(version_base=None, config_path='./utils', config_name='monitor')
def main(config: DictConfig):
    """Entry point: build the role-appropriate demo and launch it.

    Reads file paths, optional user-group names, server address/port and
    the role ('user' or 'developer') from the hydra config
    (utils/monitor.yaml) and launches the matching Gradio interface.
    """
    print(config)
    # 'None' always heads the group list so grouping can be switched off
    # in the UI; fall back to just ['None'] when no groups are configured.
    user_groups = ['None'] + \
        list(config.user_groups) if config.user_groups else ['None']
    if config.role == 'user':
        demo = user_interface(config.Ufile, config.Pfile, config.Sfile,
                              config.job_meta_file, config.user_meta_file,
                              user_groups)
    elif config.role == 'developer':
        demo = developer_interface(config.Ufile, config.Pfile, config.Sfile,
                                   config.job_meta_file, config.user_meta_file,
                                   user_groups)
    demo.launch(server_name=config.server_name, server_port=config.server_port)
    # demo.launch()


if __name__ == "__main__":
    main()
fake_data/P.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf24f3cfccdc58272aad4bd3c81b55e6f9c527842f4f0f9cb9b8109b4957bbeb
3
+ size 10158
fake_data/S.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d48ed2befd1b8a4039342b2545202488ccb4b0b8ddfff93cef9c12730a1db8e5
3
+ size 10158
fake_data/U.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb03868752b74be97ad6f307cbfbf8b36c3de2d9eca5080a7d6f59890ab03d6
3
+ size 10158
fake_data/user_metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737f77fccd84770a721abf1113222a370752c52cf72f7ef312cc5502128c659b
3
+ size 1644
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.32.0
2
+ hydra-core==1.3.2
3
+ matplotlib==3.7.1
4
+ numpy==1.23.5
5
+ omegaconf==2.3.0
6
+ pandas==1.5.3
7
+ scipy==1.10.1
8
+ seaborn==0.12.2
9
+ torch==2.0.0
utils/data.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
+ from pathlib import Path
4
+ import string
5
+ import random
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+ import pandas as pd
10
+ import os
11
+ import json
12
+ import re
13
+
14
+
15
+
16
+ # refactor as a class with the following methods
17
class Data:
    """Container for the recommendation matrices and their metadata.

    Holds the full matrices U (utility), P (recommendation scores) and S
    (candidate scores), plus working sub-samples (`*_sub`) that the demo
    mutates when simulating newly arriving users/jobs. Per-k metric
    caches (`lookup_dict`, `user_temp_data`) are invalidated on update.
    """

    def __init__(self, Ufile, Pfile, Sfile, job_meta_file, user_meta_file, user_groups=None, sub_sample_size=1000):
        self.U, self.P, self.S, self.job_metadata, self.job_metadata_reverse, self.user_metadata = self.load_data(Pfile, Sfile, Ufile, job_meta_file, user_meta_file)
        # subsample the data
        self.U_sub = self.sub_sample(self.U, sub_sample_size)
        self.P_sub = self.sub_sample(self.P, sub_sample_size)
        self.S_sub = self.sub_sample(self.S, sub_sample_size)
        # self.U_sub = self.U
        # self.P_sub = self.P
        # self.S_sub = self.S
        # per-k metric cache and per-(user, k) dataframe cache
        self.lookup_dict = {}
        self.user_temp_data = {}
        self.user_groups = user_groups

    def load_data(self, Pfile, Sfile, Ufile, job_meta_file, user_meta_file):
        """Unpickle the matrices and metadata; synthesize defaults where
        optional files are missing (S defaults to U, job titles to
        'Job {i}', user metadata to None)."""
        U = torch.from_numpy(pickle.load(open(Ufile, 'rb')))
        recommendations = torch.from_numpy(pickle.load(open(Pfile, 'rb')))
        m, n = recommendations.shape
        if Sfile:
            S = torch.from_numpy(pickle.load(open(Sfile, 'rb')))
        else:
            S = U
        if job_meta_file:
            job_metadata = pickle.load(open(job_meta_file, 'rb'))
        else:
            job_metadata = {}
            for i in range(n):
                job_metadata[i] = 'Job {}'.format(i)
        job_metadata_reverse = {v.capitalize(): k for k, v in job_metadata.items()}
        if user_meta_file is not None:
            user_metadata = pickle.load(open(user_meta_file, 'rb'))
        else:
            user_metadata = None

        return U, recommendations, S, job_metadata, job_metadata_reverse, user_metadata

    def sub_sample(self, M, sample_size=500):
        """Return the top-left sample_size x sample_size corner of M.

        NOTE(review): only sub-samples when BOTH dimensions exceed
        sample_size; otherwise M is returned unchanged (not copied).
        """
        if len(M) > sample_size and len(M[0]) > sample_size:
            # take the first sample_size columns and rows of M, copy without touching the original
            M = M[:sample_size, :sample_size].clone()
        return M

    def update(self, new_user_num, new_job_num):
        """Grow the working sub-matrices by new users/jobs and clear the
        metric caches (they are stale once shapes change)."""
        # refactor this function
        # recdata.lookup_dict = {}
        # user_temp_data = {}
        # U = add_jobs(U, new_job_num)
        # recommendations = update_P(recommendations, new_job_num, 0)
        # generate a random float between 0 and 1
        # prob = random.random()
        # if prob > 0.2:
        #     recommendations[int(user),-1] = 1.
        # S = add_jobs(S, new_job_num)
        # U, recommendations, S = add_jobs(U, new_job_num), add_jobs(recommendations, new_job_num), add_jobs(S, new_job_num)
        # job_metadata = update_job_metadata(job_metadata, new_job_num)
        # job_metadata_reverse = {v: k for k, v in job_metadata.items()}
        if new_job_num > 0 or new_user_num > 0:
            self.U_sub = self.add_jobs_users(self.U_sub, self.U, new_job_num, new_user_num)
            self.P_sub = self.add_jobs_users(self.P_sub, self.P, new_job_num, new_user_num)
            self.S_sub = self.add_jobs_users(self.S_sub, self.S, new_job_num, new_user_num)
            print('U_sub shape: ', self.U_sub.shape)
            print('P_sub shape: ', self.P_sub.shape)
            print('S_sub shape: ', self.S_sub.shape)
            self.update_job_metadata(new_job_num)
            self.update_user_metadata(new_user_num)
            self.lookup_dict = {}
            self.user_temp_data = {}

    # def shuffle_rec(P):
    #     rand_rec = P.copy()
    #     rand_rec = rand_rec[:,np.random.permutation(rand_rec.shape[1])]
    #     return rand_rec

    def add_jobs(self, M_sub, M, new_job_num):  # refactor this function, accept one matrix as input
        """Widen M_sub by new_job_num columns: slice them from the full
        matrix M when available, otherwise append random columns.

        NOTE(review): the fallback uses np.concatenate on what may be a
        torch tensor, so the result type can flip to ndarray — callers
        appear to tolerate this, but confirm.
        """
        if new_job_num == 0:
            return M_sub
        if len(M[0]) > len(M_sub[0]) + new_job_num:
            M_updated = M[:len(M_sub), :len(M_sub[0]) + new_job_num].clone()
        else:
            # random number between 0 and 1 with size (S.shape[0],new_job_num)
            new_jobM = np.random.rand(M.shape[0], new_job_num)
            # concat new jobM to M as new columns
            M_updated = np.concatenate((M_sub, new_jobM), axis=1)

        return M_updated

    def add_users(self, M_sub, M, new_user_num):  # refactor this function, accept one matrix as input
        """Lengthen M_sub by new_user_num rows; same slice-or-random
        strategy (and same type caveat) as add_jobs."""
        if new_user_num == 0:
            return M_sub
        if len(M) > len(M_sub) + new_user_num:
            M_updated = M[:len(M_sub) + new_user_num, :len(M_sub[0])].clone()
        else:
            # random number between 0 and 1 with size (new_user_num,S.shape[1])
            new_userM = np.random.rand(new_user_num, M.shape[1])
            # concat new userM to M as new rows
            M_updated = np.concatenate((M_sub, new_userM), axis=0)

        return M_updated

    def add_jobs_users(self, M_sub, M, new_job_num, new_user_num):
        """Apply add_jobs then add_users to grow M_sub in both dimensions."""
        # use add_jobs and add_users to add new jobs and users
        M_updated = self.add_jobs(M_sub, M, new_job_num)
        M_updated = self.add_users(M_updated, M, new_user_num)
        print('M_updated shape: ', M_updated.shape)
        return M_updated

    def tweak_P(self, this_user):
        """Randomly boost the newest job's score for this user (80% of
        the time) and for one other random user, so refreshes visibly
        change the recommendations."""
        # generate a random float between 0 and 1
        prob = random.random()
        if prob > 0.2:
            self.P_sub[int(this_user),-1] = 1.
        # 1 random indices of users within the range of P.shape[0]
        user_indices = np.random.randint(0, self.P_sub.shape[0], 1)
        self.P_sub[user_indices, -1] = 1.

    def update_job_metadata(self, new_job_num):
        """Append placeholder titles for jobs added beyond the full P.

        NOTE(review): only fires when P_sub grew wider than P itself.
        """
        if len(self.P_sub[0]) > len(self.P[0]):
            for i in range(new_job_num):
                self.job_metadata[len(self.job_metadata)] = 'Job {}'.format(len(self.job_metadata))
                self.job_metadata_reverse['Job {}'.format(len(self.job_metadata_reverse))] = len(self.job_metadata_reverse)

    def update_user_metadata(self, new_user_num):  # TODO: generate fake user metadata for CB
        """Append randomly generated metadata rows for newly added users."""
        if new_user_num > 0:
            if len(self.P_sub) > len(self.P):
                # make a new dataframe with new user metadata
                new_user_metadata = {}
                new_user_metadata['Id'] = [str(i) for i in range(len(self.user_metadata), len(self.user_metadata) + new_user_num)]
                new_user_metadata['Sex'] = np.random.choice([0, 1], size=new_user_num, p=[.4, .6])
                new_user_metadata['Edu'] = np.random.choice([0, 1, 2], size=new_user_num, p=[.2, .6, 0.2])
                new_user_metadata = pd.DataFrame(new_user_metadata)
                new_user_metadata['Sex'] = new_user_metadata['Sex'].map({0:'F', 1:'M'})
                new_user_metadata['Edu'] = new_user_metadata['Edu'].map({0:'High school', 1:'College', 2:'Graduate+'})
                # concat new user metadata to old user metadata
                self.user_metadata = pd.concat([self.user_metadata, new_user_metadata], ignore_index=True)
                # print(user_metadata)
157
+
158
+
159
+
utils/metrics.py ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+ from collections import defaultdict
6
+
7
+ np.set_printoptions(precision=4)
8
+ from scipy.stats import rankdata
9
+
10
+
11
+ """Information Retrieval metrics
12
+ Useful Resources:
13
+ http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
14
+ http://www.nii.ac.jp/TechReports/05-014E.pdf
15
+ http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
16
+ http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
17
+ Learning to Rank for Information Retrieval (Tie-Yan Liu)
18
+ """
19
+
20
+
21
def mean_reciprocal_rank(rs):
    """Mean reciprocal rank over a collection of ranked result lists.

    Relevance is binary (any nonzero entry is relevant) and the first
    element of each list is rank 1. A list with no relevant item
    contributes 0. Example from
    http://en.wikipedia.org/wiki/Mean_reciprocal_rank

    >>> mean_reciprocal_rank([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
    0.61111111111111105

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item).

    Returns:
        Mean reciprocal rank.
    """
    per_query = []
    for r in rs:
        hit_positions = np.asarray(r).nonzero()[0]
        # reciprocal of the 1-based rank of the first relevant item
        per_query.append(1. / (hit_positions[0] + 1) if hit_positions.size else 0.)
    return np.mean(per_query)
42
+
43
+
44
def r_precision(r):
    """Precision measured at the position of the last relevant document.

    Relevance is binary (nonzero is relevant).

    >>> r_precision([0, 0, 1])
    0.33333333333333331
    >>> r_precision([1, 0, 0])
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item).

    Returns:
        R Precision (0.0 when there is no relevant document).
    """
    relevant = np.asarray(r) != 0
    hit_positions = relevant.nonzero()[0]
    if hit_positions.size == 0:
        return 0.
    # fraction of relevant items up to and including the last hit
    last_hit = hit_positions[-1]
    return np.mean(relevant[:last_hit + 1])
67
+
68
+
69
def precision_at_k(r, k):
    """Precision over the first k results.

    Relevance is binary (nonzero is relevant).

    >>> precision_at_k([0, 0, 1], 1)
    0.0
    >>> precision_at_k([0, 0, 1], 3)
    0.33333333333333331

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item).
        k: Cut-off; must be >= 1.

    Returns:
        Precision @ k.

    Raises:
        ValueError: If len(r) < k.
    """
    assert k >= 1
    top_k = np.asarray(r)[:k] != 0
    # the slice silently truncates, so detect short inputs explicitly
    if top_k.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(top_k)
96
+
97
+
98
def average_precision(r):
    """Average precision (area under the precision-recall curve).

    Relevance is binary (nonzero is relevant): at each relevant position
    the precision-so-far is taken, and those precisions are averaged.

    >>> average_precision([1, 1, 0, 1, 0, 1, 0, 0, 0, 1])
    0.78333333333333333

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item).

    Returns:
        Average precision (0.0 when nothing is relevant).
    """
    relevant = np.asarray(r) != 0
    hits = 0
    precisions = []
    for position, is_relevant in enumerate(relevant):
        if is_relevant:
            hits += 1
            # precision over the first (position + 1) results
            precisions.append(hits / (position + 1.))
    if not precisions:
        return 0.
    return np.mean(precisions)
118
+
119
+
120
def mean_average_precision(rs):
    """Mean of the average precision over several ranked result lists.

    Relevance is binary (nonzero is relevant).

    >>> mean_average_precision([[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]])
    0.39166666666666666

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item).

    Returns:
        Mean average precision.
    """
    ap_scores = [average_precision(ranking) for ranking in rs]
    return np.mean(ap_scores)
136
+
137
+
138
def dcg_at_k(r, k, method=0):
    """Discounted cumulative gain (DCG) at rank k.

    Relevance is positive real values; binary relevance also works.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
            If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Discounted cumulative gain (0. for an empty ranking)

    Raises:
        ValueError: if method is neither 0 nor 1
    """
    # np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float)
    # is the supported equivalent.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.
175
+
176
+
177
def ndcg_at_k(r, k, method=0):
    """Normalized discounted cumulative gain (NDCG) at rank k.

    Relevance is positive real values; binary relevance also works.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
            If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Normalized discounted cumulative gain (0. when the ideal DCG is 0)
    """
    # Ideal DCG: the same scores in the best possible order.
    ideal = dcg_at_k(sorted(r, reverse=True), k, method)
    return dcg_at_k(r, k, method) / ideal if ideal else 0.
208
+
209
+
210
+ """
211
+ Wealth inequality
212
+ """
213
+
214
+
215
def gini(arr):
    """Gini index of a numpy array of values.

    Gini = 2 * sum_i(i * y_i) / (n * sum_i(y_i)) - (n + 1) / n,
    with y sorted ascending and i running from 1 to n.
    """
    y = np.sort(arr)  # ascending copy; the input array is left untouched
    n = arr.size
    weighted = sum((pos + 1) * val for pos, val in enumerate(y))
    return (2. / n) * weighted / y.sum() - (n + 1.) / n
224
+
225
+
226
+ """
227
+ Expected envy and inferiority under probabilistic recommendation as weighted sampling with replacement
228
+ """
229
+
230
+
231
def expected_utility_u(Ru, ps, k):
    """Expected utility of user u: k draws from item distribution ps,
    scored by u's relevance vector Ru."""
    return (Ru @ ps) * k
233
+
234
+
235
def expected_utility(R, Pi, k):
    """Per-user expected utility: row-wise sum of R * Pi scaled by slate
    size k. Works for numpy arrays and torch tensors alike."""
    weighted = R * Pi * k
    return weighted.sum(axis=1)
239
+
240
+
241
def expected_envy_u_v(Ru, pus, pvs, k):
    """Expected envy of user u toward v: utility gap (under u's relevances
    Ru) between v's item distribution pvs and u's own pus, times k draws."""
    gap = pvs - pus
    return (Ru @ gap) * k
243
+
244
+
245
def prob_in(ps, k):
    """Probability that each item shows up at least once in k independent
    draws with per-draw inclusion probabilities ps."""
    miss = (1 - ps) ** k  # probability of never being drawn
    return 1 - miss
247
+
248
+
249
def prob_in_approx(ps, k):
    """Linear (union-bound) approximation of prob_in, valid for small ps."""
    return ps * k
251
+
252
+
253
def expected_inferiority_u_v(Ru, Rv, pus, pvs, k, compensate=False, approx=False):
    """Expected inferiority of user u w.r.t. v: relevance gap on items both
    users are likely to receive within a k-sized slate.

    :param compensate: if False, gaps where u beats v are zeroed out
    :param approx: if True, use the linear k*p approximation of the
        at-least-once inclusion probability
    """
    gap = Rv - Ru
    if not compensate:
        gap = np.clip(gap, a_min=0, a_max=None)
    # prob_in / prob_in_approx inlined below
    if approx:
        joint = (k * pus) * (k * pvs)
    else:
        joint = (1 - (1 - pus) ** k) * (1 - (1 - pvs) ** k)
    return gap @ joint
261
+
262
+
263
def expected_envy(R, Pi, k):
    """
    Measure expected envy for k-sized recommendation according to rec strategy Pi
    with respect to relevancy scores R.
    :param R: m x n real-valued matrix
    :param Pi: m x n Markov matrix (or binary matrix for discrete rec)
    :return: E: m x m matrix where E[u, v] = clipped envy from u to v
    """
    # Pi must be row-stochastic, or binary for a discrete recommendation.
    assert np.all(np.isclose(Pi.sum(axis=1), 1.)) or np.array_equal(Pi, Pi.astype(bool))
    num_users = len(R)
    E = np.zeros((num_users, num_users))
    for u in range(num_users):
        for v in range(num_users):
            if u != v:
                # expected_envy_u_v inlined: utility gap between policies
                E[u, v] = (R[u] @ (Pi[v] - Pi[u])) * k
    return np.clip(E, a_min=0., a_max=None)
282
+
283
+
284
def expected_inferiority(R, Pi, k, compensate=True, approx=False):
    """
    Measure expected inferiority for k-sized recommendation according to rec
    strategy Pi with respect to relevancy scores R.
    :param R: m x n real-valued matrix
    :param Pi: m x n Markov matrix (or binary matrix for discrete rec)
    :param k: slate size
    :return: I: m x m matrix, I[u, v] = clipped inferiority of u w.r.t. v
    """
    # Pi must be row-stochastic, or binary for a discrete recommendation.
    assert np.all(np.isclose(Pi.sum(axis=1), 1.)) or np.array_equal(Pi, Pi.astype(bool))
    num_users = len(R)
    I = np.zeros((num_users, num_users))
    for u in range(num_users):
        for v in range(num_users):
            if u == v:
                continue
            I[u, v] = expected_inferiority_u_v(R[u], R[v], Pi[u], Pi[v], k=k,
                                               approx=approx, compensate=compensate)
    return np.clip(I, a_min=0., a_max=None)
306
+
307
+
308
def expected_envy_torch(R, Pi, k):
    """Torch version of expected_envy: pairwise envy matrix, clamped at 0."""
    num_users = len(R)
    E = torch.zeros(num_users, num_users)
    for u in range(num_users):
        for v in range(num_users):
            if u != v:
                # expected_envy_u_v inlined for the torch case
                E[u, v] = (R[u] @ (Pi[v] - Pi[u])) * k
    return torch.clamp(E, min=0.)
318
+
319
+
320
def expected_envy_torch_vec(R, P, k):
    """Vectorized torch envy: cross-utility matrix minus each user's own
    utility (the diagonal), clamped at 0 and scaled by k."""
    cross = R.matmul(P.t())                  # cross[u, v] = R[u] @ P[v]
    own = torch.diagonal(cross, 0).reshape(-1, 1)
    return k * torch.clamp(cross - own, min=0.)
324
+
325
+
326
def expected_inferiority_torch(R, Pi, k, compensate=False, approx=False):
    """Torch version of expected_inferiority: pairwise inferiority matrix,
    clamped at 0."""
    num_users, _ = R.shape
    I = torch.zeros((num_users, num_users))
    for u in range(num_users):
        for v in range(num_users):
            if u == v:
                continue
            # prob_in / prob_in_approx inlined for the torch case
            if approx:
                joint_prob = (k * Pi[v]) * (k * Pi[u])
            else:
                joint_prob = (1 - (1 - Pi[v]) ** k) * (1 - (1 - Pi[u]) ** k)
            if compensate:
                gap = R[v] - R[u]
            else:
                gap = torch.clamp(R[v] - R[u], min=0., max=None)
            I[u, v] = gap @ joint_prob
    return torch.clamp(I, min=0.)
344
+
345
+
346
def expected_inferiority_torch_vec(R, P, k, compensate=False, approx=False):
    """Vectorized torch inferiority: one pass per user, broadcasting the
    relevance gaps against the joint at-least-once inclusion probability."""
    num_users, _ = R.shape
    I = torch.zeros((num_users, num_users))
    # probability each (user, item) pair appears within k draws
    if approx:
        hit_prob = P * k
    else:
        hit_prob = 1 - (1 - P).pow(k)
    for u in range(num_users):
        if compensate:
            gap = R - R[u]
        else:
            gap = torch.clamp(R - R[u], min=0.)
        I[u] = (gap * (hit_prob[u] * hit_prob)).sum(1)
    return I
354
+
355
+
356
def slow_onehot(idx, P):
    """Multi-hot encode idx (one row of selected column indices per user)
    into a zero tensor shaped like P."""
    res = torch.zeros_like(P)
    for row, cols in enumerate(idx):
        res[row, cols] = 1.
    return res
362
+
363
+
364
def eiu_cut_off(R, Pi, k, agg=True):
    """
    Evaluate envy, inferiority, utility based on top-k cut-off recommendation.
    :param R: relevance matrix
    :param Pi: recommendation scores/policy
    :param k: slate size
    :param agg: if True, aggregate each metric over users
    :return: envy, inferiority, utility
    """
    # Deterministic slate: multi-hot of the k highest-scored items per user.
    rec_onehot = slow_onehot(torch.topk(Pi, k, dim=1)[1], Pi)
    envy = expected_envy_torch_vec(R, rec_onehot, k=1)
    inferiority = expected_inferiority_torch_vec(R, rec_onehot, k=1,
                                                 compensate=False, approx=False)
    utility = expected_utility(R, rec_onehot, k=1)
    if not agg:
        return envy, inferiority, utility
    return envy.sum(-1).mean(), inferiority.sum(-1).mean(), utility.mean()
384
+
385
+
386
def eiu_cut_off2(R, Pi, k, agg=True):
    """
    Evaluate envy, inferiority, utility based on top-k cut-off recommendation.
    :param R: pair (S, U) — S scores inferiority, U scores envy and utility
    :param Pi: recommendation scores/policy
    :param k: slate size
    :param agg: if True, aggregate each metric over users
    :return: envy, inferiority, utility
    """
    S, U = R
    S = S if isinstance(S, torch.Tensor) else torch.tensor(S)
    U = U if isinstance(U, torch.Tensor) else torch.tensor(U)
    Pi = Pi if isinstance(Pi, torch.Tensor) else torch.tensor(Pi)
    # Deterministic slate: multi-hot of the k highest-scored items per user.
    rec_onehot = slow_onehot(torch.topk(Pi, k, dim=1)[1], Pi)
    envy = expected_envy_torch_vec(U, rec_onehot, k=1)
    inferiority = expected_inferiority_torch_vec(S, rec_onehot, k=1,
                                                 compensate=False, approx=False)
    utility = expected_utility(U, rec_onehot, k=1)
    if not agg:
        return envy, inferiority, utility
    return envy.sum(-1).mean(), inferiority.sum(-1).mean(), utility.mean()
413
+
414
+
415
+ """
416
+ Global congestion metrics
417
+ """
418
+
419
+
420
def get_competitors(rec_per_job, rec):
    """Look up, for every user's recommended jobs, how many users were
    recommended each of those jobs.

    :param rec_per_job: length-n array of per-job recommendation counts
    :param rec: m x k tensor/array of recommended job ids per user
    :return: m x k array of competitor counts
    """
    competitors = []
    for row in rec:
        if len(row) == 1:
            # single-item slate: keep the extra nesting of the original output
            competitors.append([rec_per_job[row]])
        else:
            competitors.append(rec_per_job[row])
    return np.array(competitors)
429
+
430
+
431
def get_better_competitor_scores(rec, R):
    """For each user and each recommended job, the mean competitor score
    minus the user's own score on that job.

    :param rec: m x k tensor of recommended job ids per user
    :param R: m x n score matrix
    :return: m x k array of mean score differences
    """
    m, n = R.shape
    _, k = rec.shape
    users_per_job = defaultdict(list)
    for uid, row in enumerate(rec):
        for jb in row:
            users_per_job[jb.item()].append(uid)

    mean_diffs = np.zeros((m, k))
    for uid in range(m):
        jobs = rec[uid].numpy()
        diffs = np.zeros(k)
        for slot, jb in enumerate(jobs):
            own_score = R[uid, jb]
            rivals = users_per_job[jb].copy()
            rivals.remove(uid)
            rival_scores = R[rivals, jb]
            if not rivals:
                rival_scores = np.zeros(1)  # TODO if no competition, then it is the negative of my own score
            diffs[slot] = rival_scores.mean() - own_score
        # NOTE: negative diffs (user beats the field) are kept; callers clip.
        mean_diffs[uid] = diffs
    return mean_diffs
455
+
456
+
457
def get_num_better_competitors(rec, R):
    """For each user and each recommended job, count the competitors (other
    users recommended the same job) with a strictly higher score.

    :param rec: m x k tensor of recommended job ids per user
    :param R: m x n score matrix
    :return: m x k array of better-competitor counts
    """
    m, n = R.shape
    _, k = rec.shape
    users_per_job = defaultdict(list)
    for uid, row in enumerate(rec):
        for jb in row:
            users_per_job[jb.item()].append(uid)

    num_better = np.zeros((m, k))
    for uid in range(m):
        jobs = rec[uid].numpy()
        row_counts = np.zeros(k)
        for slot, jb in enumerate(jobs):
            rivals = users_per_job[jb].copy()
            rivals.remove(uid)
            rival_scores = R[rivals, jb]
            row_counts[slot] = ((rival_scores - R[uid, jb]) > 0).sum()
        num_better[uid] = row_counts
    return num_better
478
+
479
+
480
def get_scores_ids_per_job(rec, R):
    """Collect, per job, the scores and user ids of everyone it was
    recommended to.

    :param rec: m x k tensor of recommended job ids per user
    :param R: m x n score tensor
    :return: (scores_per_job, ids_per_job) dicts keyed by job id
    """
    scores_per_job = defaultdict(list)
    ids_per_job = defaultdict(list)
    for uid, row in enumerate(rec):
        for jb in row:
            job = jb.item()
            ids_per_job[job].append(uid)
            scores_per_job[job].append(R[uid, job].item())
    return scores_per_job, ids_per_job
491
+
492
+
493
def get_rank(a, method='ordinal', axis=None, descending=False):
    """Rank the values of a; with descending=True, rank 1 is the largest."""
    data = np.array(a) * -1 if descending else a
    return stats.rankdata(data, method=method, axis=axis)
497
+
498
+
499
def get_ranks_per_job(scores_rec):
    """Rank (descending, 1-based) each job's applicant scores."""
    ranks_per_job = defaultdict(list)
    for jb, scores in scores_rec.items():
        ranks_per_job[jb] = get_rank(scores, descending=True)
    return ranks_per_job
504
+
505
+
506
def get_ranks_per_user(ranks_per_job, ids_per_job):
    """Regroup per-job ranks into per-user lists of (0-based) ranks.

    NOTE: mutates ranks_per_job in place, shifting every rank to 0-based.
    """
    for jb, ranks in ranks_per_job.items():
        ranks_per_job[jb] = [r - 1 for r in ranks]
    ranks_per_user = defaultdict(list)
    for jb, users in ids_per_job.items():
        shifted = ranks_per_job[jb]
        for pos, uid in enumerate(users):
            ranks_per_user[uid].append(shifted[pos])
    return ranks_per_user
515
+
516
+
517
def calculate_global_metrics(res, R, k=10):
    """Compute global congestion metrics for the top-k recommendation
    induced by score matrix res, evaluated against score matrix R.

    :param res: m x n recommendation scores (numpy array or torch tensor)
    :param R: m x n score tensor (moved to CPU here)
    :param k: slate size
    :return: dict with mean_competitors, mean_better_competitors,
        mean_scores_diff, mean_rank, gini_index
    """
    m, n = res.shape
    if not torch.is_tensor(res):
        res = torch.from_numpy(res)
    _, rec = torch.topk(res, k, dim=1)
    rec_onehot = slow_onehot(rec, res)
    # .cpu() is a no-op on CPU tensors and required for CUDA ones; this
    # replaces the former bare `except:` fallback, which would also have
    # silently swallowed unrelated errors.
    rec_per_job = rec_onehot.sum(axis=0).cpu().numpy()
    rec = rec.cpu()
    R = R.cpu()
    opt_competitors = get_competitors(rec_per_job, rec)

    # mean number of competitors per recommended job
    mean_competitors = opt_competitors.mean()

    # mean number of strictly better competitors per person
    mean_better_competitors = get_num_better_competitors(rec, R).mean()

    # mean (competitor score - own score), negatives clipped to 0
    mean_diff_scores = get_better_competitor_scores(rec, R)
    mean_diff_scores[mean_diff_scores < 0] = 0.
    mean_diff_scores = mean_diff_scores.mean()

    # inequality of exposure across jobs
    gini_index = gini(rec_per_job)

    # NOTE(review): 'mean_rank' currently reuses mean_better_competitors —
    # the true rank computation is disabled (cf. calculate_global_metrics2);
    # confirm this placeholder is intended.
    return {'mean_competitors': mean_competitors,
            'mean_better_competitors': mean_better_competitors,
            'mean_scores_diff': mean_diff_scores,
            'mean_rank': mean_better_competitors,
            'gini_index': gini_index}
555
+
556
+
557
def calculate_global_metrics2(res, R, k=10):
    """Like calculate_global_metrics, but R is a pair (S, U): S drives the
    competition metrics and the mean rank is actually computed here.

    :param res: m x n recommendation scores (numpy array or torch tensor)
    :param R: pair (S, U) of m x n score tensors (moved to CPU here)
    :param k: slate size
    :return: dict with mean_competitors, mean_better_competitors,
        mean_scores_diff, mean_rank, gini_index
    """
    S, U = R
    m, n = res.shape
    if not torch.is_tensor(res):
        res = torch.from_numpy(res)
    _, rec = torch.topk(res, k, dim=1)
    rec_onehot = slow_onehot(rec, res)
    # .cpu() is a no-op on CPU tensors and required for CUDA ones; this
    # replaces the former bare `except:` fallback, which would also have
    # silently swallowed unrelated errors.
    rec_per_job = rec_onehot.sum(axis=0).cpu().numpy()
    rec = rec.cpu()
    S = S.cpu()
    U = U.cpu()
    opt_competitors = get_competitors(rec_per_job, rec)

    # mean number of competitors per recommended job
    mean_competitors = opt_competitors.mean()

    # mean number of strictly better competitors per person
    mean_better_competitors = get_num_better_competitors(rec, S).mean()

    # mean (competitor score - own score), negatives clipped to 0
    mean_diff_scores = get_better_competitor_scores(rec, S)
    mean_diff_scores[mean_diff_scores < 0] = 0.
    mean_diff_scores = mean_diff_scores.mean()

    # mean 0-based rank of each user among competitors on their jobs
    scores_opt, ids_opt = get_scores_ids_per_job(rec, S)
    ranks_opt = get_ranks_per_job(scores_opt)
    ranks_per_user_opt = get_ranks_per_user(ranks_opt, ids_opt)
    mean_rank = np.array(list(ranks_per_user_opt.values())).mean()

    # inequality of exposure across jobs
    gini_index = gini(rec_per_job)

    return {'mean_competitors': mean_competitors,
            'mean_better_competitors': mean_better_competitors,
            'mean_scores_diff': mean_diff_scores,
            'mean_rank': mean_rank,
            'gini_index': gini_index}
597
+
598
def get_scores_per_job(rec, S):
    """Collect, per job, the scores of every user it was recommended to."""
    scores_per_job = defaultdict(list)
    for uid, row in enumerate(rec):
        for jb in row:
            job = jb.item()
            scores_per_job[job].append(S[uid, job].item())
    return scores_per_job
606
+
utils/monitor.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ufile: data/U.pkl
2
+ # Sfile: data/S.pkl
3
+ # Pfile: data/P.pkl
4
+ # user_meta_file: data/user_meta_data.pkl
5
+ # user_groups: ['degree']
6
+ # job_meta_file: data/job_meta_data.pkl
7
+ Ufile: fake_data/U.pkl
8
+ Sfile: fake_data/S.pkl
9
+ Pfile: fake_data/P.pkl
10
+ user_meta_file: fake_data/user_metadata.pkl
11
+ user_groups: ['Sex', 'Edu']
12
+ job_meta_file: ''
13
+ server_name: '10.10.133.46'
14
+ server_port: 1122
15
+ role: user