nlpblogs committed on
Commit
9770541
·
verified ·
1 Parent(s): eb38733

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -533
app.py CHANGED
@@ -40,11 +40,11 @@ with st.sidebar:
40
  ''')
41
 
42
 
43
- st.subheader("Candidate Profile 1", divider = "green")
44
 
45
  txt = st.text_area("Job description", key = "text 1")
46
  job = pd.Series(txt, name="Text")
47
-
48
 
49
  if 'upload_count' not in st.session_state:
50
  st.session_state['upload_count'] = 0
@@ -53,552 +53,67 @@ max_attempts = 2
53
 
54
  if st.session_state['upload_count'] < max_attempts:
55
  uploaded_files = st.file_uploader(
56
- "Upload your resume in .pdf format", type="pdf", key="candidate 1"
57
  )
58
- if uploaded_files:
59
- st.session_state['upload_count'] += 1
60
- for uploaded_file in uploaded_files:
61
- pdf_reader = PdfReader(uploaded_file)
62
- text_data = ""
63
- for page in pdf_reader.pages:
64
- text_data += page.extract_text()
65
- data = pd.Series(text_data, name = 'Text')
66
- frames = [job, data]
67
- result = pd.concat(frames)
68
-
69
-
70
- model = GLiNER.from_pretrained("urchade/gliner_base")
71
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
72
- entities = model.predict_entities(text_data, labels)
73
- df = pd.DataFrame(entities)
74
-
75
-
76
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
77
- values='score', color='label')
78
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
79
- st.plotly_chart(fig1, key = "figure 1")
80
-
81
- vectorizer = TfidfVectorizer()
82
- tfidf_matrix = vectorizer.fit_transform(result)
83
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
84
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
85
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
86
-
87
-
88
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
89
- x=['Resume 1', 'Jon Description'],
90
- y=['Resume 1', 'Job Description'])
91
- st.plotly_chart(fig2, key = "figure 2")
92
-
93
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
94
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
95
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
96
-
97
- else:
98
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
99
 
100
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
101
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
102
-
103
-
104
-
105
-
106
-
107
- st.subheader("Candidate Profile 2", divider = "green")
108
-
109
- txt = st.text_area("Job description", key = "text 2")
110
- job = pd.Series(txt, name="Text")
111
- if 'upload_count' not in st.session_state:
112
- st.session_state['upload_count'] = 0
113
- max_attempts = 2
114
- if st.session_state['upload_count'] < max_attempts:
115
- uploaded_files = st.file_uploader(
116
- "Upload your resume in .pdf format", type="pdf", key="candidate 2"
117
- )
118
- if uploaded_files:
119
- st.session_state['upload_count'] += 1
120
- for uploaded_file in uploaded_files:
121
- pdf_reader = PdfReader(uploaded_file)
122
- text_data = ""
123
- for page in pdf_reader.pages:
124
- text_data += page.extract_text()
125
- data = pd.Series(text_data, name = 'Text')
126
- frames = [job, data]
127
- result = pd.concat(frames)
128
-
129
-
130
- model = GLiNER.from_pretrained("urchade/gliner_base")
131
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
132
- entities = model.predict_entities(text_data, labels)
133
- df = pd.DataFrame(entities)
134
-
135
-
136
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
137
- values='score', color='label')
138
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
139
- st.plotly_chart(fig1, key = "figure 3")
140
-
141
- vectorizer = TfidfVectorizer()
142
- tfidf_matrix = vectorizer.fit_transform(result)
143
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
144
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
145
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
146
-
147
 
148
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
149
- x=['Resume 1', 'Jon Description'],
150
- y=['Resume 1', 'Job Description'])
151
- st.plotly_chart(fig2, key = "figure 4")
152
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
153
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
154
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
155
- else:
156
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
157
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
158
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
159
-
160
-
161
-
162
-
163
-
164
-
165
- st.subheader("Candidate Profile 3", divider = "green")
166
-
167
- txt = st.text_area("Job description", key = "text 3")
168
- job = pd.Series(txt, name="Text")
169
- if 'upload_count' not in st.session_state:
170
- st.session_state['upload_count'] = 0
171
- max_attempts = 2
172
- if st.session_state['upload_count'] < max_attempts:
173
- uploaded_files = st.file_uploader(
174
- "Upload your resume in .pdf format", type="pdf", key="candidate 3"
175
- )
176
- if uploaded_files:
177
- st.session_state['upload_count'] += 1
178
- for uploaded_file in uploaded_files:
179
- pdf_reader = PdfReader(uploaded_file)
180
- text_data = ""
181
- for page in pdf_reader.pages:
182
- text_data += page.extract_text()
183
- data = pd.Series(text_data, name = 'Text')
184
- frames = [job, data]
185
- result = pd.concat(frames)
186
-
187
-
188
- model = GLiNER.from_pretrained("urchade/gliner_base")
189
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
190
- entities = model.predict_entities(text_data, labels)
191
- df = pd.DataFrame(entities)
192
-
193
-
194
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
195
  values='score', color='label')
196
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
197
- st.plotly_chart(fig1, key = "figure 5")
198
-
199
- vectorizer = TfidfVectorizer()
200
- tfidf_matrix = vectorizer.fit_transform(result)
201
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
202
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
203
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
204
-
205
 
206
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
207
- x=['Resume 1', 'Jon Description'],
208
- y=['Resume 1', 'Job Description'])
209
- st.plotly_chart(fig2, key = "figure 6")
210
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
211
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
212
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
213
- else:
214
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
215
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
216
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
217
-
218
-
219
- st.subheader("Candidate Profile 4", divider = "green")
220
-
221
- txt = st.text_area("Job description", key = "text 4")
222
- job = pd.Series(txt, name="Text")
223
- if 'upload_count' not in st.session_state:
224
- st.session_state['upload_count'] = 0
225
- max_attempts = 2
226
- if st.session_state['upload_count'] < max_attempts:
227
- uploaded_files = st.file_uploader(
228
- "Upload your resume in .pdf format", type="pdf", key="candidate 4"
229
- )
230
- if uploaded_files:
231
- st.session_state['upload_count'] += 1
232
- for uploaded_file in uploaded_files:
233
- pdf_reader = PdfReader(uploaded_file)
234
- text_data = ""
235
- for page in pdf_reader.pages:
236
- text_data += page.extract_text()
237
- data = pd.Series(text_data, name = 'Text')
238
- frames = [job, data]
239
- result = pd.concat(frames)
240
-
241
-
242
- model = GLiNER.from_pretrained("urchade/gliner_base")
243
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
244
- entities = model.predict_entities(text_data, labels)
245
- df = pd.DataFrame(entities)
246
-
247
-
248
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
249
- values='score', color='label')
250
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
251
- st.plotly_chart(fig1, key = "figure 7")
252
-
253
- vectorizer = TfidfVectorizer()
254
- tfidf_matrix = vectorizer.fit_transform(result)
255
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
256
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
257
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
258
-
259
 
260
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
261
- x=['Resume 1', 'Jon Description'],
262
- y=['Resume 1', 'Job Description'])
263
- st.plotly_chart(fig2, key = "figure 8")
264
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
265
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
266
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
267
- else:
268
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
269
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
270
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
271
-
272
-
273
-
274
-
275
 
276
- st.subheader("Candidate Profile 5", divider = "green")
277
-
278
- txt = st.text_area("Job description", key = "text 5")
279
- job = pd.Series(txt, name="Text")
280
- if 'upload_count' not in st.session_state:
281
- st.session_state['upload_count'] = 0
282
- max_attempts = 2
283
- if st.session_state['upload_count'] < max_attempts:
284
- uploaded_files = st.file_uploader(
285
- "Upload your resume in .pdf format", type="pdf", key="candidate 5"
286
- )
287
- if uploaded_files:
288
- st.session_state['upload_count'] += 1
289
- for uploaded_file in uploaded_files:
290
- pdf_reader = PdfReader(uploaded_file)
291
- text_data = ""
292
- for page in pdf_reader.pages:
293
- text_data += page.extract_text()
294
- data = pd.Series(text_data, name = 'Text')
295
- frames = [job, data]
296
- result = pd.concat(frames)
297
-
298
-
299
- model = GLiNER.from_pretrained("urchade/gliner_base")
300
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
301
- entities = model.predict_entities(text_data, labels)
302
- df = pd.DataFrame(entities)
303
-
304
-
305
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
306
- values='score', color='label')
307
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
308
- st.plotly_chart(fig1, key = "figure 9")
309
-
310
- vectorizer = TfidfVectorizer()
311
- tfidf_matrix = vectorizer.fit_transform(result)
312
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
313
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
314
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
315
-
316
 
317
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
318
- x=['Resume 1', 'Jon Description'],
319
- y=['Resume 1', 'Job Description'])
320
- st.plotly_chart(fig2, key = "figure 10")
321
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
322
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
323
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
324
- else:
325
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
326
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
327
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
328
-
329
-
330
 
331
- st.subheader("Candidate Profile 6", divider = "green")
332
-
333
- txt = st.text_area("Job description", key = "text 6")
334
- job = pd.Series(txt, name="Text")
335
- if 'upload_count' not in st.session_state:
336
- st.session_state['upload_count'] = 0
337
- max_attempts = 2
338
- if st.session_state['upload_count'] < max_attempts:
339
- uploaded_files = st.file_uploader(
340
- "Upload your resume in .pdf format", type="pdf", key="candidate 6"
341
- )
342
- if uploaded_files:
343
- st.session_state['upload_count'] += 1
344
- for uploaded_file in uploaded_files:
345
- pdf_reader = PdfReader(uploaded_file)
346
- text_data = ""
347
- for page in pdf_reader.pages:
348
- text_data += page.extract_text()
349
- data = pd.Series(text_data, name = 'Text')
350
- frames = [job, data]
351
- result = pd.concat(frames)
352
-
353
-
354
- model = GLiNER.from_pretrained("urchade/gliner_base")
355
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
356
- entities = model.predict_entities(text_data, labels)
357
- df = pd.DataFrame(entities)
358
-
359
-
360
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
361
- values='score', color='label')
362
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
363
- st.plotly_chart(fig1, key = "figure 11")
364
-
365
- vectorizer = TfidfVectorizer()
366
- tfidf_matrix = vectorizer.fit_transform(result)
367
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
368
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
369
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
370
-
371
-
372
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
373
- x=['Resume 1', 'Jon Description'],
374
- y=['Resume 1', 'Job Description'])
375
- st.plotly_chart(fig2, key = "figure 12")
376
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
377
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
378
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
379
- else:
380
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
381
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
382
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
383
-
384
-
385
 
386
- st.subheader("Candidate Profile 7", divider = "green")
387
-
388
- txt = st.text_area("Job description", key = "text 7")
389
- job = pd.Series(txt, name="Text")
390
- if 'upload_count' not in st.session_state:
391
- st.session_state['upload_count'] = 0
392
- max_attempts = 2
393
- if st.session_state['upload_count'] < max_attempts:
394
- uploaded_files = st.file_uploader(
395
- "Upload your resume in .pdf format", type="pdf", key="candidate 7"
396
- )
397
- if uploaded_files:
398
- st.session_state['upload_count'] += 1
399
- for uploaded_file in uploaded_files:
400
- pdf_reader = PdfReader(uploaded_file)
401
- text_data = ""
402
- for page in pdf_reader.pages:
403
- text_data += page.extract_text()
404
- data = pd.Series(text_data, name = 'Text')
405
- frames = [job, data]
406
- result = pd.concat(frames)
407
-
408
-
409
- model = GLiNER.from_pretrained("urchade/gliner_base")
410
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
411
- entities = model.predict_entities(text_data, labels)
412
- df = pd.DataFrame(entities)
413
-
414
-
415
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
416
- values='score', color='label')
417
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
418
- st.plotly_chart(fig1, key = "figure 13")
419
-
420
- vectorizer = TfidfVectorizer()
421
- tfidf_matrix = vectorizer.fit_transform(result)
422
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
423
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
424
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
425
-
426
-
427
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
428
- x=['Resume 1', 'Jon Description'],
429
- y=['Resume 1', 'Job Description'])
430
- st.plotly_chart(fig2, key = "figure 14")
431
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
432
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
433
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
434
  else:
435
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
436
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
437
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
438
 
439
-
440
-
441
- st.subheader("Candidate Profile 8", divider = "green")
442
-
443
- txt = st.text_area("Job description", key = "text 8")
444
- job = pd.Series(txt, name="Text")
445
- if 'upload_count' not in st.session_state:
446
- st.session_state['upload_count'] = 0
447
- max_attempts = 2
448
- if st.session_state['upload_count'] < max_attempts:
449
- uploaded_files = st.file_uploader(
450
- "Upload your resume in .pdf format", type="pdf", key="candidate 8"
451
- )
452
- if uploaded_files:
453
- st.session_state['upload_count'] += 1
454
- for uploaded_file in uploaded_files:
455
- pdf_reader = PdfReader(uploaded_file)
456
- text_data = ""
457
- for page in pdf_reader.pages:
458
- text_data += page.extract_text()
459
- data = pd.Series(text_data, name = 'Text')
460
- frames = [job, data]
461
- result = pd.concat(frames)
462
-
463
-
464
- model = GLiNER.from_pretrained("urchade/gliner_base")
465
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
466
- entities = model.predict_entities(text_data, labels)
467
- df = pd.DataFrame(entities)
468
-
469
-
470
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
471
- values='score', color='label')
472
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
473
- st.plotly_chart(fig1, key = "figure 16")
474
-
475
- vectorizer = TfidfVectorizer()
476
- tfidf_matrix = vectorizer.fit_transform(result)
477
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
478
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
479
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
480
-
481
-
482
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
483
- x=['Resume 1', 'Jon Description'],
484
- y=['Resume 1', 'Job Description'])
485
- st.plotly_chart(fig2, key = "figure 18")
486
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
487
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
488
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
489
- else:
490
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
491
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
492
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
493
-
494
-
495
 
496
- st.subheader("Candidate Profile 9", divider = "green")
497
-
498
- txt = st.text_area("Job description", key = "text 9")
499
- job = pd.Series(txt, name="Text")
500
- if 'upload_count' not in st.session_state:
501
- st.session_state['upload_count'] = 0
502
- max_attempts = 2
503
- if st.session_state['upload_count'] < max_attempts:
504
- uploaded_files = st.file_uploader(
505
- "Upload your resume in .pdf format", type="pdf", key="candidate 9"
506
- )
507
- if uploaded_files:
508
- st.session_state['upload_count'] += 1
509
- for uploaded_file in uploaded_files:
510
- pdf_reader = PdfReader(uploaded_file)
511
- text_data = ""
512
- for page in pdf_reader.pages:
513
- text_data += page.extract_text()
514
- data = pd.Series(text_data, name = 'Text')
515
- frames = [job, data]
516
- result = pd.concat(frames)
517
-
518
-
519
- model = GLiNER.from_pretrained("urchade/gliner_base")
520
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
521
- entities = model.predict_entities(text_data, labels)
522
- df = pd.DataFrame(entities)
523
-
524
-
525
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
526
- values='score', color='label')
527
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
528
- st.plotly_chart(fig1, key = "figure 17")
529
-
530
- vectorizer = TfidfVectorizer()
531
- tfidf_matrix = vectorizer.fit_transform(result)
532
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
533
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
534
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
535
-
536
-
537
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
538
- x=['Resume 1', 'Jon Description'],
539
- y=['Resume 1', 'Job Description'])
540
- st.plotly_chart(fig2, key = "figure 18")
541
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
542
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
543
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
544
- else:
545
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
546
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
547
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
548
-
549
-
550
 
551
- st.subheader("Candidate Profile 10", divider = "green")
552
-
553
- txt = st.text_area("Job description", key = "text 10")
554
- job = pd.Series(txt, name="Text")
555
- if 'upload_count' not in st.session_state:
556
- st.session_state['upload_count'] = 0
557
- max_attempts = 2
558
- if st.session_state['upload_count'] < max_attempts:
559
- uploaded_files = st.file_uploader(
560
- "Upload your resume in .pdf format", type="pdf", key="candidate 10"
561
- )
562
- if uploaded_files:
563
- st.session_state['upload_count'] += 1
564
- for uploaded_file in uploaded_files:
565
- pdf_reader = PdfReader(uploaded_file)
566
- text_data = ""
567
- for page in pdf_reader.pages:
568
- text_data += page.extract_text()
569
- data = pd.Series(text_data, name = 'Text')
570
- frames = [job, data]
571
- result = pd.concat(frames)
572
-
573
-
574
- model = GLiNER.from_pretrained("urchade/gliner_base")
575
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
576
- entities = model.predict_entities(text_data, labels)
577
- df = pd.DataFrame(entities)
578
-
579
-
580
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
581
- values='score', color='label')
582
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
583
- st.plotly_chart(fig1, key = "figure 19")
584
 
585
- vectorizer = TfidfVectorizer()
586
- tfidf_matrix = vectorizer.fit_transform(result)
587
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
588
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
589
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
590
-
591
-
592
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
593
- x=['Resume 1', 'Jon Description'],
594
- y=['Resume 1', 'Job Description'])
595
- st.plotly_chart(fig2, key = "figure 20")
596
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
597
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
598
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
599
- else:
600
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
601
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
602
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
603
-
604
 
 
40
  ''')
41
 
42
 
43
+
44
 
45
  txt = st.text_area("Job description", key = "text 1")
46
  job = pd.Series(txt, name="Text")
47
+ st.dataframe(job)
48
 
49
  if 'upload_count' not in st.session_state:
50
  st.session_state['upload_count'] = 0
 
53
 
54
  if st.session_state['upload_count'] < max_attempts:
55
  uploaded_files = st.file_uploader(
56
+ "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
57
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ if uploaded_files:
60
+ st.session_state['upload_count'] += 1
61
+ for uploaded_file in uploaded_files:
62
+ pdf_reader = PdfReader(uploaded_file)
63
+ text_data = ""
64
+ for page in pdf_reader.pages:
65
+ text_data += page.extract_text()
66
+ data = pd.Series(text_data, name = 'Text')
67
+ st.dataframe(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ frames = [job, data]
70
+ result = pd.concat(frames)
71
+ st.dataframe(result)
72
+
73
+ model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
74
+ labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
75
+ entities = model.predict_entities(text_data, labels)
76
+ df = pd.DataFrame(entities)
77
+ st.dataframe(entities)
78
+ st.dataframe(df)
79
+
80
+ import plotly.express as px
81
+ fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  values='score', color='label')
83
+ fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
84
+ st.plotly_chart(fig)
85
+
86
+
 
 
 
 
 
87
 
88
+ vectorizer = TfidfVectorizer()
89
+ tfidf_matrix = vectorizer.fit_transform(result)
90
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
91
+ st.subheader("TF-IDF Values:")
92
+ st.dataframe(tfidf_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
95
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
96
+ st.subheader("Cosine Similarity Matrix:")
97
+ st.dataframe(cosine_sim_df)
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ import plotly.express as px
100
+ st.subheader("A score closer to 1 means closer match")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
103
+ x=['text1', 'Jon Description'],
104
+ y=['text1', 'Job Description'])
105
+ st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
106
 
107
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
108
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
109
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  else:
112
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
 
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
115
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119