nlpblogs committed on
Commit
e1fae35
·
verified ·
1 Parent(s): 2955054

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +489 -0
app.py CHANGED
@@ -104,12 +104,501 @@ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
104
 
105
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
 
106
 
107
# Entity types requested from GLiNER for every uploaded resume.
labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]

# Upload budget. The counter is stored under the single session key
# 'upload_count', so the budget is shared by every profile section.
# NOTE(review): a per-profile budget may have been intended - confirm.
max_attempts = 2


@st.cache_resource
def _load_gliner_model():
    """Return the GLiNER model, loading it once instead of once per upload."""
    return GLiNER.from_pretrained("urchade/gliner_base")


def _read_pdf_text(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Pages for which ``extract_text()`` yields nothing contribute an empty
    string, so the result is always a ``str``.
    """
    pdf_reader = PdfReader(uploaded_file)
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _render_resume_analysis(job, text_data, profile_number):
    """Draw the entity treemap and the job-vs-resume similarity heatmap.

    Args:
        job: One-element ``pd.Series`` holding the job description text.
        text_data: Text extracted from the candidate's resume.
        profile_number: Section number; used to derive unique chart keys
            and to label the similarity score.
    """
    # Row 0 is the job description, row 1 the resume.
    result = pd.concat([job, pd.Series(text_data, name="Text")])

    entities = _load_gliner_model().predict_entities(text_data, labels)
    if entities:
        fig1 = px.treemap(
            pd.DataFrame(entities),
            path=[px.Constant("all"), 'text', 'label'],
            values='score',
            color='label',
        )
        fig1.update_layout(margin=dict(t=50, l=25, r=25, b=25))
        # Keys follow the 2k-1 / 2k scheme so no two charts share a key.
        st.plotly_chart(fig1, key=f"figure {2 * profile_number - 1}")
    else:
        # Without entities the frame has no 'text'/'label' columns to plot.
        st.info("No entities were found in this resume.")

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(result)
    cosine_sim_matrix = cosine_similarity(tfidf_matrix)
    cosine_sim_df = pd.DataFrame(cosine_sim_matrix)

    # Axis labels are listed in the same order as the rows of `result`.
    axis_names = ['Job Description', 'Resume 1']
    fig2 = px.imshow(
        cosine_sim_df,
        text_auto=True,
        labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
        x=axis_names,
        y=axis_names,
    )
    st.plotly_chart(fig2, key=f"figure {2 * profile_number}")

    st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
    # Row 0 is the job description; the remaining columns are resumes.
    for similarity_score in cosine_sim_matrix[0][1:]:
        st.write(f"Similarity with Candidate Profile {profile_number}: {similarity_score:.4f}")


def _render_candidate_profile(profile_number):
    """Render one "Candidate Profile N" section.

    The section consists of a job-description text area, a PDF uploader
    and, once a file is present, the analysis charts. Widget keys are
    ``"text N"`` and ``"candidate N"``.
    """
    st.subheader(f"Candidate Profile {profile_number}", divider="green")

    txt = st.text_area("Job description", key=f"text {profile_number}")
    job = pd.Series(txt, name="Text")

    if 'upload_count' not in st.session_state:
        st.session_state['upload_count'] = 0

    if st.session_state['upload_count'] < max_attempts:
        uploaded_files = st.file_uploader(
            "Upload your resume in .pdf format", type="pdf", key=f"candidate {profile_number}"
        )
        if uploaded_files:
            st.session_state['upload_count'] += 1
            # A single-file uploader returns one file object, not a list;
            # iterating it directly would walk the file's bytes line by line.
            if not isinstance(uploaded_files, list):
                uploaded_files = [uploaded_files]
            for uploaded_file in uploaded_files:
                text_data = _read_pdf_text(uploaded_file)
                _render_resume_analysis(job, text_data, profile_number)
    else:
        st.warning(f"You have reached the maximum upload attempts ({max_attempts})")

    if st.session_state['upload_count'] > 0:
        st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")


# Profiles 2 through 10 are identical apart from their number.
for profile_number in range(2, 11):
    _render_candidate_profile(profile_number)
603
+
604
+