nlpblogs committed on
Commit
0274b27
·
verified ·
1 Parent(s): 2e1adec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -40
app.py CHANGED
@@ -39,63 +39,73 @@ with st.sidebar:
39
 
40
  ''')
41
 
42
- st.subheader("Candidate Profile 1", divider = "green")
43
-
44
- txt = st.text_area("Job description", key = "text 1")
45
  job = pd.Series(txt, name="Text")
46
 
47
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
50
 
51
- max_attempts = 2
52
 
53
  if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
- "Upload your resume in .pdf format", type="pdf", key="candidate 1"
56
  )
57
- if uploaded_files:
58
- st.session_state['upload_count'] += 1
59
- for uploaded_file in uploaded_files:
60
- pdf_reader = PdfReader(uploaded_file)
61
- text_data = ""
62
- for page in pdf_reader.pages:
63
- text_data += page.extract_text()
64
- data = pd.Series(text_data, name = 'Text')
65
- frames = [job, data]
66
- result = pd.concat(frames)
67
-
68
-
69
- model = GLiNER.from_pretrained("urchade/gliner_base")
70
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
71
- entities = model.predict_entities(text_data, labels)
72
- df = pd.DataFrame(entities)
73
-
74
-
75
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
 
 
 
 
76
  values='score', color='label')
77
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
78
- st.plotly_chart(fig1, key = "figure 1")
79
-
80
- vectorizer = TfidfVectorizer()
81
- tfidf_matrix = vectorizer.fit_transform(result)
82
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
83
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
84
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
85
-
 
 
86
 
87
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
 
 
 
88
  x=['Resume 1', 'Jon Description'],
89
  y=['Resume 1', 'Job Description'])
90
- st.plotly_chart(fig2, key = "figure 2")
91
 
92
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
93
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
94
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
95
 
96
  else:
97
- st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
98
-
99
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
100
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
101
 
 
39
 
40
  ''')
41
 
42
+ st.subheader ("Candidate Profile 1", divider = "green")
43
+
44
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
45
  job = pd.Series(txt, name="Text")
46
 
47
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
50
 
51
+ max_attempts = 3
52
 
53
  if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
+ "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
+
58
+ if uploaded_files:
59
+ st.session_state['upload_count'] += 1
60
+ for uploaded_file in uploaded_files:
61
+ pdf_reader = PdfReader(uploaded_file)
62
+ text_data = ""
63
+ for page in pdf_reader.pages:
64
+ text_data += page.extract_text()
65
+ data = pd.Series(text_data, name = 'Text')
66
+
67
+
68
+ frames = [job, data]
69
+ result = pd.concat(frames)
70
+
71
+
72
+ model = GLiNER.from_pretrained("urchade/gliner_base")
73
+ labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
74
+ entities = model.predict_entities(text_data, labels)
75
+ df = pd.DataFrame(entities)
76
+
77
+
78
+ st.subheader("Profile of candidate 1")
79
+ fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
80
  values='score', color='label')
81
+ fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
82
+ st.plotly_chart(fig1, key = "figure 1")
83
+
84
+
85
+
86
+ vectorizer = TfidfVectorizer()
87
+ tfidf_matrix = vectorizer.fit_transform(result)
88
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
89
+
90
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
91
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
92
 
93
+
94
+
95
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
96
+ fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
97
  x=['Resume 1', 'Jon Description'],
98
  y=['Resume 1', 'Job Description'])
99
+ st.plotly_chart(fig2, key = "figure 2")
100
 
101
+
102
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
103
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
104
+ st.write("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
105
 
106
  else:
107
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
108
+
109
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
110
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
111