nlpblogs committed on
Commit
04ccb1c
·
verified ·
1 Parent(s): 1e10cc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -85
app.py CHANGED
@@ -41,10 +41,12 @@ with st.sidebar:
41
 
42
 
43
 
44
- st.subheader("Candidate Profile 1", divider="red")
45
- txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text 1")
 
46
  job = pd.Series(txt, name="Text")
47
 
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
50
 
@@ -54,57 +56,67 @@ if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
- if uploaded_files:
58
- st.session_state['upload_count'] += 1
59
- for uploaded_file in uploaded_files:
60
- pdf_reader = PdfReader(uploaded_file)
61
- text_data = ""
62
- for page in pdf_reader.pages:
63
- text_data += page.extract_text()
64
- data = pd.Series(text_data, name='Text')
65
- frames = [job, data]
66
- result = pd.concat(frames)
67
- model = GLiNER.from_pretrained("urchade/gliner_base")
68
- labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
69
- entities = model(text_data, labels=labels)
70
- df = pd.DataFrame(entities)
71
-
72
- st.subheader("Profile of candidate 1", divider="green")
73
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
74
- values='score', color='label')
75
- fig1.update_layout(margin=dict(t=50, l=25, r=25, b=25))
76
- st.plotly_chart(fig1, key = "Figure 1")
77
-
78
- vectorizer = TfidfVectorizer()
79
- tfidf_matrix = vectorizer.fit_transform(result)
80
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
81
-
82
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
83
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
84
-
85
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description", divider="green")
86
- fig2 = px.imshow(cosine_sim_df, text_auto=True,
87
- labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
88
- x=['Resume 1', 'Job Description'],
89
- y=['Resume 1', 'Job Description'])
90
- st.plotly_chart(fig2, key = "Figure 2")
91
-
92
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
93
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
94
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
 
 
 
 
 
 
 
 
 
95
  else:
96
- st.warning(f"You have reached the maximum upload attempts ({max_attempts}).")
97
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
98
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
99
-
100
-
101
 
102
-
103
 
104
 
105
 
106
 
107
- st.subheader("Candidate Profile 2", divider="red")
 
 
108
 
109
  if 'upload_count' not in st.session_state:
110
  st.session_state['upload_count'] = 0
@@ -113,52 +125,74 @@ max_attempts = 3
113
 
114
  if st.session_state['upload_count'] < max_attempts:
115
  uploaded_files = st.file_uploader(
116
- "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 2"
117
  )
118
- if uploaded_files:
119
- st.session_state['upload_count'] += 1
120
- for uploaded_file in uploaded_files:
121
- pdf_reader = PdfReader(uploaded_file)
122
- text_data = ""
123
- for page in pdf_reader.pages:
124
- text_data += page.extract_text()
125
- data = pd.Series(text_data, name='Text')
126
- frames = [job, data]
127
- result = pd.concat(frames)
128
- model = GLiNER.from_pretrained("urchade/gliner_base")
129
- labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
130
- entities = model(text_data, labels=labels)
131
- df = pd.DataFrame(entities)
132
-
133
- st.subheader("Profile of candidate 1", divider="green")
134
- fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
135
- values='score', color='label')
136
- fig3.update_layout(margin=dict(t=50, l=25, r=25, b=25))
137
- st.plotly_chart(fig3, key = "Figure 3")
138
-
139
- vectorizer = TfidfVectorizer()
140
- tfidf_matrix = vectorizer.fit_transform(result)
141
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
142
-
143
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
144
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
145
-
146
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description", divider="green")
147
- fig4 = px.imshow(cosine_sim_df, text_auto=True,
148
- labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
149
- x=['Resume 1', 'Job Description'],
150
- y=['Resume 1', 'Job Description'])
151
- st.plotly_chart(fig4, key = "Figure 4")
152
-
153
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
154
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
155
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
 
 
 
 
 
 
 
 
156
  else:
157
- st.warning(f"You have reached the maximum upload attempts ({max_attempts}).")
158
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
159
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
160
 
161
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
 
164
 
 
41
 
42
 
43
 
44
+ st.subheader ("Candidate Profile 1", divider = "green")
45
+
46
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
47
  job = pd.Series(txt, name="Text")
48
 
49
+
50
  if 'upload_count' not in st.session_state:
51
  st.session_state['upload_count'] = 0
52
 
 
56
  uploaded_files = st.file_uploader(
57
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
58
  )
59
+
60
+ if uploaded_files:
61
+ st.session_state['upload_count'] += 1
62
+ for uploaded_file in uploaded_files:
63
+ pdf_reader = PdfReader(uploaded_file)
64
+ text_data = ""
65
+ for page in pdf_reader.pages:
66
+ text_data += page.extract_text()
67
+ data = pd.Series(text_data, name = 'Text')
68
+
69
+
70
+ frames = [job, data]
71
+ result = pd.concat(frames)
72
+
73
+
74
+ model = GLiNER.from_pretrained("urchade/gliner_base")
75
+ labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
76
+ entities = model.predict_entities(text_data, labels)
77
+ df = pd.DataFrame(entities)
78
+
79
+
80
+ st.subheader("Profile of candidate 1")
81
+ fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
82
+ values='score', color='label')
83
+ fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
84
+ st.plotly_chart(fig1, key = "figure 1")
85
+
86
+
87
+
88
+ vectorizer = TfidfVectorizer()
89
+ tfidf_matrix = vectorizer.fit_transform(result)
90
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
91
+
92
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
93
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
94
+
95
+
96
+
97
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
98
+ fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
99
+ x=['Resume 1', 'Jon Description'],
100
+ y=['Resume 1', 'Job Description'])
101
+ st.plotly_chart(fig2, key = "figure 2")
102
+
103
+
104
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
105
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
106
+ st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
107
+
108
  else:
109
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
110
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
111
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
 
112
 
 
113
 
114
 
115
 
116
 
117
+ st.subheader ("Candidate Profile 2", divider = "green")
118
+
119
+
120
 
121
  if 'upload_count' not in st.session_state:
122
  st.session_state['upload_count'] = 0
 
125
 
126
  if st.session_state['upload_count'] < max_attempts:
127
  uploaded_files = st.file_uploader(
128
+ "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate 2"
129
  )
130
+
131
+ if uploaded_files:
132
+ st.session_state['upload_count'] += 1
133
+ for uploaded_file in uploaded_files:
134
+ pdf_reader = PdfReader(uploaded_file)
135
+ text_data = ""
136
+ for page in pdf_reader.pages:
137
+ text_data += page.extract_text()
138
+ data = pd.Series(text_data, name = 'Text')
139
+
140
+
141
+ frames = [job, data]
142
+ result = pd.concat(frames)
143
+
144
+
145
+ model = GLiNER.from_pretrained("urchade/gliner_base")
146
+ labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
147
+ entities = model.predict_entities(text_data, labels)
148
+ df = pd.DataFrame(entities)
149
+
150
+
151
+
152
+ fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
153
+ values='score', color='label')
154
+ fig3.update_layout(margin = dict(t=50, l=25, r=25, b=25))
155
+ st.plotly_chart(fig3, key = "figure 3")
156
+
157
+
158
+
159
+ vectorizer = TfidfVectorizer()
160
+ tfidf_matrix = vectorizer.fit_transform(result)
161
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
162
+
163
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
164
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
165
+
166
+
167
+
168
+
169
+ fig4 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
170
+ x=['Resume 2', 'Jon Description'],
171
+ y=['Resume 2', 'Job Description'])
172
+ st.plotly_chart(fig4, key = "figure 4")
173
+
174
+
175
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
176
+ st.write(f"Similarity with Candidate Profile. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
177
+
178
  else:
179
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
180
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
181
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
182
 
183
 
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
 
197
 
198