nlpblogs committed on
Commit
a1c7830
·
verified ·
1 Parent(s): 1d586a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -119
app.py CHANGED
@@ -39,11 +39,11 @@ with st.sidebar:
39
 
40
  ''')
41
 
42
- st.subheader ("Candidate Profile 1", divider = "red")
43
 
44
- txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
45
- job = pd.Series(txt, name="Text")
46
 
 
 
 
47
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
@@ -54,68 +54,59 @@ if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
-
58
- if uploaded_files:
59
- st.session_state['upload_count'] += 1
60
- for uploaded_file in uploaded_files:
61
- pdf_reader = PdfReader(uploaded_file)
62
- text_data = ""
63
- for page in pdf_reader.pages:
64
- text_data += page.extract_text()
65
- data = pd.Series(text_data, name = 'Text')
66
-
67
-
68
- frames = [job, data]
69
- result = pd.concat(frames)
70
-
71
-
72
- model = GLiNER.from_pretrained("urchade/gliner_base")
73
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
74
- entities = model.predict_entities(text_data, labels)
75
- df = pd.DataFrame(entities)
76
-
77
-
78
- st.subheader("Profile of candidate 1", divider = "green")
79
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
80
- values='score', color='label')
81
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
82
- st.plotly_chart(fig1)
83
-
84
-
85
-
86
- vectorizer = TfidfVectorizer()
87
- tfidf_matrix = vectorizer.fit_transform(result)
88
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
89
-
90
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
91
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
92
-
93
-
94
-
95
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description", divider = "green")
96
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
97
- x=['Resume 1', 'Jon Description'],
98
- y=['Resume 1', 'Job Description'])
99
- st.plotly_chart(fig2)
100
-
101
-
102
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
103
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
104
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
105
-
106
  else:
107
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
 
108
 
109
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
110
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
111
-
112
-
113
 
 
114
 
115
- st.subheader ("Candidate Profile 1", divider = "red")
116
 
117
 
118
 
 
 
 
119
 
120
  if 'upload_count' not in st.session_state:
121
  st.session_state['upload_count'] = 0
@@ -124,69 +115,50 @@ max_attempts = 3
124
 
125
  if st.session_state['upload_count'] < max_attempts:
126
  uploaded_files = st.file_uploader(
127
- "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 2"
128
  )
129
-
130
- if uploaded_files:
131
- st.session_state['upload_count'] += 1
132
- for uploaded_file in uploaded_files:
133
- pdf_reader = PdfReader(uploaded_file)
134
- text_data = ""
135
- for page in pdf_reader.pages:
136
- text_data += page.extract_text()
137
- data = pd.Series(text_data, name = 'Text')
138
-
139
-
140
- frames = [job, data]
141
- result = pd.concat(frames)
142
-
143
-
144
- model = GLiNER.from_pretrained("urchade/gliner_base")
145
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
146
- entities = model.predict_entities(text_data, labels)
147
- df = pd.DataFrame(entities)
148
-
149
-
150
- st.subheader("Profile of candidate 1")
151
- fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
152
- values='score', color='label')
153
- fig3.update_layout(margin = dict(t=50, l=25, r=25, b=25))
154
- st.plotly_chart(fig3)
155
-
156
-
157
-
158
- vectorizer = TfidfVectorizer()
159
- tfidf_matrix = vectorizer.fit_transform(result)
160
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
161
-
162
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
163
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
164
-
165
-
166
-
167
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
168
- fig4 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
169
- x=['Resume 1', 'Jon Description'],
170
- y=['Resume 1', 'Job Description'])
171
- st.plotly_chart(fig4)
172
-
173
-
174
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
175
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
176
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
177
-
178
  else:
179
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
180
-
181
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
182
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
183
-
184
-
185
-
186
-
187
-
188
-
189
-
190
 
191
 
192
 
 
39
 
40
  ''')
41
 
 
42
 
 
 
43
 
44
+ st.subheader("Candidate Profile 1", divider="red")
45
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text 1")
46
+ job = pd.Series(txt, name="Text")
47
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
 
54
  uploaded_files = st.file_uploader(
55
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
+ if uploaded_files:
58
+ st.session_state['upload_count'] += 1
59
+ for uploaded_file in uploaded_files:
60
+ pdf_reader = PdfReader(uploaded_file)
61
+ text_data = ""
62
+ for page in pdf_reader.pages:
63
+ text_data += page.extract_text()
64
+ data = pd.Series(text_data, name='Text')
65
+ frames = [job, data]
66
+ result = pd.concat(frames)
67
+ model = pipeline("ner", model="urchade/gliner_base", aggregation_strategy="simple")
68
+ labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
69
+ entities = model(text_data, labels=labels)
70
+ df = pd.DataFrame(entities)
71
+
72
+ st.subheader("Profile of candidate 1", divider="green")
73
+ fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
74
+ values='score', color='label')
75
+ fig1.update_layout(margin=dict(t=50, l=25, r=25, b=25))
76
+ st.plotly_chart(fig1, key = "Figure 1")
77
+
78
+ vectorizer = TfidfVectorizer()
79
+ tfidf_matrix = vectorizer.fit_transform(result)
80
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
81
+
82
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
83
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
84
+
85
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description", divider="green")
86
+ fig2 = px.imshow(cosine_sim_df, text_auto=True,
87
+ labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
88
+ x=['Resume 1', 'Job Description'],
89
+ y=['Resume 1', 'Job Description'])
90
+ st.plotly_chart(fig2, key = "Figure 2")
91
+
92
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
93
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
94
+ st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
 
 
 
 
 
 
 
 
 
95
  else:
96
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts}).")
97
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
98
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
99
 
100
+
 
 
 
101
 
102
+
103
 
 
104
 
105
 
106
 
107
+ st.subheader("Candidate Profile 2", divider="red")
108
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text 1")
109
+ job = pd.Series(txt, name="Text")
110
 
111
  if 'upload_count' not in st.session_state:
112
  st.session_state['upload_count'] = 0
 
115
 
116
  if st.session_state['upload_count'] < max_attempts:
117
  uploaded_files = st.file_uploader(
118
+ "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
119
  )
120
+ if uploaded_files:
121
+ st.session_state['upload_count'] += 1
122
+ for uploaded_file in uploaded_files:
123
+ pdf_reader = PdfReader(uploaded_file)
124
+ text_data = ""
125
+ for page in pdf_reader.pages:
126
+ text_data += page.extract_text()
127
+ data = pd.Series(text_data, name='Text')
128
+ frames = [job, data]
129
+ result = pd.concat(frames)
130
+ model = pipeline("ner", model="urchade/gliner_base", aggregation_strategy="simple")
131
+ labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
132
+ entities = model(text_data, labels=labels)
133
+ df = pd.DataFrame(entities)
134
+
135
+ st.subheader("Profile of candidate 1", divider="green")
136
+ fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
137
+ values='score', color='label')
138
+ fig3.update_layout(margin=dict(t=50, l=25, r=25, b=25))
139
+ st.plotly_chart(fig3, key = "Figure 3")
140
+
141
+ vectorizer = TfidfVectorizer()
142
+ tfidf_matrix = vectorizer.fit_transform(result)
143
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
144
+
145
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
146
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
147
+
148
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description", divider="green")
149
+ fig4 = px.imshow(cosine_sim_df, text_auto=True,
150
+ labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
151
+ x=['Resume 1', 'Job Description'],
152
+ y=['Resume 1', 'Job Description'])
153
+ st.plotly_chart(fig4, key = "Figure 4")
154
+
155
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
156
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
157
+ st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
 
 
 
 
 
 
 
 
 
158
  else:
159
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts}).")
160
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
161
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
 
 
 
 
 
 
 
162
 
163
 
164