nlpblogs committed on
Commit
446b3f9
·
verified ·
1 Parent(s): 6ae6756

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -109
app.py CHANGED
@@ -39,150 +39,128 @@ with st.sidebar:
39
 
40
  ''')
41
 
42
- st.subheader ("Candidate Profile 1", divider = "green")
43
 
44
- txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
 
 
45
  job = pd.Series(txt, name="Text")
46
 
47
 
48
  if 'upload_count' not in st.session_state:
49
  st.session_state['upload_count'] = 0
50
 
51
- max_attempts = 3
52
 
53
  if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
- "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
-
58
- if uploaded_files:
59
- st.session_state['upload_count'] += 1
60
- for uploaded_file in uploaded_files:
61
- pdf_reader = PdfReader(uploaded_file)
62
- text_data = ""
63
- for page in pdf_reader.pages:
64
- text_data += page.extract_text()
65
- data = pd.Series(text_data, name = 'Text')
66
-
67
-
68
- frames = [job, data]
69
- result = pd.concat(frames)
70
-
71
-
72
- model = GLiNER.from_pretrained("urchade/gliner_base")
73
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
74
- entities = model.predict_entities(text_data, labels)
75
- df = pd.DataFrame(entities)
76
-
77
-
78
- st.subheader("Profile of candidate 1")
79
- fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
80
  values='score', color='label')
81
- fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
82
- st.plotly_chart(fig1, key = "figure 1")
83
-
84
-
85
-
86
- vectorizer = TfidfVectorizer()
87
- tfidf_matrix = vectorizer.fit_transform(result)
88
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
89
-
90
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
91
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
92
-
93
-
94
-
95
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
96
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
97
  x=['Resume 1', 'Jon Description'],
98
  y=['Resume 1', 'Job Description'])
99
- st.plotly_chart(fig2, key = "figure 2")
100
 
101
-
102
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
103
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
104
- st.write("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
105
 
106
  else:
107
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
108
-
109
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
110
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
111
 
112
 
113
 
114
-
115
-
116
- st.subheader ("Candidate Profile 2", divider = "green")
117
-
118
-
119
-
120
  if 'upload_count' not in st.session_state:
121
  st.session_state['upload_count'] = 0
122
-
123
- max_attempts = 3
124
-
125
  if st.session_state['upload_count'] < max_attempts:
126
  uploaded_files = st.file_uploader(
127
- "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate 2"
128
  )
129
-
130
- if uploaded_files:
131
- st.session_state['upload_count'] += 1
132
- for uploaded_file in uploaded_files:
133
- pdf_reader = PdfReader(uploaded_file)
134
- text_data = ""
135
- for page in pdf_reader.pages:
136
- text_data += page.extract_text()
137
- data = pd.Series(text_data, name = 'Text')
138
-
139
-
140
- frames = [job, data]
141
- result = pd.concat(frames)
142
-
143
-
144
- model = GLiNER.from_pretrained("urchade/gliner_base")
145
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
146
- entities = model.predict_entities(text_data, labels)
147
- df = pd.DataFrame(entities)
148
-
149
-
150
-
151
- fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
152
  values='score', color='label')
153
- fig3.update_layout(margin = dict(t=50, l=25, r=25, b=25))
154
- st.plotly_chart(fig3, key = "figure 3")
155
-
156
-
157
-
158
- vectorizer = TfidfVectorizer()
159
- tfidf_matrix = vectorizer.fit_transform(result)
160
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
161
-
162
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
163
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
164
-
165
-
166
-
167
-
168
- fig4 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
169
- x=['Resume 2', 'Jon Description'],
170
- y=['Resume 2', 'Job Description'])
171
- st.plotly_chart(fig4, key = "figure 4")
172
-
173
-
174
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
175
- st.write(f"Similarity with Candidate Profile. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
176
-
177
  else:
178
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
179
-
180
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
181
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
182
 
183
 
184
 
185
 
 
186
 
187
 
188
 
 
39
 
40
  ''')
41
 
 
42
 
43
+ st.subheader("Candidate Profile 1", divider = "green")
44
+
45
+ txt = st.text_area("Job description", key = "text 1")
46
  job = pd.Series(txt, name="Text")
47
 
48
 
49
  if 'upload_count' not in st.session_state:
50
  st.session_state['upload_count'] = 0
51
 
52
+ max_attempts = 2
53
 
54
  if st.session_state['upload_count'] < max_attempts:
55
  uploaded_files = st.file_uploader(
56
+ "Upload your resume in .pdf format", type="pdf", key="candidate 1"
57
  )
58
+ if uploaded_files:
59
+ st.session_state['upload_count'] += 1
60
+ for uploaded_file in uploaded_files:
61
+ pdf_reader = PdfReader(uploaded_file)
62
+ text_data = ""
63
+ for page in pdf_reader.pages:
64
+ text_data += page.extract_text()
65
+ data = pd.Series(text_data, name = 'Text')
66
+ frames = [job, data]
67
+ result = pd.concat(frames)
68
+
69
+
70
+ model = GLiNER.from_pretrained("urchade/gliner_base")
71
+ labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
72
+ entities = model.predict_entities(text_data, labels)
73
+ df = pd.DataFrame(entities)
74
+
75
+
76
+ fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
 
 
 
 
77
  values='score', color='label')
78
+ fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
79
+ st.plotly_chart(fig1, key = "figure 1")
80
+
81
+ vectorizer = TfidfVectorizer()
82
+ tfidf_matrix = vectorizer.fit_transform(result)
83
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
84
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
85
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
86
+
87
+
88
+ fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
 
 
 
 
 
89
  x=['Resume 1', 'Jon Description'],
90
  y=['Resume 1', 'Job Description'])
91
+ st.plotly_chart(fig2, key = "figure 2")
92
 
93
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
94
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
95
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
96
 
97
  else:
98
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
99
+
100
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
101
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
102
 
103
 
104
 
105
+
106
+
107
+ st.subheader("Candidate Profile 2", divider = "green")
108
+
109
+ txt = st.text_area("Job description", key = "text 2")
110
+ job = pd.Series(txt, name="Text")
111
  if 'upload_count' not in st.session_state:
112
  st.session_state['upload_count'] = 0
113
+ max_attempts = 2
 
 
114
  if st.session_state['upload_count'] < max_attempts:
115
  uploaded_files = st.file_uploader(
116
+ "Upload your resume in .pdf format", type="pdf", key="candidate 2"
117
  )
118
+ if uploaded_files:
119
+ st.session_state['upload_count'] += 1
120
+ for uploaded_file in uploaded_files:
121
+ pdf_reader = PdfReader(uploaded_file)
122
+ text_data = ""
123
+ for page in pdf_reader.pages:
124
+ text_data += page.extract_text()
125
+ data = pd.Series(text_data, name = 'Text')
126
+ frames = [job, data]
127
+ result = pd.concat(frames)
128
+
129
+
130
+ model = GLiNER.from_pretrained("urchade/gliner_base")
131
+ labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
132
+ entities = model.predict_entities(text_data, labels)
133
+ df = pd.DataFrame(entities)
134
+
135
+
136
+ fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
 
 
 
 
137
  values='score', color='label')
138
+ fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
139
+ st.plotly_chart(fig1, key = "figure 3")
140
+
141
+ vectorizer = TfidfVectorizer()
142
+ tfidf_matrix = vectorizer.fit_transform(result)
143
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
144
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
145
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
146
+
147
+
148
+ fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
149
+ x=['Resume 1', 'Jon Description'],
150
+ y=['Resume 1', 'Job Description'])
151
+ st.plotly_chart(fig2, key = "figure 4")
152
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
153
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
154
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
 
 
 
 
 
 
155
  else:
156
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
 
157
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
158
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
159
 
160
 
161
 
162
 
163
+
164
 
165
 
166