nlpblogs committed
Commit 34d0e07 · verified · 1 Parent(s): 70a0e2b

Update app.py

Files changed (1)
  1. app.py +78 -38
app.py CHANGED
@@ -63,9 +63,7 @@ with st.sidebar:
 
 
 
-
-
- txt = st.text_area("Job description", key="text 1")
+ txt = st.text_area("Job description", key = "text 1")
 job = pd.Series(txt, name="Text")
 st.dataframe(job)
 
@@ -79,61 +77,103 @@ if st.session_state['upload_count'] < max_attempts:
         "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
     )
 
-     if uploaded_files:
-         st.session_state['upload_count'] += 1
-         all_resumes_text = []
-         for uploaded_file in uploaded_files:
-             pdf_reader = PdfReader(uploaded_file)
-             text_data = ""
-             for page in pdf_reader.pages:
-                 text_data += page.extract_text()
-             all_resumes_text.append(text_data)
-             data = pd.Series(text_data, name='Text')
-             st.dataframe(data)
-
-             frames = [job, data]
-             result = pd.concat(frames)
-             st.dataframe(result)
-
-             model = pipeline("ner", model="xomad/gliner-model-merge-large-v1.0", aggregation_strategy="simple")
-             labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
-             entities = model(text_data, labels=labels)
-             df_entities = pd.DataFrame(entities)
-             st.subheader("Extracted Entities:")
-             st.dataframe(df_entities)
-
-             fig = px.treemap(df_entities, path=[px.Constant("all"), 'word', 'entity_group'],
-                              values='score', color='entity_group')
-             fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
-             st.plotly_chart(fig)
+     if uploaded_files:
+         st.session_state['upload_count'] += 1
+
+         uploaded_files = st.file_uploader(
+             "Choose a CSV file", accept_multiple_files=True, type = "pdf", key = "candidate 1"
+         )
+
+
+
+
+
+
+         for uploaded_file in uploaded_files:
+             pdf_reader = PdfReader(uploaded_file)
+             text_data = ""
+             for page in pdf_reader.pages:
+                 text_data += page.extract_text()
+             data = pd.Series(text_data, name = 'Text')
+             st.dataframe(data)
+
+             frames = [job, data]
+             result = pd.concat(frames)
+             st.dataframe(result)
+
+             model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
+             labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
+             entities = model.predict_entities(text_data, labels)
+             df = pd.DataFrame(entities)
+             st.dataframe(entities)
+             st.dataframe(df)
+
+             import plotly.express as px
+             fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
+                              values='score', color='label')
+             fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
+             st.plotly_chart(fig)
+
+
+
+
+
+
+
+
+
 
         vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(result)
        tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
        st.subheader("TF-IDF Values:")
        st.dataframe(tfidf_df)
-
+
        cosine_sim_matrix = cosine_similarity(tfidf_matrix)
        cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
        st.subheader("Cosine Similarity Matrix:")
        st.dataframe(cosine_sim_df)
 
-         fig = px.imshow(cosine_sim_df, text_auto=True,
-                         labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
-                         x=['Job Description'] + [f'Candidate {i+1}' for i in range(len(all_resumes_text))],
-                         y=['Job Description'] + [f'Candidate {i+1}' for i in range(len(all_resumes_text))])
+         import plotly.express as px
+         st.subheader("A score closer to 1 means closer match")
+
+         fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
+                         x=['text1', 'Jon Description'],
+                         y=['text1', 'Job Description'])
        st.plotly_chart(fig)
 
        st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
        for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
            st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
- else:
-     st.warning(f"Maximum upload attempts reached ({max_attempts}). Please refresh to upload more files.")
-
+
 if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
     st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
 
 
 
+
+ st.divider()
+
+ txt = st.text_area("Job description", key = "text 2")
+ job = pd.Series(txt, name="Text")
+ st.dataframe(job)
+
+ uploaded_files = st.file_uploader(
+     "Choose a CSV file", accept_multiple_files=True, type = "pdf", key = "candidate 2"
+ )
+ for uploaded_file in uploaded_files:
+     pdf_reader = PdfReader(uploaded_file)
+     text_data = ""
+     for page in pdf_reader.pages:
+         text_data += page.extract_text()
+     data = pd.Series(text_data, name = 'Text')
+     st.dataframe(data)
+     frames = [job, data]
+     result = pd.concat(frames)
+     st.dataframe(result)
+
+
+
+
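
The main change in this hunk swaps the transformers pipeline("ner", ...) call for the gliner package's GLiNER class and reworks the treemap to use the 'text'/'label'/'score' fields that GLiNER returns. A minimal standalone sketch of that extraction step, reusing the model id and label list from the diff (the sample sentence is invented, and the snippet assumes the gliner and pandas packages are installed):

import pandas as pd
from gliner import GLiNER

# Model id and label set are copied from the diff; the sample sentence is invented.
model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
labels = ["person", "country", "city", "organization", "date", "money",
          "percent value", "position"]

text_data = "Jane Doe joined Acme Corp in Berlin as a senior data scientist in March 2021."
entities = model.predict_entities(text_data, labels)

# predict_entities returns a list of dicts that include 'text', 'label' and 'score',
# which is what the new treemap call relies on (path=['text', 'label'], values='score').
df = pd.DataFrame(entities)
print(df)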
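The removed heatmap labelled its axes from the number of uploaded resumes via all_resumes_text, while the added version hardcodes two labels ('text1', 'Jon Description'). A sketch of the dynamic labelling outside Streamlit, with placeholder texts standing in for the job description and the extracted resumes:

import plotly.express as px
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Placeholder texts; in app.py these come from the text area and the uploaded PDFs.
job_text = "Looking for a data scientist with NLP and Python experience."
all_resumes_text = [
    "Data scientist with five years of NLP experience in Python.",
    "Front-end developer focused on React and TypeScript.",
]

# Row 0 is the job description, rows 1..n are the candidate resumes.
corpus = [job_text] + all_resumes_text
tfidf_matrix = TfidfVectorizer().fit_transform(corpus)
cosine_sim_matrix = cosine_similarity(tfidf_matrix)

# One label per row/column, generated from the number of resumes (as the removed code did).
axis_labels = ['Job Description'] + [f'Candidate {i+1}' for i in range(len(all_resumes_text))]
fig = px.imshow(cosine_sim_matrix, text_auto=True,
                labels=dict(color="Cosine similarity"),
                x=axis_labels, y=axis_labels)
fig.show()

# First row, skipping the self-similarity cell, gives job-vs-resume scores.
for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
    print(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")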
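For context, the hunk sits inside the upload-limit check named in its header (if st.session_state['upload_count'] < max_attempts:), and the commit drops the else branch that warned when the limit was hit. A self-contained sketch of that gating pattern under stated assumptions (the max_attempts value and the counter initialisation are illustrative, since both sit outside the diffed region):

import streamlit as st

max_attempts = 5  # illustrative value; the real limit is defined above the diffed region

# Initialise the counter once per session (app.py presumably does this above the diffed region).
if 'upload_count' not in st.session_state:
    st.session_state['upload_count'] = 0

if st.session_state['upload_count'] < max_attempts:
    uploaded_files = st.file_uploader(
        "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
    )
    if uploaded_files:
        st.session_state['upload_count'] += 1
        # ... PDF text extraction, entity extraction and similarity scoring go here ...
else:
    st.warning(f"Maximum upload attempts reached ({max_attempts}). Please refresh to upload more files.")

if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
    st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")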