nlpblogs committed on
Commit
674df9a
·
verified ·
1 Parent(s): 2eb02f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -58
app.py CHANGED
@@ -63,7 +63,7 @@ with st.sidebar:
63
 
64
 
65
 
66
- txt = st.text_area("Job description", key = "text 1")
67
  job = pd.Series(txt, name="Text")
68
  st.dataframe(job)
69
 
@@ -76,66 +76,59 @@ if st.session_state['upload_count'] < max_attempts:
76
  uploaded_files = st.file_uploader(
77
  "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
78
  )
79
-
80
- if uploaded_files:
81
- st.session_state['upload_count'] += 1
82
- for uploaded_file in uploaded_files:
83
- pdf_reader = PdfReader(uploaded_file)
84
- text_data = ""
85
- for page in pdf_reader.pages:
86
- text_data += page.extract_text()
87
- data = pd.Series(text_data, name = 'Text')
88
- st.dataframe(data)
89
-
90
- frames = [job, data]
91
- result = pd.concat(frames)
92
- st.dataframe(result)
93
-
94
- model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
95
- labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
96
- entities = model.predict_entities(text_data, labels)
97
- df = pd.DataFrame(entities)
98
- st.dataframe(entities)
99
- st.dataframe(df)
100
-
101
- import plotly.express as px
102
- fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
103
- values='score', color='label')
104
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
105
- st.plotly_chart(fig)
106
-
107
-
108
-
109
- vectorizer = TfidfVectorizer()
110
- tfidf_matrix = vectorizer.fit_transform(result)
111
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
112
- st.subheader("TF-IDF Values:")
113
- st.dataframe(tfidf_df)
114
-
115
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
116
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
117
- st.subheader("Cosine Similarity Matrix:")
118
- st.dataframe(cosine_sim_df)
119
-
120
- import plotly.express as px
121
- st.subheader("A score closer to 1 means closer match")
122
-
123
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
124
- x=['text1', 'Jon Description'],
125
- y=['text1', 'Job Description'])
126
- st.plotly_chart(fig)
127
-
128
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
129
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
130
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
131
-
132
  else:
133
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
134
-
135
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
136
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
137
-
138
-
139
 
140
 
141
 
 
63
 
64
 
65
 
66
+ txt = st.text_area("Job description", key="text 1")
67
  job = pd.Series(txt, name="Text")
68
  st.dataframe(job)
69
 
 
76
  uploaded_files = st.file_uploader(
77
  "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
78
  )
79
+ if uploaded_files:
80
+ st.session_state['upload_count'] += 1
81
+ for uploaded_file in uploaded_files:
82
+ pdf_reader = PdfReader(uploaded_file)
83
+ text_data = ""
84
+ for page in pdf_reader.pages:
85
+ text_data += page.extract_text()
86
+ data = pd.Series(text_data, name='Text')
87
+ st.dataframe(data)
88
+
89
+ frames = [job, data]
90
+ result = pd.concat(frames)
91
+ st.dataframe(result)
92
+
93
+ model = pipeline("ner", model="xomad/gliner-model-merge-large-v1.0", aggregation_strategy="simple")
94
+ labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
95
+ entities = model(text_data, labels=labels)
96
+ df_entities = pd.DataFrame(entities)
97
+ st.subheader("Extracted Entities:")
98
+ st.dataframe(df_entities)
99
+
100
+ fig = px.treemap(df_entities, path=[px.Constant("all"), 'word', 'entity_group'],
101
+ values='score', color='entity_group')
102
+ fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
103
+ st.plotly_chart(fig)
104
+
105
+ vectorizer = TfidfVectorizer()
106
+ tfidf_matrix = vectorizer.fit_transform(result)
107
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
108
+ st.subheader("TF-IDF Values:")
109
+ st.dataframe(tfidf_df)
110
+
111
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
112
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
113
+ st.subheader("Cosine Similarity Matrix:")
114
+ st.dataframe(cosine_sim_df)
115
+
116
+ st.subheader("A score closer to 1 means closer match")
117
+ fig = px.imshow(cosine_sim_df, text_auto=True,
118
+ labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
119
+ x=['Job Description'] + [f'Candidate {i+1}' for i in range(len(uploaded_files))],
120
+ y=['Job Description'] + [f'Candidate {i+1}' for i in range(len(uploaded_files))])
121
+ st.plotly_chart(fig)
122
+
123
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
124
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
125
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
 
 
 
 
 
 
126
  else:
127
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
128
+
129
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
130
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
131
+
 
132
 
133
 
134