nlpblogs committed on
Commit
b65c592
·
verified ·
1 Parent(s): bb8d1f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -42
app.py CHANGED
@@ -25,6 +25,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
25
  from sklearn.metrics.pairwise import cosine_similarity
26
  import tempfile
27
 
 
28
  with st.sidebar:
29
  st.button("DEMO APP", type="primary")
30
 
@@ -63,7 +64,7 @@ with st.sidebar:
63
 
64
 
65
 
66
- txt = st.text_area("Job description", key="text 1")
67
  job = pd.Series(txt, name="Text")
68
  st.dataframe(job)
69
 
@@ -83,52 +84,52 @@ if st.session_state['upload_count'] < max_attempts:
83
  text_data = ""
84
  for page in pdf_reader.pages:
85
  text_data += page.extract_text()
86
- data = pd.Series(text_data, name='Text')
87
- st.dataframe(data)
88
-
89
- frames = [job, data]
90
- result = pd.concat(frames)
91
- st.dataframe(result)
92
-
93
- model = GLiNER.from_pretrained("urchade/gliner_base")
94
- labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
95
- entities = model(text_data, labels=labels)
96
- df_entities = pd.DataFrame(entities)
97
- st.subheader("Extracted Entities:")
98
- st.dataframe(df_entities)
99
-
100
- fig = px.treemap(df_entities, path=[px.Constant("all"), 'word', 'entity_group'],
101
- values='score', color='entity_group')
102
- fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
103
- st.plotly_chart(fig)
104
-
105
- vectorizer = TfidfVectorizer()
106
- tfidf_matrix = vectorizer.fit_transform(result)
107
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
108
- st.subheader("TF-IDF Values:")
109
- st.dataframe(tfidf_df)
110
-
111
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
112
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
113
- st.subheader("Cosine Similarity Matrix:")
114
- st.dataframe(cosine_sim_df)
115
-
116
- st.subheader("A score closer to 1 means closer match")
117
- fig = px.imshow(cosine_sim_df, text_auto=True,
118
- labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
119
- x=['Job Description'] + [f'Candidate {i+1}' for i in range(len(uploaded_files))],
120
- y=['Job Description'] + [f'Candidate {i+1}' for i in range(len(uploaded_files))])
121
- st.plotly_chart(fig)
122
-
123
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
124
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
125
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
126
  else:
127
  st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
128
 
129
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
130
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
131
-
 
 
132
 
133
 
134
 
 
25
  from sklearn.metrics.pairwise import cosine_similarity
26
  import tempfile
27
 
28
+ import plotly.express as px
29
  with st.sidebar:
30
  st.button("DEMO APP", type="primary")
31
 
 
64
 
65
 
66
 
67
+ txt = st.text_area("Job description", key = "text 1")
68
  job = pd.Series(txt, name="Text")
69
  st.dataframe(job)
70
 
 
84
  text_data = ""
85
  for page in pdf_reader.pages:
86
  text_data += page.extract_text()
87
+ data = pd.Series(text_data, name = 'Text')
88
+ st.dataframe(data)
89
+ frames = [job, data]
90
+ result = pd.concat(frames)
91
+ st.dataframe(result)
92
+ model = GLiNER.from_pretrained("urchade/gliner_base")
93
+ labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
94
+ entities = model.predict_entities(text_data, labels)
95
+ df = pd.DataFrame(entities)
96
+ st.dataframe(entities)
97
+ st.dataframe(df)
98
+ fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
99
+ values='score', color='label')
100
+ fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
101
+ st.plotly_chart(fig)
102
+ vectorizer = TfidfVectorizer()
103
+ tfidf_matrix = vectorizer.fit_transform(result)
104
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
105
+ st.subheader("TF-IDF Values:")
106
+ st.dataframe(tfidf_df)
107
+
108
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
109
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
110
+ st.subheader("Cosine Similarity Matrix:")
111
+ st.dataframe(cosine_sim_df)
112
+
113
+
114
+ st.subheader("A score closer to 1 means closer match")
115
+
116
+ fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
117
+ x=['text1', 'Jon Description'],
118
+ y=['text1', 'Job Description'])
119
+ st.plotly_chart(fig)
120
+
121
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
122
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
123
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
124
+
 
 
125
  else:
126
  st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
127
 
128
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
129
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
130
+
131
+
132
+
133
 
134
 
135