nlpblogs committed on
Commit
2955054
·
verified ·
1 Parent(s): bc4e2d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -112
app.py CHANGED
@@ -1,36 +1,16 @@
1
- import streamlit as st
2
- from PyPDF2 import PdfReader
3
- import pandas as pd
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
-
6
- from sklearn.metrics.pairwise import cosine_similarity
7
-
8
  import streamlit as st
9
  from PyPDF2 import PdfReader
10
  import pandas as pd
11
  from sklearn.feature_extraction.text import TfidfVectorizer
12
  from sklearn.metrics.pairwise import cosine_similarity
13
  from gliner import GLiNER
14
-
15
-
16
- import streamlit as st
17
- import pandas as pd
18
- from PyPDF2 import PdfReader
19
- from gliner import GLiNER
20
-
21
- import streamlit as st
22
- import pandas as pd
23
- from PyPDF2 import PdfReader
24
- from sklearn.feature_extraction.text import TfidfVectorizer
25
- from sklearn.metrics.pairwise import cosine_similarity
26
- import tempfile
27
-
28
  import plotly.express as px
 
29
  with st.sidebar:
30
  st.button("DEMO APP", type="primary")
31
 
32
 
33
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
34
  expander.write('''
35
 
36
 
@@ -38,16 +18,13 @@ with st.sidebar:
38
  This app accepts files in .pdf formats.
39
 
40
  **How to Use**
41
- Upload your file first. Then, click the 'Results' button.
42
 
43
  **Usage Limits**
44
- You can request results up to 5 times.
45
 
46
  **Subscription Management**
47
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own Named Entity Recognition (NER) Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]
48
-
49
- **Authorization**
50
- For security purposes, your authorization access expires hourly. To restore access, click the "Request Authorization" button.
51
 
52
  **Customization**
53
  To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
@@ -63,19 +40,20 @@ with st.sidebar:
63
  ''')
64
 
65
 
66
-
 
67
  txt = st.text_area("Job description", key = "text 1")
68
  job = pd.Series(txt, name="Text")
69
- st.dataframe(job)
70
 
71
  if 'upload_count' not in st.session_state:
72
  st.session_state['upload_count'] = 0
73
 
74
- max_attempts = 20
75
 
76
  if st.session_state['upload_count'] < max_attempts:
77
  uploaded_files = st.file_uploader(
78
- "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate 1"
79
  )
80
  if uploaded_files:
81
  st.session_state['upload_count'] += 1
@@ -85,37 +63,31 @@ if st.session_state['upload_count'] < max_attempts:
85
  for page in pdf_reader.pages:
86
  text_data += page.extract_text()
87
  data = pd.Series(text_data, name = 'Text')
88
- st.dataframe(data)
89
  frames = [job, data]
90
  result = pd.concat(frames)
91
- st.dataframe(result)
 
92
  model = GLiNER.from_pretrained("urchade/gliner_base")
93
  labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
94
  entities = model.predict_entities(text_data, labels)
95
  df = pd.DataFrame(entities)
96
- st.dataframe(entities)
97
- st.dataframe(df)
98
  fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
99
  values='score', color='label')
100
  fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
101
  st.plotly_chart(fig1, key = "figure 1")
 
102
  vectorizer = TfidfVectorizer()
103
  tfidf_matrix = vectorizer.fit_transform(result)
104
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
105
- st.subheader("TF-IDF Values:")
106
- st.dataframe(tfidf_df)
107
-
108
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
109
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
110
- st.subheader("Cosine Similarity Matrix:")
111
- st.dataframe(cosine_sim_df)
112
-
113
-
114
- st.subheader("A score closer to 1 means closer match")
115
 
116
- fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
117
- x=['text1', 'Jon Description'],
118
- y=['text1', 'Job Description'])
119
  st.plotly_chart(fig2, key = "figure 2")
120
 
121
  st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
@@ -123,77 +95,15 @@ if st.session_state['upload_count'] < max_attempts:
123
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
124
 
125
  else:
126
- st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
127
 
128
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
129
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
130
 
131
 
132
- st.subheader("Candidate profile 2", divider = "green")
133
- txt = st.text_area("Job description", key = "text 2")
134
- job = pd.Series(txt, name="Text")
135
- st.dataframe(job)
136
 
137
- if 'upload_count' not in st.session_state:
138
- st.session_state['upload_count'] = 0
139
-
140
- max_attempts = 2
141
-
142
- if st.session_state['upload_count'] < max_attempts:
143
- uploaded_files = st.file_uploader(
144
- "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate 2"
145
- )
146
- if uploaded_files:
147
- st.session_state['upload_count'] += 1
148
- for uploaded_file in uploaded_files:
149
- pdf_reader = PdfReader(uploaded_file)
150
- text_data = ""
151
- for page in pdf_reader.pages:
152
- text_data += page.extract_text()
153
- data = pd.Series(text_data, name = 'Text')
154
- st.dataframe(data)
155
- frames = [job, data]
156
- result = pd.concat(frames)
157
- st.dataframe(result)
158
- model = GLiNER.from_pretrained("urchade/gliner_base")
159
- labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
160
- entities = model.predict_entities(text_data, labels)
161
- df = pd.DataFrame(entities)
162
- st.dataframe(entities)
163
- st.dataframe(df)
164
- fig3 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
165
- values='score', color='label')
166
- fig3.update_layout(margin = dict(t=50, l=25, r=25, b=25))
167
- st.plotly_chart(fig3, key = "figure 3")
168
-
169
- vectorizer = TfidfVectorizer()
170
- tfidf_matrix = vectorizer.fit_transform(result)
171
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
172
- st.subheader("TF-IDF Values:")
173
- st.dataframe(tfidf_df)
174
-
175
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
176
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
177
- st.subheader("Cosine Similarity Matrix:")
178
- st.dataframe(cosine_sim_df)
179
-
180
-
181
- st.subheader("A score closer to 1 means closer match")
182
-
183
- fig4 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
184
- x=['text1', 'Jon Description'],
185
- y=['text1', 'Job Description'])
186
- st.plotly_chart(fig4, key = "figure 4")
187
-
188
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
189
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
190
- st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
191
-
192
- else:
193
- st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
194
-
195
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
196
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
197
 
198
 
199
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from PyPDF2 import PdfReader
3
  import pandas as pd
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from gliner import GLiNER
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import plotly.express as px
8
+
9
  with st.sidebar:
10
  st.button("DEMO APP", type="primary")
11
 
12
 
13
+ expander = st.expander("**Important notes on the AI Resume Analysis based on Keywords App**")
14
  expander.write('''
15
 
16
 
 
18
  This app accepts files in .pdf formats.
19
 
20
  **How to Use**
21
+ Paste the job description first. Then, upload your resume to retrieve the results. You can upload up to 10 resumes in total.
22
 
23
  **Usage Limits**
24
+ You can request results up to 10 times in total.
25
 
26
  **Subscription Management**
27
+ This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own AI Resume Analysis based on Keywords Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]
 
 
 
28
 
29
  **Customization**
30
  To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
 
40
  ''')
41
 
42
 
43
+ st.subheader("Candidate Profile 1", divider = "green")
44
+
45
  txt = st.text_area("Job description", key = "text 1")
46
  job = pd.Series(txt, name="Text")
47
+
48
 
49
  if 'upload_count' not in st.session_state:
50
  st.session_state['upload_count'] = 0
51
 
52
+ max_attempts = 2
53
 
54
  if st.session_state['upload_count'] < max_attempts:
55
  uploaded_files = st.file_uploader(
56
+ "Upload your resume in .pdf format", type="pdf", key="candidate 1"
57
  )
58
  if uploaded_files:
59
  st.session_state['upload_count'] += 1
 
63
  for page in pdf_reader.pages:
64
  text_data += page.extract_text()
65
  data = pd.Series(text_data, name = 'Text')
 
66
  frames = [job, data]
67
  result = pd.concat(frames)
68
+
69
+
70
  model = GLiNER.from_pretrained("urchade/gliner_base")
71
  labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
72
  entities = model.predict_entities(text_data, labels)
73
  df = pd.DataFrame(entities)
74
+
75
+
76
  fig1 = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
77
  values='score', color='label')
78
  fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25))
79
  st.plotly_chart(fig1, key = "figure 1")
80
+
81
  vectorizer = TfidfVectorizer()
82
  tfidf_matrix = vectorizer.fit_transform(result)
83
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
 
 
 
84
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
85
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
86
+
 
 
 
 
87
 
88
+ fig2 = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
89
+ x=['Resume 1', 'Job Description'],
90
+ y=['Resume 1', 'Job Description'])
91
  st.plotly_chart(fig2, key = "figure 2")
92
 
93
  st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
 
95
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
96
 
97
  else:
98
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts})")
99
 
100
  if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
101
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
102
 
103
 
 
 
 
 
104
 
105
+
106
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
 
109