nlpblogs committed on
Commit
caf704e
·
verified ·
1 Parent(s): ebf4966

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -78
app.py CHANGED
@@ -63,110 +63,83 @@ with st.sidebar:
63
 
64
 
65
 
66
- st.subheader("AI Resume Analysis based on keywords", divider="red")
67
-
68
-
 
 
 
 
69
 
70
- txt = st.text_area("Job description", key = "text 1")
71
  job = pd.Series(txt, name="Text")
72
  st.dataframe(job)
73
 
74
- if 'uploaded_files' not in st.session_state:
75
- st.session_state['url_count'] = 0
76
 
77
  max_attempts = 5
78
 
79
- uploaded_files = st.file_uploader(
80
- "Choose a CSV file", accept_multiple_files=True, type = "pdf", key = "candidate 1"
81
- )
82
-
83
-
84
-
85
-
86
-
87
-
88
- for uploaded_file in uploaded_files:
89
- pdf_reader = PdfReader(uploaded_file)
90
- text_data = ""
91
- for page in pdf_reader.pages:
92
- text_data += page.extract_text()
93
- data = pd.Series(text_data, name = 'Text')
94
- st.dataframe(data)
95
-
96
- frames = [job, data]
97
- result = pd.concat(frames)
98
- st.dataframe(result)
99
-
100
- model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
101
- labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
102
- entities = model.predict_entities(text_data, labels)
103
- df = pd.DataFrame(entities)
104
- st.dataframe(entities)
105
- st.dataframe(df)
 
 
 
 
 
106
 
107
- import plotly.express as px
108
- fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
109
- values='score', color='label')
110
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
111
- st.plotly_chart(fig)
112
-
113
-
114
-
115
-
116
-
117
-
118
-
119
-
120
-
121
-
122
  vectorizer = TfidfVectorizer()
123
  tfidf_matrix = vectorizer.fit_transform(result)
124
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
125
  st.subheader("TF-IDF Values:")
126
  st.dataframe(tfidf_df)
127
-
128
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
129
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
130
  st.subheader("Cosine Similarity Matrix:")
131
  st.dataframe(cosine_sim_df)
132
 
133
- import plotly.express as px
134
- st.subheader("A score closer to 1 means closer match")
135
-
136
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
137
- x=['text1', 'Jon Description'],
138
- y=['text1', 'Job Description'])
139
  st.plotly_chart(fig)
140
 
141
  st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
142
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
143
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
144
 
145
-
146
- if 'uploaded_files' in st.session_state:
147
- st.write(f"URL pasted {st.session_state['uploaded_files']} times.")
148
-
149
 
 
 
150
 
151
 
152
 
153
- st.divider()
154
-
155
- txt = st.text_area("Job description", key = "text 2")
156
- job = pd.Series(txt, name="Text")
157
- st.dataframe(job)
158
 
159
- uploaded_files = st.file_uploader(
160
- "Choose a CSV file", accept_multiple_files=True, type = "pdf", key = "candidate 2"
161
- )
162
- for uploaded_file in uploaded_files:
163
- pdf_reader = PdfReader(uploaded_file)
164
- text_data = ""
165
- for page in pdf_reader.pages:
166
- text_data += page.extract_text()
167
- data = pd.Series(text_data, name = 'Text')
168
- st.dataframe(data)
169
- frames = [job, data]
170
- result = pd.concat(frames)
171
- st.dataframe(result)
172
-
 
63
 
64
 
65
 
66
+ import streamlit as st
67
+ import pandas as pd
68
+ from pypdf import PdfReader
69
+ from transformers import pipeline
70
+ from sklearn.feature_extraction.text import TfidfVectorizer
71
+ from sklearn.metrics.pairwise import cosine_similarity
72
+ import plotly.express as px
73
 
74
+ txt = st.text_area("Job description", key="text 1")
75
  job = pd.Series(txt, name="Text")
76
  st.dataframe(job)
77
 
78
+ if 'upload_count' not in st.session_state:
79
+ st.session_state['upload_count'] = 0
80
 
81
  max_attempts = 5
82
 
83
+ if st.session_state['upload_count'] < max_attempts:
84
+ uploaded_files = st.file_uploader(
85
+ "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
86
+ )
87
+
88
+ if uploaded_files:
89
+ st.session_state['upload_count'] += 1
90
+ all_resumes_text = []
91
+ for uploaded_file in uploaded_files:
92
+ pdf_reader = PdfReader(uploaded_file)
93
+ text_data = ""
94
+ for page in pdf_reader.pages:
95
+ text_data += page.extract_text()
96
+ all_resumes_text.append(text_data)
97
+ data = pd.Series(text_data, name='Text')
98
+ st.dataframe(data)
99
+
100
+ frames = [job, data]
101
+ result = pd.concat(frames)
102
+ st.dataframe(result)
103
+
104
+ model = pipeline("ner", model="xomad/gliner-model-merge-large-v1.0", aggregation_strategy="simple")
105
+ labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
106
+ entities = model(text_data, labels=labels)
107
+ df_entities = pd.DataFrame(entities)
108
+ st.subheader("Extracted Entities:")
109
+ st.dataframe(df_entities)
110
+
111
+ fig = px.treemap(df_entities, path=[px.Constant("all"), 'word', 'entity_group'],
112
+ values='score', color='entity_group')
113
+ fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
114
+ st.plotly_chart(fig)
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  vectorizer = TfidfVectorizer()
117
  tfidf_matrix = vectorizer.fit_transform(result)
118
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
119
  st.subheader("TF-IDF Values:")
120
  st.dataframe(tfidf_df)
121
+
122
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
123
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
124
  st.subheader("Cosine Similarity Matrix:")
125
  st.dataframe(cosine_sim_df)
126
 
127
+ fig = px.imshow(cosine_sim_df, text_auto=True,
128
+ labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
129
+ x=['Job Description'] + [f'Candidate {i+1}' for i in range(len(all_resumes_text))],
130
+ y=['Job Description'] + [f'Candidate {i+1}' for i in range(len(all_resumes_text))])
 
 
131
  st.plotly_chart(fig)
132
 
133
  st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
134
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
135
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
136
 
137
+ else:
138
+ st.warning(f"Maximum upload attempts reached ({max_attempts}). Please refresh to upload more files.")
 
 
139
 
140
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
141
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
142
 
143
 
144
 
 
 
 
 
 
145