nlpblogs committed
Commit 1a68e65 · verified · 1 Parent(s): e0fd9ad

Update app.py

Files changed (1): app.py (+83, -67)
app.py CHANGED
@@ -25,100 +25,116 @@ from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.metrics.pairwise import cosine_similarity
  import tempfile

- txt1 = st.text_area("Job description", key="text 1")
  job_description_series1 = pd.Series(txt1, name="Text")
  st.dataframe(job_description_series1)

- uploaded_files1 = st.file_uploader(
-     "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate_1"
  )
  all_resumes_text1 = []  # Store the text content of each PDF
- if uploaded_files1:
-     for uploaded_file in uploaded_files1:
          try:
-             if uploaded_file is not None:  # Check if a file was uploaded
-                 pdf_reader = PdfReader(uploaded_file)
-                 text_data = ""
-                 for page in pdf_reader.pages:
-                     text_data += page.extract_text()
-                 model = GLiNER.from_pretrained("urchade/gliner_base")
-                 labels = ["person", "country", "organization", "time", "role"]
-                 entities = model.predict_entities(text_data, labels)
-                 entity_dict = {}
-                 for label in labels:
-                     entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
-                 data = {"Text": text_data, **entity_dict}
-                 all_resumes_text1.append(text_data)
-             else:
-                 st.warning("No file uploaded for the first set of candidates.")
          except Exception as e:
-             if hasattr(uploaded_file, 'name'):
-                 st.error(f"Error processing file {uploaded_file.name}: {e}")
-             else:
-                 st.error(f"Error processing a file: {e}")

  if all_resumes_text1:
-     all_documents1 = [job_description_series1.iloc[0]] + all_resumes_text1
-     vectorizer1 = TfidfVectorizer()
-     tfidf_matrix1 = vectorizer1.fit_transform(all_documents1)
-     tfidf_df1 = pd.DataFrame(tfidf_matrix1.toarray(), columns=vectorizer1.get_feature_names_out())
      st.subheader("TF-IDF Values:")
-     st.dataframe(tfidf_df1)
-     cosine_sim_matrix1 = cosine_similarity(tfidf_matrix1)
-     cosine_sim_df1 = pd.DataFrame(cosine_sim_matrix1)
      st.subheader("Cosine Similarity Matrix:")
-     st.dataframe(cosine_sim_df1)
      # Display similarity scores between the job description and each resume
      st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
-     for i, similarity_score in enumerate(cosine_sim_matrix1[0][1:]):
          st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")

  st.divider()

- txt2 = st.text_area("Job description", key="text 2")
  job_description_series2 = pd.Series(txt2, name="Text")
  st.dataframe(job_description_series2)

- uploaded_files2 = st.file_uploader(
-     "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate_2"
  )
  all_resumes_text2 = []  # Store the text content of each PDF
- if uploaded_files2:
-     for uploaded_file in uploaded_files2:
          try:
-             if uploaded_file is not None:  # Check if a file was uploaded
-                 pdf_reader = PdfReader(uploaded_file)
-                 text_data = ""
-                 for page in pdf_reader.pages:
-                     text_data += page.extract_text()
-                 model = GLiNER.from_pretrained("urchade/gliner_base")
-                 labels = ["person", "country", "organization", "time", "role"]
-                 entities = model.predict_entities(text_data, labels)
-                 entity_dict = {}
-                 for label in labels:
-                     entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
-                 data = {"Text": text_data, **entity_dict}
-                 all_resumes_text2.append(text_data)
-             else:
-                 st.warning("No file uploaded for the second set of candidates.")
          except Exception as e:
-             if hasattr(uploaded_file, 'name'):
-                 st.error(f"Error processing file {uploaded_file.name}: {e}")
-             else:
-                 st.error(f"Error processing a file: {e}")

  if all_resumes_text2:
-     all_documents2 = [job_description_series2.iloc[0]] + all_resumes_text2
-     vectorizer2 = TfidfVectorizer()
-     tfidf_matrix2 = vectorizer2.fit_transform(all_documents2)
-     tfidf_df2 = pd.DataFrame(tfidf_matrix2.toarray(), columns=vectorizer2.get_feature_names_out())
      st.subheader("TF-IDF Values:")
-     st.dataframe(tfidf_df2)
-     cosine_sim_matrix2 = cosine_similarity(tfidf_matrix2)
-     cosine_sim_df2 = pd.DataFrame(cosine_sim_matrix2)
      st.subheader("Cosine Similarity Matrix:")
-     st.dataframe(cosine_sim_df2)
      # Display similarity scores between the job description and each resume
      st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
-     for i, similarity_score in enumerate(cosine_sim_matrix2[0][1:]):
-         st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
  from sklearn.metrics.pairwise import cosine_similarity
  import tempfile

+ txt1 = st.text_area("Job description", key = "text 1")
  job_description_series1 = pd.Series(txt1, name="Text")
  st.dataframe(job_description_series1)

+ uploaded_files = st.file_uploader(
+     "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 1"
  )
+
+
  all_resumes_text1 = []  # Store the text content of each PDF
+
+ if uploaded_files:
+     for uploaded_file in uploaded_files:
          try:
+             pdf_reader = PdfReader(uploaded_file)
+             text_data = ""
+             for page in pdf_reader.pages:
+                 text_data += page.extract_text()
+             model = GLiNER.from_pretrained("urchade/gliner_base")
+             labels = ["person", "country", "organization", "time", "role"]
+             entities = model.predict_entities(text_data, labels)
+
+             entity_dict = {}
+             for label in labels:
+                 entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
+
+             data = {"Text": text_data, **entity_dict}
+
+
+
+             all_resumes_text1.append(data)
          except Exception as e:
+             st.error(f"Error processing file {uploaded_file.name}: {e}")

  if all_resumes_text1:
+     all_documents = [job_description_series.iloc[0]] + all_resumes_text
+
+     vectorizer = TfidfVectorizer()
+     tfidf_matrix = vectorizer.fit_transform(all_documents)
+
+     tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
      st.subheader("TF-IDF Values:")
+     st.dataframe(tfidf_df)
+
+     cosine_sim_matrix = cosine_similarity(tfidf_matrix)
+     cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
      st.subheader("Cosine Similarity Matrix:")
+     st.dataframe(cosine_sim_df)
+
      # Display similarity scores between the job description and each resume
      st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
+     for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
          st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")

+
+
  st.divider()

+ txt2 = st.text_area("Job description", key = "text 2")
  job_description_series2 = pd.Series(txt2, name="Text")
  st.dataframe(job_description_series2)

+ uploaded_files = st.file_uploader(
+     "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 2"
  )
+
+
  all_resumes_text2 = []  # Store the text content of each PDF
+
+ if uploaded_files:
+     for uploaded_file in uploaded_files:
          try:
+             pdf_reader = PdfReader(uploaded_file)
+             text_data = ""
+             for page in pdf_reader.pages:
+                 text_data += page.extract_text()
+             model = GLiNER.from_pretrained("urchade/gliner_base")
+             labels = ["person", "country", "organization", "time", "role"]
+             entities = model.predict_entities(text_data, labels)
+
+             entity_dict = {}
+             for label in labels:
+                 entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
+
+             data = {"Text": text_data, **entity_dict}
+
+
+
+             all_resumes_text2.append(data)
          except Exception as e:
+             st.error(f"Error processing file {uploaded_file.name}: {e}")

  if all_resumes_text2:
+     all_documents = [job_description_series.iloc[0]] + all_resumes_text
+
+     vectorizer = TfidfVectorizer()
+     tfidf_matrix = vectorizer.fit_transform(all_documents)
+
+     tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
      st.subheader("TF-IDF Values:")
+     st.dataframe(tfidf_df)
+
+     cosine_sim_matrix = cosine_similarity(tfidf_matrix)
+     cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
      st.subheader("Cosine Similarity Matrix:")
+     st.dataframe(cosine_sim_df)
+
      # Display similarity scores between the job description and each resume
      st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
+     for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
+         st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
+
+
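Note on the added version: inside the `if all_resumes_text1:` branch (and likewise for the second set) the rewritten code refers to `job_description_series` and `all_resumes_text`, names that do not appear to be defined anywhere in this hunk (the defined names carry a 1/2 suffix), and the loop now appends the `data` dict of text plus entities to the list that is later passed to `TfidfVectorizer`, which by default expects an iterable of strings. The uploader also appears to be created without `accept_multiple_files=True`, so `uploaded_files` would be a single file object rather than a list. The sketch below is one hedged reading of the intent, not the committed code: the helper names `load_ner_model`, `extract_resume`, and `rank_resumes` are hypothetical, and it assumes `PdfReader` comes from pypdf and that the other imports match what app.py already uses.

# Hypothetical refactoring sketch -- not part of commit 1a68e65.
import pandas as pd
import streamlit as st
from pypdf import PdfReader  # assumption: app.py's PdfReader is pypdf's
from gliner import GLiNER
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

LABELS = ["person", "country", "organization", "time", "role"]

@st.cache_resource
def load_ner_model():
    # Load GLiNER once and reuse it across uploaded files and Streamlit reruns.
    return GLiNER.from_pretrained("urchade/gliner_base")

def extract_resume(uploaded_file, model):
    # Concatenate the text of every PDF page, then group predicted entities by label.
    reader = PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in reader.pages)
    entities = model.predict_entities(text, LABELS)
    entity_dict = {
        label: [e["text"] for e in entities if e["label"] == label] for label in LABELS
    }
    return text, entity_dict

def rank_resumes(job_description, resume_texts):
    # Row 0 of the TF-IDF matrix is the job description; compare it to every resume row.
    tfidf = TfidfVectorizer().fit_transform([job_description] + resume_texts)
    return cosine_similarity(tfidf[0:1], tfidf[1:]).flatten()

txt1 = st.text_area("Job description", key="text 1")
uploaded_files1 = st.file_uploader(
    "Choose PDF file(s) for candidate profiles",
    type="pdf",
    key="candidate 1",
    accept_multiple_files=True,  # needed so the widget returns a list to iterate over
)

resume_texts1 = []
if uploaded_files1:
    model = load_ner_model()
    for uploaded_file in uploaded_files1:
        try:
            text, entity_dict = extract_resume(uploaded_file, model)
            st.json(entity_dict)        # show the extracted entities per resume
            resume_texts1.append(text)  # keep the raw text; TF-IDF needs strings
        except Exception as e:
            st.error(f"Error processing file {uploaded_file.name}: {e}")

if txt1 and resume_texts1:
    scores = rank_resumes(txt1, resume_texts1)
    st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
    for i, score in enumerate(scores):
        st.write(f"Similarity with Candidate Profile {i + 1}: {score:.4f}")

Because the two candidate-set blocks in the diff are otherwise identical, the same three helpers could serve the second pair of widgets (keys "text 2" / "candidate 2") without duplicating the loop.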