nlpblogs committed (verified)
Commit e0fd9ad · 1 Parent(s): 4e4f827

Update app.py

Files changed (1):
  1. app.py +73 -89
app.py CHANGED
@@ -25,116 +25,100 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import tempfile

-txt1 = st.text_area("Job description", key = "text 1")
+txt1 = st.text_area("Job description", key="text 1")
 job_description_series1 = pd.Series(txt1, name="Text")
 st.dataframe(job_description_series1)

-uploaded_files = st.file_uploader(
-    "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 1"
+uploaded_files1 = st.file_uploader(
+    "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate_1"
 )
-
-
-all_resumes_text = [] # Store the text content of each PDF
-
-if uploaded_files:
-    for uploaded_file in uploaded_files:
+all_resumes_text1 = [] # Store the text content of each PDF
+if uploaded_files1:
+    for uploaded_file in uploaded_files1:
         try:
-            pdf_reader = PdfReader(uploaded_file)
-            text_data = ""
-            for page in pdf_reader.pages:
-                text_data += page.extract_text()
-            model = GLiNER.from_pretrained("urchade/gliner_base")
-            labels = ["person", "country", "organization", "time", "role"]
-            entities = model.predict_entities(text_data, labels)
-
-            entity_dict = {}
-            for label in labels:
-                entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
-
-            data = {"Text": text_data, **entity_dict}
-
-
-
-            all_resumes_text.append(text_data)
+            if uploaded_file is not None: # Check if a file was uploaded
+                pdf_reader = PdfReader(uploaded_file)
+                text_data = ""
+                for page in pdf_reader.pages:
+                    text_data += page.extract_text()
+                model = GLiNER.from_pretrained("urchade/gliner_base")
+                labels = ["person", "country", "organization", "time", "role"]
+                entities = model.predict_entities(text_data, labels)
+                entity_dict = {}
+                for label in labels:
+                    entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
+                data = {"Text": text_data, **entity_dict}
+                all_resumes_text1.append(text_data)
+            else:
+                st.warning("No file uploaded for the first set of candidates.")
         except Exception as e:
-            st.error(f"Error processing file {uploaded_file.name}: {e}")
-
-    if all_resumes_text:
-        all_documents = [job_description_series.iloc[0]] + all_resumes_text
-
-        vectorizer = TfidfVectorizer()
-        tfidf_matrix = vectorizer.fit_transform(all_documents)
-
-        tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
+            if hasattr(uploaded_file, 'name'):
+                st.error(f"Error processing file {uploaded_file.name}: {e}")
+            else:
+                st.error(f"Error processing a file: {e}")
+
+    if all_resumes_text1:
+        all_documents1 = [job_description_series1.iloc[0]] + all_resumes_text1
+        vectorizer1 = TfidfVectorizer()
+        tfidf_matrix1 = vectorizer1.fit_transform(all_documents1)
+        tfidf_df1 = pd.DataFrame(tfidf_matrix1.toarray(), columns=vectorizer1.get_feature_names_out())
         st.subheader("TF-IDF Values:")
-        st.dataframe(tfidf_df)
-
-        cosine_sim_matrix = cosine_similarity(tfidf_matrix)
-        cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
+        st.dataframe(tfidf_df1)
+        cosine_sim_matrix1 = cosine_similarity(tfidf_matrix1)
+        cosine_sim_df1 = pd.DataFrame(cosine_sim_matrix1)
         st.subheader("Cosine Similarity Matrix:")
-        st.dataframe(cosine_sim_df)
-
+        st.dataframe(cosine_sim_df1)
         # Display similarity scores between the job description and each resume
         st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
-        for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
+        for i, similarity_score in enumerate(cosine_sim_matrix1[0][1:]):
            st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")

-
-
 st.divider()

-txt2 = st.text_area("Job description", key = "text 2")
+txt2 = st.text_area("Job description", key="text 2")
 job_description_series2 = pd.Series(txt2, name="Text")
 st.dataframe(job_description_series2)

-uploaded_files = st.file_uploader(
-    "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 2"
+uploaded_files2 = st.file_uploader(
+    "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate_2"
 )
-
-
-all_resumes_text = [] # Store the text content of each PDF
-
-if uploaded_files:
-    for uploaded_file in uploaded_files:
+all_resumes_text2 = [] # Store the text content of each PDF
+if uploaded_files2:
+    for uploaded_file in uploaded_files2:
         try:
-            pdf_reader = PdfReader(uploaded_file)
-            text_data = ""
-            for page in pdf_reader.pages:
-                text_data += page.extract_text()
-            model = GLiNER.from_pretrained("urchade/gliner_base")
-            labels = ["person", "country", "organization", "time", "role"]
-            entities = model.predict_entities(text_data, labels)
-
-            entity_dict = {}
-            for label in labels:
-                entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
-
-            data = {"Text": text_data, **entity_dict}
-
-
-
-            all_resumes_text.append(text_data)
+            if uploaded_file is not None: # Check if a file was uploaded
+                pdf_reader = PdfReader(uploaded_file)
+                text_data = ""
+                for page in pdf_reader.pages:
+                    text_data += page.extract_text()
+                model = GLiNER.from_pretrained("urchade/gliner_base")
+                labels = ["person", "country", "organization", "time", "role"]
+                entities = model.predict_entities(text_data, labels)
+                entity_dict = {}
+                for label in labels:
+                    entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
+                data = {"Text": text_data, **entity_dict}
+                all_resumes_text2.append(text_data)
+            else:
+                st.warning("No file uploaded for the second set of candidates.")
         except Exception as e:
-            st.error(f"Error processing file {uploaded_file.name}: {e}")
-
-    if all_resumes_text:
-        all_documents = [job_description_series.iloc[0]] + all_resumes_text
-
-        vectorizer = TfidfVectorizer()
-        tfidf_matrix = vectorizer.fit_transform(all_documents)
-
-        tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
+            if hasattr(uploaded_file, 'name'):
+                st.error(f"Error processing file {uploaded_file.name}: {e}")
+            else:
+                st.error(f"Error processing a file: {e}")
+
+    if all_resumes_text2:
+        all_documents2 = [job_description_series2.iloc[0]] + all_resumes_text2
+        vectorizer2 = TfidfVectorizer()
+        tfidf_matrix2 = vectorizer2.fit_transform(all_documents2)
+        tfidf_df2 = pd.DataFrame(tfidf_matrix2.toarray(), columns=vectorizer2.get_feature_names_out())
         st.subheader("TF-IDF Values:")
-        st.dataframe(tfidf_df)
-
-        cosine_sim_matrix = cosine_similarity(tfidf_matrix)
-        cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
+        st.dataframe(tfidf_df2)
+        cosine_sim_matrix2 = cosine_similarity(tfidf_matrix2)
+        cosine_sim_df2 = pd.DataFrame(cosine_sim_matrix2)
         st.subheader("Cosine Similarity Matrix:")
-        st.dataframe(cosine_sim_df)
-
+        st.dataframe(cosine_sim_df2)
         # Display similarity scores between the job description and each resume
         st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
-        for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
-            st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
-
-
+        for i, similarity_score in enumerate(cosine_sim_matrix2[0][1:]):
+            st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")