nlpblogs committed on
Commit
bf47e43
·
verified ·
1 Parent(s): c927f4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -49
app.py CHANGED
@@ -25,39 +25,27 @@ from sklearn.feature_extraction.text import TfidfVectorizer
25
  from sklearn.metrics.pairwise import cosine_similarity
26
  import tempfile
27
 
28
- txt1 = st.text_area("Job description", key = "text 1")
29
- job_description_series1 = pd.Series(txt1, name="Text")
30
- st.dataframe(job_description_series1)
31
-
32
- from transformers import pipeline
33
-
34
 
35
  uploaded_files = st.file_uploader(
36
- "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate 1"
37
  )
38
 
39
- all_resumes_text = [] # Store the text content and entities of each PDF
40
 
41
- if uploaded_files:
42
-
43
 
 
44
  for uploaded_file in uploaded_files:
45
- pdf_reader = PdfReader(uploaded_file)
46
- text_data = ""
47
- for page in pdf_reader.pages:
48
- text_data += page.extract_text()
49
- model = GLiNER.from_pretrained("urchade/gliner_base")
50
- labels = ["person", "country", "organization", "time", "role"]
51
- entities = model.predict_entities(text_data, labels)
52
-
53
- entity_dict = {}
54
- for label in labels:
55
- entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
56
-
57
- data = {"Text": text_data, **entity_dict}
58
- all_resumes_text.append(data)
59
-
60
-
61
 
62
  if all_resumes_text:
63
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
@@ -83,36 +71,27 @@ if uploaded_files:
83
 
84
  st.divider()
85
 
86
- txt2 = st.text_area("Job description", key = "text 2")
87
- job_description_series2 = pd.Series(txt2, name="Text")
88
- st.dataframe(job_description_series2)
89
-
90
 
91
  uploaded_files = st.file_uploader(
92
- "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate 2"
93
  )
94
 
95
- all_resumes_text = [] # Store the text content and entities of each PDF
96
 
97
- if uploaded_files:
98
-
99
 
 
100
  for uploaded_file in uploaded_files:
101
- pdf_reader = PdfReader(uploaded_file)
102
- text_data = ""
103
- for page in pdf_reader.pages:
104
- text_data += page.extract_text()
105
- model = GLiNER.from_pretrained("urchade/gliner_base")
106
- labels = ["person", "country", "organization", "time", "role"]
107
- entities = model.predict_entities(text_data, labels)
108
-
109
- entity_dict = {}
110
- for label in labels:
111
- entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
112
-
113
- data = {"Text": text_data, **entity_dict}
114
- all_resumes_text.append(data)
115
-
116
 
117
  if all_resumes_text:
118
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
@@ -134,4 +113,8 @@ if uploaded_files:
134
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
135
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
136
 
 
 
 
 
137
 
 
25
  from sklearn.metrics.pairwise import cosine_similarity
26
  import tempfile
27
 
28
+ txt = st.text_area("Job description", key = "text 1")
29
+ job_description_series = pd.Series(txt, name="Text")
30
+ st.dataframe(job_description_series)
 
 
 
31
 
32
  uploaded_files = st.file_uploader(
33
+ "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 1"
34
  )
35
 
 
36
 
37
+ all_resumes_text = [] # Store the text content of each PDF
 
38
 
39
+ if uploaded_files:
40
  for uploaded_file in uploaded_files:
41
+ try:
42
+ pdf_reader = PdfReader(uploaded_file)
43
+ text_data = ""
44
+ for page in pdf_reader.pages:
45
+ text_data += page.extract_text()
46
+ all_resumes_text.append(text_data)
47
+ except Exception as e:
48
+ st.error(f"Error processing file {uploaded_file.name}: {e}")
 
 
 
 
 
 
 
 
49
 
50
  if all_resumes_text:
51
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
 
71
 
72
  st.divider()
73
 
74
+ txt = st.text_area("Job description", key = "text 2")
75
+ job_description_series = pd.Series(txt, name="Text")
76
+ st.dataframe(job_description_series)
 
77
 
78
  uploaded_files = st.file_uploader(
79
+ "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 2"
80
  )
81
 
 
82
 
83
+ all_resumes_text = [] # Store the text content of each PDF
 
84
 
85
+ if uploaded_files:
86
  for uploaded_file in uploaded_files:
87
+ try:
88
+ pdf_reader = PdfReader(uploaded_file)
89
+ text_data = ""
90
+ for page in pdf_reader.pages:
91
+ text_data += page.extract_text()
92
+ all_resumes_text.append(text_data)
93
+ except Exception as e:
94
+ st.error(f"Error processing file {uploaded_file.name}: {e}")
 
 
 
 
 
 
 
95
 
96
  if all_resumes_text:
97
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
 
113
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
114
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
115
 
116
+
117
+
118
+
119
+
120