nlpblogs commited on
Commit
5196b87
·
verified ·
1 Parent(s): 05e7e94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -22
app.py CHANGED
@@ -29,35 +29,38 @@ txt1 = st.text_area("Job description", key = "text 1")
29
  job_description_series1 = pd.Series(txt1, name="Text")
30
  st.dataframe(job_description_series1)
31
 
 
 
 
32
  uploaded_files = st.file_uploader(
33
- "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 1"
34
  )
35
 
36
-
37
- all_resumes_text1 = [] # Store the text content of each PDF
38
 
39
  if uploaded_files:
 
 
 
40
  for uploaded_file in uploaded_files:
41
  pdf_reader = PdfReader(uploaded_file)
42
  text_data = ""
43
  for page in pdf_reader.pages:
44
  text_data += page.extract_text()
45
- model = GLiNER.from_pretrained("urchade/gliner_base")
46
- labels = ["person", "country", "organization", "time", "role"]
47
- entities = model.predict_entities(text_data, labels)
48
 
49
  entity_dict = {}
50
  for label in labels:
51
- entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
52
 
53
  data = {"Text": text_data, **entity_dict}
54
-
55
-
56
 
57
- all_resumes_text1.append(data)
58
 
59
 
60
- if all_resumes_text1:
61
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
62
 
63
  vectorizer = TfidfVectorizer()
@@ -85,35 +88,36 @@ txt2 = st.text_area("Job description", key = "text 2")
85
  job_description_series2 = pd.Series(txt2, name="Text")
86
  st.dataframe(job_description_series2)
87
 
 
88
  uploaded_files = st.file_uploader(
89
- "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 2"
90
  )
91
 
92
-
93
- all_resumes_text2 = [] # Store the text content of each PDF
94
 
95
  if uploaded_files:
 
 
 
96
  for uploaded_file in uploaded_files:
97
  pdf_reader = PdfReader(uploaded_file)
98
  text_data = ""
99
  for page in pdf_reader.pages:
100
  text_data += page.extract_text()
101
- model = GLiNER.from_pretrained("urchade/gliner_base")
102
- labels = ["person", "country", "organization", "time", "role"]
103
- entities = model.predict_entities(text_data, labels)
104
 
105
  entity_dict = {}
106
  for label in labels:
107
- entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
108
 
109
  data = {"Text": text_data, **entity_dict}
110
-
111
-
112
 
113
- all_resumes_text2.append(data)
114
 
115
 
116
- if all_resumes_text2:
117
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
118
 
119
  vectorizer = TfidfVectorizer()
 
29
  job_description_series1 = pd.Series(txt1, name="Text")
30
  st.dataframe(job_description_series1)
31
 
32
+
33
+
34
+
35
  uploaded_files = st.file_uploader(
36
+ "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate 1"
37
  )
38
 
39
+ all_resumes_text = [] # Store the text content and entities of each PDF
 
40
 
41
  if uploaded_files:
42
+ model = pipeline("ner", model="urchade/gliner_base", aggregation_strategy="simple")
43
+ labels = ["person", "country", "organization", "time", "role"]
44
+
45
  for uploaded_file in uploaded_files:
46
  pdf_reader = PdfReader(uploaded_file)
47
  text_data = ""
48
  for page in pdf_reader.pages:
49
  text_data += page.extract_text()
50
+
51
+ entities = model(text_data)
 
52
 
53
  entity_dict = {}
54
  for label in labels:
55
+ entity_dict[label] = [entity["word"] for entity in entities if entity["entity_group"] == label]
56
 
57
  data = {"Text": text_data, **entity_dict}
58
+ all_resumes_text.append(data)
 
59
 
60
+ st.write("Processed data:", all_resumes_text) # For demonstration
61
 
62
 
63
+ if all_resumes_text:
64
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
65
 
66
  vectorizer = TfidfVectorizer()
 
88
  job_description_series2 = pd.Series(txt2, name="Text")
89
  st.dataframe(job_description_series2)
90
 
91
+
92
  uploaded_files = st.file_uploader(
93
+ "Choose a PDF file(s) for candidate profiles", type="pdf", key="candidate 2"
94
  )
95
 
96
+ all_resumes_text = [] # Store the text content and entities of each PDF
 
97
 
98
  if uploaded_files:
99
+ model = pipeline("ner", model="urchade/gliner_base", aggregation_strategy="simple")
100
+ labels = ["person", "country", "organization", "time", "role"]
101
+
102
  for uploaded_file in uploaded_files:
103
  pdf_reader = PdfReader(uploaded_file)
104
  text_data = ""
105
  for page in pdf_reader.pages:
106
  text_data += page.extract_text()
107
+
108
+ entities = model(text_data)
 
109
 
110
  entity_dict = {}
111
  for label in labels:
112
+ entity_dict[label] = [entity["word"] for entity in entities if entity["entity_group"] == label]
113
 
114
  data = {"Text": text_data, **entity_dict}
115
+ all_resumes_text.append(data)
 
116
 
117
+ st.write("Processed data:", all_resumes_text) # For demonstration
118
 
119
 
120
+ if all_resumes_text:
121
  all_documents = [job_description_series.iloc[0]] + all_resumes_text
122
 
123
  vectorizer = TfidfVectorizer()