nlpblogs committed on
Commit
c927f4a
·
verified ·
1 Parent(s): 0d4d325

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -21
app.py CHANGED
@@ -39,25 +39,24 @@ uploaded_files = st.file_uploader(
39
  all_resumes_text = [] # Store the text content and entities of each PDF
40
 
41
  if uploaded_files:
42
- model = GLiNER.from_pretrained("urchade/gliner_base")
43
- labels = ["person", "country", "organization", "time", "role"]
44
 
45
  for uploaded_file in uploaded_files:
46
  pdf_reader = PdfReader(uploaded_file)
47
  text_data = ""
48
  for page in pdf_reader.pages:
49
  text_data += page.extract_text()
 
 
 
50
 
51
- entities = model(text_data)
 
 
52
 
53
- entity_dict = {}
54
- for label in labels:
55
- entity_dict[label] = [entity["word"] for entity in entities if entity["entity_group"] == label]
56
 
57
- data = {"Text": text_data, **entity_dict}
58
- all_resumes_text.append(data)
59
-
60
- st.write("Processed data:", all_resumes_text) # For demonstration
61
 
62
 
63
  if all_resumes_text:
@@ -96,25 +95,23 @@ uploaded_files = st.file_uploader(
96
  all_resumes_text = [] # Store the text content and entities of each PDF
97
 
98
  if uploaded_files:
99
- model = GLiNER.from_pretrained("urchade/gliner_base")
100
- labels = ["person", "country", "organization", "time", "role"]
101
 
102
  for uploaded_file in uploaded_files:
103
  pdf_reader = PdfReader(uploaded_file)
104
  text_data = ""
105
  for page in pdf_reader.pages:
106
  text_data += page.extract_text()
 
 
 
107
 
108
- entities = model(text_data)
109
-
110
- entity_dict = {}
111
- for label in labels:
112
- entity_dict[label] = [entity["word"] for entity in entities if entity["entity_group"] == label]
113
-
114
- data = {"Text": text_data, **entity_dict}
115
- all_resumes_text1.append(data)
116
 
117
- st.write("Processed data:", all_resumes_text) # For demonstration
 
118
 
119
 
120
  if all_resumes_text:
 
39
  all_resumes_text = [] # Store the text content and entities of each PDF
40
 
41
  if uploaded_files:
42
+
 
43
 
44
  for uploaded_file in uploaded_files:
45
  pdf_reader = PdfReader(uploaded_file)
46
  text_data = ""
47
  for page in pdf_reader.pages:
48
  text_data += page.extract_text()
49
+ model = GLiNER.from_pretrained("urchade/gliner_base")
50
+ labels = ["person", "country", "organization", "time", "role"]
51
+ entities = model.predict_entities(text_data, labels)
52
 
53
+ entity_dict = {}
54
+ for label in labels:
55
+ entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
56
 
57
+ data = {"Text": text_data, **entity_dict}
58
+ all_resumes_text.append(data)
 
59
 
 
 
 
 
60
 
61
 
62
  if all_resumes_text:
 
95
  all_resumes_text = [] # Store the text content and entities of each PDF
96
 
97
  if uploaded_files:
98
+
 
99
 
100
  for uploaded_file in uploaded_files:
101
  pdf_reader = PdfReader(uploaded_file)
102
  text_data = ""
103
  for page in pdf_reader.pages:
104
  text_data += page.extract_text()
105
+ model = GLiNER.from_pretrained("urchade/gliner_base")
106
+ labels = ["person", "country", "organization", "time", "role"]
107
+ entities = model.predict_entities(text_data, labels)
108
 
109
+ entity_dict = {}
110
+ for label in labels:
111
+ entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
 
 
 
 
 
112
 
113
+ data = {"Text": text_data, **entity_dict}
114
+ all_resumes_text.append(data)
115
 
116
 
117
  if all_resumes_text: