File size: 1,687 Bytes
5bea701 5a5c182 c40c6c3 88d066d 040362f 5bea701 9ac410d 88d066d 9ac410d 23fd868 aa023ef 23fd868 aa023ef 319dddf 6e78c7b 23fd868 6e78c7b 23fd868 6e78c7b 23fd868 6e78c7b 1d303e7 23fd868 1d303e7 79bc629 88d066d c40c6c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER
import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER
# Streamlit page flow: upload one or more PDFs, extract their text, tag named
# entities with GLiNER, and show one table row per PDF.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf",
    accept_multiple_files=True,
    type="pdf",
)

if uploaded_files:
    # Entity types requested from the model; each becomes a DataFrame column.
    labels = [
        "person",
        "country",
        "city",
        "organization",
        "date",
        "money",
        "percent value",
        "position",
    ]
    all_data = []  # One dict (full text + entities per label) for each PDF

    # Load the model once for the whole batch instead of once per file:
    # the load does not depend on the file being processed.
    try:
        model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
    except Exception as e:
        st.error(f"Error loading entity model: {e}")
        model = None

    if model is not None:
        for uploaded_file in uploaded_files:
            try:
                pdf_reader = PdfReader(uploaded_file)
                # Guard with `or ""` so a page that yields no text (None)
                # does not abort the whole file with a TypeError.
                text_data = "".join(
                    page.extract_text() or "" for page in pdf_reader.pages
                )

                entities = model.predict_entities(text_data, labels)

                # Group entity texts by label in a single pass; labels with
                # no matches keep an empty list so every row has all columns.
                entity_dict = {label: [] for label in labels}
                for entity in entities:
                    if entity["label"] in entity_dict:
                        entity_dict[entity["label"]].append(entity["text"])

                all_data.append({"Text": text_data, **entity_dict})
            except Exception as e:
                # Best-effort per file: report the failure and continue with
                # the remaining uploads.
                st.error(f"Error processing file {uploaded_file.name}: {e}")

    if all_data:
        df = pd.DataFrame(all_data)
        st.dataframe(df)
|