File size: 1,748 Bytes
5bea701 5a5c182 c40c6c3 88d066d 040362f 5bea701 9ac410d 88d066d 9ac410d 23fd868 0c1dcc4 8754504 0c1dcc4 c71c13d aa023ef 23fd868 aa023ef 319dddf c71c13d 6e78c7b c71c13d 6e78c7b c71c13d 23fd868 c71c13d 0c1dcc4 8754504 1d303e7 79bc629 88d066d c40c6c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER
import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER
# --- Input widgets -----------------------------------------------------------
# Free-text box for the job description; Streamlit returns the current text.
txt = st.text_area(label="Job description")

# Wrap the text in a one-row Series labelled "Job Description" and render it
# back to the user as a table.
data1 = pd.Series(data=txt, index=["Job Description"])
st.dataframe(data1)

# PDF-only uploader that accepts several files at once. The result is falsy
# while nothing has been uploaded.
uploaded_files = st.file_uploader(
    label="Choose a PDF file(s) and job description as pdf",
    type="pdf",
    accept_multiple_files=True,
)
# --- Entity extraction over the uploaded PDFs --------------------------------
# For every uploaded PDF: pull the text of all pages, run GLiNER entity
# prediction over it, and collect one row per file (full text plus one column
# per entity label). The rows are shown as a single table at the end.
if uploaded_files:
    # Entity types requested from the model; each becomes a column in the table.
    labels = ["person", "country", "organization", "time", "role"]
    all_data = []  # One dict per successfully processed PDF: text + entities

    # Load the model once up front instead of once per file: the call does not
    # depend on the file being processed, so repeating it per PDF is wasted work.
    model = None
    try:
        model = GLiNER.from_pretrained("urchade/gliner_base")
    except Exception as e:
        st.error(f"Error loading entity model: {e}")

    if model is not None:
        for uploaded_file in uploaded_files:
            # A failure in one file is reported and must not abort the others.
            try:
                pdf_reader = PdfReader(uploaded_file)
                # `or ""` guards against a page yielding no text object at all
                # (assumption: some PyPDF2 versions return None for such pages),
                # which would otherwise raise TypeError during concatenation.
                text_data = "".join(
                    page.extract_text() or "" for page in pdf_reader.pages
                )

                entities = model.predict_entities(text_data, labels)

                # Group entity texts by label in a single pass. Every requested
                # label gets a (possibly empty) list so all rows share the same
                # columns; entities with any other label are ignored.
                entity_dict = {label: [] for label in labels}
                for entity in entities:
                    bucket = entity_dict.get(entity["label"])
                    if bucket is not None:
                        bucket.append(entity["text"])

                all_data.append({"Text": text_data, **entity_dict})
            except Exception as e:
                st.error(f"Error processing file {uploaded_file.name}: {e}")

    # Render the table only when at least one file was processed successfully.
    # This check lives inside the upload branch because `all_data` is only
    # defined here.
    if all_data:
        df = pd.DataFrame(all_data)
        st.dataframe(df)
|