File size: 1,748 Bytes
5bea701
 
5a5c182
c40c6c3
88d066d
040362f
5bea701
9ac410d
 
 
 
 
88d066d
 
9ac410d
23fd868
 
 
 
 
0c1dcc4
8754504
 
0c1dcc4
c71c13d
 
aa023ef
23fd868
aa023ef
319dddf
c71c13d
 
 
 
 
 
 
 
6e78c7b
c71c13d
 
 
6e78c7b
c71c13d
 
 
23fd868
 
c71c13d
 
 
 
 
 
 
 
0c1dcc4
8754504
1d303e7
79bc629
88d066d
c40c6c3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER


import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER

# Read the free-form job description typed by the user and echo it back as a
# single-entry table labelled "Job Description".
txt = st.text_area("Job description")
data1 = pd.Series({"Job Description": txt})
st.dataframe(data1)



# Upload widget limited to PDF files; several files may be selected at once,
# so the result is iterated as a collection further down.
uploaded_files = st.file_uploader(
    label="Choose a PDF file(s) and job description as pdf",
    type="pdf",
    accept_multiple_files=True,
)

@st.cache_resource
def _load_gliner_model():
    """Load the GLiNER checkpoint once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction, so without
    caching the checkpoint would be loaded again for every uploaded file on
    every rerun.
    """
    return GLiNER.from_pretrained("urchade/gliner_base")


if uploaded_files:
    # Entity types requested from the model; each one becomes a table column.
    labels = ["person", "country", "organization", "time", "role"]
    all_data = []  # One dict (full text + per-label entity lists) per PDF
    for uploaded_file in uploaded_files:
        try:
            pdf_reader = PdfReader(uploaded_file)
            # Join pages with a newline so the last word of one page is not
            # glued to the first word of the next. The `or ""` guards against
            # extract_text() yielding None for a page.
            text_data = "\n".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

            # Fetched inside the try so a failed model load is reported via
            # st.error like any other per-file failure instead of crashing.
            model = _load_gliner_model()
            # NOTE(review): GLiNER checkpoints may truncate long inputs;
            # confirm whether multi-page documents need to be chunked.
            entities = model.predict_entities(text_data, labels)

            # Group the extracted entity texts by label in a single pass,
            # keeping an (empty) list for labels with no matches so every row
            # has the same columns.
            entity_dict = {label: [] for label in labels}
            for entity in entities:
                if entity["label"] in entity_dict:
                    entity_dict[entity["label"]].append(entity["text"])

            all_data.append({"Text": text_data, **entity_dict})

        except Exception as e:
            # Deliberately broad: this is the UI boundary, and one unreadable
            # file should not prevent the remaining uploads from being shown.
            st.error(f"Error processing file {uploaded_file.name}: {e}")

    if all_data:
        # One row per successfully processed PDF.
        df = pd.DataFrame(all_data)
        st.dataframe(df)