File size: 1,687 Bytes
5bea701
 
5a5c182
c40c6c3
88d066d
040362f
5bea701
9ac410d
 
 
 
 
88d066d
 
9ac410d
23fd868
 
 
 
 
aa023ef
23fd868
aa023ef
319dddf
6e78c7b
23fd868
6e78c7b
 
 
 
 
 
 
23fd868
 
 
6e78c7b
23fd868
 
 
 
 
 
6e78c7b
 
 
1d303e7
23fd868
 
 
 
1d303e7
79bc629
88d066d
c40c6c3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER


import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER

# Entity types requested from the GLiNER model; each one becomes a column
# in the resulting DataFrame.
ENTITY_LABELS = [
    "person",
    "country",
    "city",
    "organization",
    "date",
    "money",
    "percent value",
    "position",
]


@st.cache_resource
def _load_model():
    """Load the GLiNER model once and reuse it across files and reruns.

    Streamlit re-executes the whole script on every interaction, so without
    caching the pretrained weights would be loaded again for every uploaded
    file and every rerun.
    """
    return GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")


def _extract_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*."""
    reader = PdfReader(pdf_file)
    # Defensive: treat a missing (None/empty) page result as empty text so
    # one unreadable page does not abort the whole file with a TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _group_entities(entities, labels):
    """Group entity texts by label, keeping one (possibly empty) list per label.

    *entities* is an iterable of dicts with "text" and "label" keys. Entities
    whose label is not in *labels* are ignored.
    """
    grouped = {label: [] for label in labels}
    for entity in entities:
        bucket = grouped.get(entity["label"])
        if bucket is not None:
            bucket.append(entity["text"])
    return grouped


uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf", accept_multiple_files=True, type="pdf"
)

if uploaded_files:
    all_data = []  # One dict per successfully processed PDF: text + entity lists
    for uploaded_file in uploaded_files:
        # Broad catch is deliberate: this is the UI boundary, and a failure in
        # one file (parsing, model load, inference) is reported without
        # stopping the remaining files from being processed.
        try:
            text_data = _extract_text(uploaded_file)
            entities = _load_model().predict_entities(text_data, ENTITY_LABELS)
            entity_dict = _group_entities(entities, ENTITY_LABELS)
            all_data.append({"Text": text_data, **entity_dict})
        except Exception as e:
            st.error(f"Error processing file {uploaded_file.name}: {e}")

    if all_data:
        df = pd.DataFrame(all_data)
        st.dataframe(df)