File size: 1,612 Bytes
5bea701 5a5c182 c40c6c3 040362f 5bea701 9ac410d aa023ef 98b95b5 aa023ef 319dddf 9ac410d 319dddf aa023ef 319dddf 1d303e7 319dddf 9ac410d 79bc629 9ac410d c40c6c3 9ac410d c40c6c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Streamlit page: upload several PDFs (resumes plus a job description),
# extract the text of each one, and display the pairwise TF-IDF cosine
# similarity between all successfully parsed documents.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf",
    accept_multiple_files=True,
    type="pdf",
)

# Extracted text of each successfully parsed PDF, in upload order. The
# position of a text in this list is its row/column in the similarity matrix.
all_resumes = []

for uploaded_file in uploaded_files:
    try:
        pdf_reader = PdfReader(uploaded_file)
        # A page without a text layer may yield nothing; treat that as an
        # empty string instead of failing on the concatenation.
        text_data = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    except Exception as e:
        # UI boundary: report the unreadable file and keep processing the rest.
        st.error(f"Error processing {uploaded_file.name}: {e}")
        continue

    if not text_data.strip():
        # An empty document carries no terms for TF-IDF, so leave it out.
        st.warning(
            f"No extractable text found in {uploaded_file.name}; skipping it."
        )
        continue

    # Record the text so the similarity step below actually has input; the
    # index shown to the user matches this document's row in the matrix.
    candidate_index = len(all_resumes)
    all_resumes.append(text_data)

    resumes = pd.Series(text_data, index=['Candidate profile'])
    st.dataframe(resumes)
    st.write(f"Candidate profile: {candidate_index}, Resume: {text_data}")

if len(all_resumes) >= 2:
    try:
        # Fit TF-IDF on all documents together so they share one vocabulary.
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(all_resumes)
    except ValueError as e:
        # Raised by scikit-learn when no usable terms remain (empty vocabulary).
        st.error(f"Could not compute TF-IDF features: {e}")
    else:
        # Pairwise cosine similarity between every pair of documents.
        cosine_sim = cosine_similarity(tfidf_matrix)
        st.subheader("Cosine Similarity Matrix")
        st.dataframe(cosine_sim)
elif uploaded_files:
    st.info("Please upload at least two PDF files to calculate cosine similarity.")
|