# nlpblogs's picture
# Update app.py
# 1d303e7 verified
# raw
# history blame
# 1.61 kB
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Streamlit page: upload several PDFs (resumes plus a job description), show
# the text extracted from each one, then display the pairwise TF-IDF cosine
# similarity between all documents that yielded text.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf",
    accept_multiple_files=True,
    type="pdf",
)

# Extracted text of every successfully parsed, non-empty PDF, in upload order.
all_resumes = []

for uploaded_file in uploaded_files or []:
    # Best-effort per file: a broken PDF is reported and must not prevent the
    # remaining uploads from being processed.
    try:
        pdf_reader = PdfReader(uploaded_file)
        # `or ""` guards against a page that yields no text object at all
        # (presumably image-only pages; verify against the PyPDF2 version).
        text_data = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

        # Show the extracted text both as a one-row table and as plain text.
        resumes = pd.Series(text_data, index=['Candidate profile'])
        st.dataframe(resumes)
        for index, resume in enumerate(resumes):
            st.write(f"Candidate profile: {index}, Resume: {resume}")

        if text_data.strip():
            # Collect the text so the similarity step below has input; the
            # original never populated this list, so that step never ran.
            all_resumes.append(text_data)
        else:
            st.warning(
                f"No text could be extracted from {uploaded_file.name}; "
                "it is excluded from the similarity matrix."
            )
    except Exception as e:
        st.error(f"Error processing {uploaded_file.name}: {e}")

# A similarity matrix is only meaningful with at least two documents.
if len(all_resumes) >= 2:
    # Initialize the TF-IDF vectorizer
    vectorizer = TfidfVectorizer()
    try:
        # Fit and transform the text data
        tfidf_matrix = vectorizer.fit_transform(all_resumes)
    except ValueError as e:
        # Raised e.g. when no document contains any usable token.
        st.error(f"Could not vectorize the uploaded documents: {e}")
    else:
        # Calculate the cosine similarity matrix (documents x documents)
        cosine_sim = cosine_similarity(tfidf_matrix)
        st.subheader("Cosine Similarity Matrix")
        st.dataframe(cosine_sim)
elif uploaded_files:
    st.info("Please upload at least two PDF files to calculate cosine similarity.")