|
import streamlit as st |
|
from PyPDF2 import PdfReader |
|
import pandas as pd |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
import streamlit as st |
|
from PyPDF2 import PdfReader |
|
import pandas as pd |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
# Resume similarity app: upload several PDFs (resumes and a job description),
# extract their text, and show the pairwise TF-IDF cosine similarity.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf",
    accept_multiple_files=True,
    type="pdf",
)

# Extracted text of every successfully processed PDF, in upload order.
# This list is the corpus handed to the TF-IDF vectorizer below.
all_resumes = []

# `or []` keeps the loop safe if the uploader hands back None.
for uploaded_file in uploaded_files or []:
    try:
        pdf_reader = PdfReader(uploaded_file)
        # `or ""` guards against a page that yields no text (e.g. scanned
        # images), so concatenation cannot fail on a non-string value.
        text_data = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

        resumes = pd.Series(text_data, index=['Candidate profile'])
        st.dataframe(resumes)

        for index, resume in enumerate(resumes):
            st.write(f"Candidate profile: {index}, Resume: {resume}")

        # Collect the text so the similarity step has documents to compare.
        # Without this the corpus stays empty and the matrix is never shown.
        # Documents with no extractable text are skipped: they carry no
        # terms and would only add all-zero rows to the TF-IDF matrix.
        if text_data.strip():
            all_resumes.append(text_data)
    except Exception as e:
        # Best-effort per file: report the failure and continue with the rest.
        st.error(f"Error processing {uploaded_file.name}: {e}")

# Pairwise similarity is only meaningful with at least two documents,
# which is also what the hint below tells the user.
if len(all_resumes) >= 2:
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(all_resumes)
    cosine_sim = cosine_similarity(tfidf_matrix)

    st.subheader("Cosine Similarity Matrix")
    st.dataframe(cosine_sim)
elif uploaded_files:
    st.info("Please upload at least two PDF files to calculate cosine similarity.")
|
|
|
|