# Streamlit app that compares uploaded candidate profiles (PDF) with a job
# description: it builds TF-IDF vectors for the job description and every
# extracted resume text, then displays the TF-IDF table, the full cosine
# similarity matrix, and the job-description-vs-resume similarity scores.

import pandas as pd
import streamlit as st
from gliner import GLiNER  # noqa: F401 -- not used in this section; kept for the rest of the file
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- Inputs -----------------------------------------------------------------

txt = st.text_area("Job description")
job_description_series = pd.Series([txt], name="Text")
st.dataframe(job_description_series)

uploaded_files = st.file_uploader(
    "Choose a PDF file(s) for candidate profiles",
    accept_multiple_files=True,
    type="pdf",
)

# --- PDF text extraction ----------------------------------------------------

all_resumes_text = []  # Store the text content of each PDF

if uploaded_files:
    for uploaded_file in uploaded_files:
        try:
            pdf_reader = PdfReader(uploaded_file)
            # Join once instead of repeated `+=`; `or ""` is a defensive guard
            # so a page that yields no text cannot break the concatenation.
            text_data = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        except Exception as e:
            # Deliberately broad: this is the per-file UI boundary, and one
            # unreadable PDF should be reported without aborting the others.
            st.error(f"Error processing file {uploaded_file.name}: {e}")
        else:
            # Only files that were parsed successfully take part in scoring.
            all_resumes_text.append(text_data)

# --- TF-IDF and cosine similarity -------------------------------------------

if all_resumes_text:
    # Document 0 is the job description; documents 1..n are the resumes, in
    # the order they were successfully extracted.
    all_documents = [job_description_series.iloc[0]] + all_resumes_text

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(all_documents)
    except ValueError as e:
        # Raised by scikit-learn when no document contains a usable token
        # (e.g. image-only PDFs together with an empty job description).
        st.error(f"Could not compute TF-IDF values: {e}")
    else:
        tfidf_df = pd.DataFrame(
            tfidf_matrix.toarray(),
            columns=vectorizer.get_feature_names_out(),
        )
        st.subheader("TF-IDF Values:")
        st.dataframe(tfidf_df)

        cosine_sim_matrix = cosine_similarity(tfidf_matrix)
        cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
        st.subheader("Cosine Similarity Matrix:")
        st.dataframe(cosine_sim_df)

        # Display similarity scores between the job description and each resume.
        # Row 0 belongs to the job description; skipping column 0 drops its
        # similarity with itself, leaving one score per resume.
        st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
        for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
            st.write(
                f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}"
            )