nlpblogs committed on
Commit
cef76db
·
verified ·
1 Parent(s): aa6c862

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -50
app.py CHANGED
@@ -18,93 +18,97 @@ import pandas as pd
18
  from PyPDF2 import PdfReader
19
  from gliner import GLiNER
20
 
 
 
 
 
 
 
21
 
22
- txt = st.text_area("Job description", key = "text 1")
23
- job_description_series = pd.Series(txt, name="Text")
24
- st.dataframe(job_description_series)
 
25
 
26
- uploaded_files = st.file_uploader(
27
- "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 1"
28
  )
29
 
 
30
 
31
- all_resumes_text = [] # Store the text content of each PDF
32
-
33
- if uploaded_files:
34
- for uploaded_file in uploaded_files:
35
  try:
36
  pdf_reader = PdfReader(uploaded_file)
37
  text_data = ""
38
  for page in pdf_reader.pages:
39
  text_data += page.extract_text()
40
- all_resumes_text.append(text_data)
41
  except Exception as e:
42
  st.error(f"Error processing file {uploaded_file.name}: {e}")
43
 
44
- if all_resumes_text:
45
- all_documents = [job_description_series.iloc[0]] + all_resumes_text
46
 
47
- vectorizer = TfidfVectorizer()
48
- tfidf_matrix = vectorizer.fit_transform(all_documents)
49
 
50
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
51
- st.subheader("TF-IDF Values:")
52
- st.dataframe(tfidf_df)
53
 
54
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
55
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
56
- st.subheader("Cosine Similarity Matrix:")
57
- st.dataframe(cosine_sim_df)
58
 
59
- # Display similarity scores between the job description and each resume
60
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
61
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
62
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
63
 
64
-
65
-
66
  st.divider()
67
 
68
- txt = st.text_area("Job description", key = "text 2")
69
- job_description_series = pd.Series(txt, name="Text")
70
- st.dataframe(job_description_series)
 
71
 
72
- uploaded_files = st.file_uploader(
73
- "Choose a PDF file(s) for candidate profiles", type="pdf", key = "candidate 2"
74
  )
75
 
 
76
 
77
- all_resumes_text = [] # Store the text content of each PDF
78
-
79
- if uploaded_files:
80
- for uploaded_file in uploaded_files:
81
  try:
82
  pdf_reader = PdfReader(uploaded_file)
83
  text_data = ""
84
  for page in pdf_reader.pages:
85
  text_data += page.extract_text()
86
- all_resumes_text.append(text_data)
87
  except Exception as e:
88
  st.error(f"Error processing file {uploaded_file.name}: {e}")
89
 
90
- if all_resumes_text:
91
- all_documents = [job_description_series.iloc[0]] + all_resumes_text
92
 
93
- vectorizer = TfidfVectorizer()
94
- tfidf_matrix = vectorizer.fit_transform(all_documents)
95
 
96
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
97
- st.subheader("TF-IDF Values:")
98
- st.dataframe(tfidf_df)
99
 
100
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
101
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
102
- st.subheader("Cosine Similarity Matrix:")
103
- st.dataframe(cosine_sim_df)
104
 
105
- # Display similarity scores between the job description and each resume
106
- st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
107
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
108
  st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
109
 
 
 
110
 
 
18
  from PyPDF2 import PdfReader
19
  from gliner import GLiNER
20
 
21
+ import streamlit as st
22
+ import pandas as pd
23
+ from PyPDF2 import PdfReader
24
+ from sklearn.feature_extraction.text import TfidfVectorizer
25
+ from sklearn.metrics.pairwise import cosine_similarity
26
+ import tempfile
27
 
28
def _read_pdf_text(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        uploaded_file: File-like object accepted by ``PdfReader`` (here, an
            item returned by ``st.file_uploader``).

    Returns:
        A single string with the text of all pages, in page order.

    Raises:
        Whatever ``PdfReader`` / ``extract_text`` raise for an unreadable
        file; the caller reports these per file.
    """
    reader = PdfReader(uploaded_file)
    # `or ""` is defensive: a page with no extractable text must not abort
    # the whole document if the extractor hands back None instead of "".
    return "".join(page.extract_text() or "" for page in reader.pages)


def _run_comparison(set_number):
    """Render one job-description-vs-resumes comparison section.

    Draws a job description text area and a multi-file PDF uploader, then,
    once at least one PDF has been read successfully, displays the TF-IDF
    table, the full cosine-similarity matrix, and the similarity of each
    resume to the job description.

    Args:
        set_number: Number used in the widget labels, widget keys
            (``"text N"`` / ``"candidate N"``) and section headings, so that
            several independent sections can live on the same page.
    """
    txt = st.text_area(f"Job description {set_number}", key=f"text {set_number}")
    job_description_series = pd.Series([txt], name="Text")
    st.dataframe(job_description_series)

    uploaded_files = st.file_uploader(
        f"Choose PDF file(s) for candidate profiles {set_number}",
        type="pdf",
        key=f"candidate {set_number}",
        accept_multiple_files=True,
    )

    # Text content of each PDF that could be read.
    all_resumes_text = []
    for uploaded_file in uploaded_files or []:
        try:
            all_resumes_text.append(_read_pdf_text(uploaded_file))
        except Exception as e:
            # Best effort: report the bad file and keep processing the others.
            st.error(f"Error processing file {uploaded_file.name}: {e}")

    if not all_resumes_text:
        return

    # Row 0 is the job description; rows 1..n are the resumes.
    all_documents = [job_description_series.iloc[0]] + all_resumes_text

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(all_documents)
    except ValueError as e:
        # Raised when no document contributes any token (e.g. a blank job
        # description together with PDFs that have no extractable text).
        st.warning(f"Could not compute TF-IDF for set {set_number}: {e}")
        return

    tfidf_df = pd.DataFrame(
        tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out()
    )
    st.subheader(f"TF-IDF Values (Set {set_number}):")
    st.dataframe(tfidf_df)

    cosine_sim_matrix = cosine_similarity(tfidf_matrix)
    st.subheader(f"Cosine Similarity Matrix (Set {set_number}):")
    st.dataframe(pd.DataFrame(cosine_sim_matrix))

    st.subheader(
        f"Cosine Similarity Scores (Job Description {set_number} vs. Resumes {set_number}):"
    )
    # First row holds the job description's similarity to every document;
    # skip column 0, which is its similarity to itself.
    for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
        st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")


# First set of inputs and calculations
_run_comparison(1)

st.divider()

# Second set of inputs and calculations
_run_comparison(2)
111
 
112
+
113
+
114