Update app.py
Browse files
app.py
CHANGED
@@ -18,54 +18,47 @@ import pandas as pd
|
|
18 |
from PyPDF2 import PdfReader
|
19 |
from gliner import GLiNER
|
20 |
|
21 |
-
txt = st.text_area("Job description")
|
22 |
-
data1 = pd.Series(txt, name = "Text")
|
23 |
-
st.dataframe(data1)
|
24 |
-
|
25 |
|
|
|
|
|
|
|
26 |
|
27 |
uploaded_files = st.file_uploader(
|
28 |
-
"Choose a PDF file(s)
|
29 |
)
|
30 |
|
|
|
|
|
31 |
if uploaded_files:
|
32 |
-
all_data = [] # Store dictionaries of text and entities for each PDF
|
33 |
for uploaded_file in uploaded_files:
|
34 |
try:
|
35 |
pdf_reader = PdfReader(uploaded_file)
|
36 |
text_data = ""
|
37 |
for page in pdf_reader.pages:
|
38 |
text_data += page.extract_text()
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
entities = model.predict_entities(text_data, labels)
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
|
47 |
|
48 |
-
|
49 |
-
|
|
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
st.dataframe(result)
|
59 |
-
|
60 |
-
y = result[['Text']]
|
61 |
-
st.dataframe(y)
|
62 |
-
|
63 |
-
vec = TfidfVectorizer()
|
64 |
-
tf_idf = vec.fit_transform(y)
|
65 |
-
x = pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out())
|
66 |
-
st.dataframe(x)
|
67 |
-
cosine_sim = cosine_similarity(tf_idf, tf_idf)
|
68 |
-
st.dataframe(cosine_sim)
|
69 |
|
70 |
|
71 |
|
|
|
18 |
from PyPDF2 import PdfReader
|
19 |
from gliner import GLiNER
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
# Resume-vs-job-description matcher (Streamlit script body).
#
# Flow: read a free-text job description, extract the text of each uploaded
# PDF, vectorise everything with TF-IDF, and report the cosine similarity of
# every resume against the job description.

txt = st.text_area("Job description")
# One-element Series so the description can be shown as a table and later
# reused as the first document of the TF-IDF corpus.
job_description_series = pd.Series([txt], name="Text")
st.dataframe(job_description_series)

uploaded_files = st.file_uploader(
    "Choose a PDF file(s) for candidate profiles", accept_multiple_files=True, type="pdf"
)

all_resumes_text = []  # Store the text content of each PDF
# File names kept in lockstep with all_resumes_text: a file that fails to
# parse is skipped, so positions in this list (not upload order) are what
# the similarity rows correspond to.
parsed_file_names = []

if uploaded_files:
    for uploaded_file in uploaded_files:
        try:
            pdf_reader = PdfReader(uploaded_file)
            # Join once instead of repeated "+="; the `or ""` guards against
            # a page yielding no text object at all.
            text_data = "".join(
                (page.extract_text() or "") for page in pdf_reader.pages
            )
        except Exception as e:
            # UI boundary: any parsing failure is reported to the user and
            # the remaining files are still processed.
            st.error(f"Error processing file {uploaded_file.name}: {e}")
            continue
        all_resumes_text.append(text_data)
        parsed_file_names.append(uploaded_file.name)

if all_resumes_text:
    # Document 0 is the job description; documents 1..n are the resumes.
    all_documents = [job_description_series.iloc[0]] + all_resumes_text

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(all_documents)
    except ValueError as e:
        # Raised when no document contains a usable token (for example a
        # blank description together with image-only PDFs).
        st.error(f"Could not compute TF-IDF values: {e}")
    else:
        tfidf_df = pd.DataFrame(
            tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out()
        )
        st.subheader("TF-IDF Values:")
        st.dataframe(tfidf_df)

        cosine_sim_matrix = cosine_similarity(tfidf_matrix)
        cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
        st.subheader("Cosine Similarity Matrix:")
        st.dataframe(cosine_sim_df)

        # Display similarity scores between the job description and each resume.
        # Row 0 is the job description; skipping column 0 drops its
        # self-similarity and leaves one score per parsed resume.
        st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
        for i, (file_name, similarity_score) in enumerate(
            zip(parsed_file_names, cosine_sim_matrix[0][1:])
        ):
            st.write(
                f"Similarity with Candidate Profile {i + 1} ({file_name}): "
                f"{similarity_score:.4f}"
            )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
|