Update app.py
Browse files
app.py
CHANGED
@@ -43,6 +43,34 @@ for uploaded_file in uploaded_files:
|
|
43 |
# Concatenate the DataFrames collected in `frames` into a single table.
# NOTE(review): `frames` is built outside this view — presumably one frame
# per uploaded file; confirm against the upload loop.
result = pd.concat(frames)
|
44 |
# Render the combined table in the Streamlit page.
st.dataframe(result)
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# Separate the preceding output from whatever the page renders next.
st.divider()
|
48 |
|
|
|
43 |
# Concatenate the DataFrames collected in `frames` into a single table.
# NOTE(review): `frames` is built outside this view — presumably one frame
# per uploaded file; confirm against the upload loop.
result = pd.concat(frames)
|
44 |
# Render the combined table in the Streamlit page.
st.dataframe(result)
|
45 |
|
46 |
+
import re
|
47 |
+
def preprocess_text(text) -> str:
    """Normalise a piece of text before it is vectorised.

    The text is lowercased, every character listed in
    ``string.punctuation`` is removed, and runs of whitespace (spaces,
    tabs, newlines) are collapsed to single spaces with leading and
    trailing whitespace dropped.

    Args:
        text: The value to clean. ``None`` and float ``NaN`` (how a missing
            cell typically reaches a ``Series.map`` callback) are treated as
            empty text; any other non-string value is converted with
            ``str()`` first.

    Returns:
        The cleaned string, which may be empty.
    """
    # Imported here so the function does not rely on a module-level
    # ``import string``, which the surrounding script does not visibly have.
    import string

    # ``NaN != NaN`` is the stdlib-only way to detect a float NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    # One C-level pass that deletes every punctuation character.
    text = text.translate(str.maketrans("", "", string.punctuation))
    # split() with no arguments splits on any whitespace run and discards
    # empty pieces, so the join leaves exactly one space between words.
    return " ".join(text.split())
|
53 |
+
|
54 |
+
# --- Clean the text column and show what will be vectorised ---------------
cleaned_text = result["Text"].map(preprocess_text)
result["Text"] = cleaned_text
st.dataframe(result["Text"])

# --- TF-IDF: one row per document, one column per vocabulary term ---------
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(result["Text"])

dense_tfidf = tfidf_matrix.toarray()
feature_names = vectorizer.get_feature_names_out()
tfidf_df = pd.DataFrame(dense_tfidf, columns=feature_names)
st.subheader("TF-IDF Values:")
st.dataframe(tfidf_df)

# --- Pairwise cosine similarity between all documents ---------------------
cosine_sim_matrix = cosine_similarity(tfidf_matrix)
cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
st.subheader("Cosine Similarity Matrix:")
st.dataframe(cosine_sim_df)

# --- Row 0 compared against every later row -------------------------------
# NOTE(review): the heading implies row 0 is the job description and the
# remaining rows are resumes; that ordering is decided by the upload code
# outside this view — confirm.
st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
first_row_vs_rest = cosine_sim_matrix[0][1:]
for i, similarity_score in enumerate(first_row_vs_rest):
    st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")

st.divider()
|
76 |
|