nlpblogs committed on
Commit
63fdfc2
·
verified ·
1 Parent(s): 5043930

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py CHANGED
@@ -43,6 +43,34 @@ for uploaded_file in uploaded_files:
43
  result = pd.concat(frames)
44
  st.dataframe(result)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  st.divider()
48
 
 
43
  result = pd.concat(frames)
44
  st.dataframe(result)
45
 
46
import re
import string

# `string` is imported next to `re` so this helper does not rely on an import
# made elsewhere in the file.
# Compiled once: matches any single ASCII punctuation character.
_PUNCTUATION_RE = re.compile(f"[{re.escape(string.punctuation)}]")


def preprocess_text(text):
    """Normalize *text* before it is vectorized.

    The value is lowercased, ASCII punctuation (``string.punctuation``) is
    removed, and every run of whitespace (spaces, tabs, newlines) is
    collapsed to a single space with leading/trailing whitespace dropped.

    Args:
        text: The raw cell value. Normally a ``str``; ``None`` and float NaN
            are treated as missing, any other non-string value is converted
            with ``str()``.

    Returns:
        The cleaned string, or ``""`` for a missing value.
    """
    # A missing cell arrives as None or float NaN (NaN is the only float that
    # is not equal to itself); calling .lower() on either would raise.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = _PUNCTUATION_RE.sub("", text)
    # split() with no argument splits on any whitespace run and drops empties.
    return " ".join(text.split())
53
+
54
# Clean every entry of the Text column, then display the cleaned column.
result['Text'] = result['Text'].map(preprocess_text)
st.dataframe(result['Text'])

# Fit a TF-IDF model over all rows of the Text column.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(result['Text'])

# One column per learned term, one row per document.
feature_names = vectorizer.get_feature_names_out()
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
st.subheader("TF-IDF Values:")
st.dataframe(tfidf_df)

# Pairwise cosine similarity between all TF-IDF rows.
cosine_sim_matrix = cosine_similarity(tfidf_matrix)
cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
st.subheader("Cosine Similarity Matrix:")
st.dataframe(cosine_sim_df)

st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
# Row 0 is compared against every later row.
# NOTE(review): presumably row 0 is the job description and the rest are
# resumes — confirm against how `frames` is assembled.
for candidate_number, similarity_score in enumerate(cosine_sim_matrix[0][1:], start=1):
    st.write(f"Similarity with Candidate Profile {candidate_number}: {similarity_score:.4f}")
73
+
74
 
75
  st.divider()
76