nlpblogs committed on
Commit
36fcfae
·
verified ·
1 Parent(s): 712f786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -11
app.py CHANGED
@@ -40,22 +40,16 @@ for uploaded_file in uploaded_files:
40
  data = pd.Series(text_data, name = 'Text')
41
  st.dataframe(data)
42
  frames = [job, data]
43
- result = pd.concat(frames)
 
 
 
44
  st.dataframe(result)
45
 
46
- import re
47
- def preprocess_text(text):
48
- text = text.lower() # Lowercase text
49
- text = re.sub(f"[{re.escape(string.punctuation)}]", "", text) # Remove punctuation
50
- text = " ".join(text.split()) # Remove extra spaces, tabs, and new lines
51
-
52
- return text
53
 
54
- result['Text']= result['Text'].map(preprocess_text)
55
- st.dataframe(result['Text'])
56
 
57
  vectorizer = TfidfVectorizer()
58
- tfidf_matrix = vectorizer.fit_transform(result['Text'])
59
 
60
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
61
  st.subheader("TF-IDF Values:")
 
40
  data = pd.Series(text_data, name = 'Text')
41
  st.dataframe(data)
42
  frames = [job, data]
43
+ result1 = pd.concat(frames)
44
+ st.dataframe(result1)
45
+
46
+ result = result1['Text'].drop_duplicates().to_list()
47
  st.dataframe(result)
48
 
 
 
 
 
 
 
 
49
 
 
 
50
 
51
  vectorizer = TfidfVectorizer()
52
+ tfidf_matrix = vectorizer.fit_transform(result)
53
 
54
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
55
  st.subheader("TF-IDF Values:")