eHemink committed on
Commit
b71a3bb
·
verified ·
1 Parent(s): 57a2bd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -9
app.py CHANGED
@@ -22,9 +22,6 @@ from sklearn.model_selection import train_test_split
22
  emails = pd.read_csv('emails.csv')
23
  print(emails.head())
24
 
25
- # What a message looks like
26
- print(emails['message'][0])
27
-
28
  # Getting the content of the emails and saving to a list
29
  content_text = []
30
  for item in emails.message:
@@ -33,15 +30,9 @@ for item in emails.message:
33
  cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
34
  content_text.append(cleaned_message)
35
 
36
- # Checking content of emails (first 5 items)
37
- print(content_text[:5])
38
-
39
  # Taking a sample of the dataset
40
  train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
41
 
42
- print(train[:5])
43
- print(len(train))
44
-
45
  # Setting up ids for ChromaDB collections
46
  ids = []
47
  for i in range(len(train)):
 
22
  emails = pd.read_csv('emails.csv')
23
  print(emails.head())
24
 
 
 
 
25
  # Getting the content of the emails and saving to a list
26
  content_text = []
27
  for item in emails.message:
 
30
  cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
31
  content_text.append(cleaned_message)
32
 
 
 
 
33
  # Taking a sample of the dataset
34
  train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
35
 
 
 
 
36
  # Setting up ids for ChromaDB collections
37
  ids = []
38
  for i in range(len(train)):