Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -22,9 +22,6 @@ from sklearn.model_selection import train_test_split
|
|
22 |
emails = pd.read_csv('emails.csv')
|
23 |
print(emails.head())
|
24 |
|
25 |
-
# What a message looks like
|
26 |
-
print(emails['message'][0])
|
27 |
-
|
28 |
# Getting the content of the emails and saving to a list
|
29 |
content_text = []
|
30 |
for item in emails.message:
|
@@ -33,15 +30,9 @@ for item in emails.message:
|
|
33 |
cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
|
34 |
content_text.append(cleaned_message)
|
35 |
|
36 |
-
# Checking content of emails (first 5 items)
|
37 |
-
print(content_text[:5])
|
38 |
-
|
39 |
# Taking a sample of the dataset
|
40 |
train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
|
41 |
|
42 |
-
print(train[:5])
|
43 |
-
print(len(train))
|
44 |
-
|
45 |
# Setting up ids for ChromaDB collections
|
46 |
ids = []
|
47 |
for i in range(len(train)):
|
|
|
22 |
emails = pd.read_csv('emails.csv')
|
23 |
print(emails.head())
|
24 |
|
|
|
|
|
|
|
25 |
# Getting the content of the emails and saving to a list
|
26 |
content_text = []
|
27 |
for item in emails.message:
|
|
|
30 |
cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
|
31 |
content_text.append(cleaned_message)
|
32 |
|
|
|
|
|
|
|
33 |
# Taking a sample of the dataset
|
34 |
train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
|
35 |
|
|
|
|
|
|
|
36 |
# Setting up ids for ChromaDB collections
|
37 |
ids = []
|
38 |
for i in range(len(train)):
|