Spaces:
Running
Running
Enhance email ID construction: use SHA-256 hash for file data and improve error logging for document addition
Browse files - app/controllers/mail.py +7 -3
app/controllers/mail.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
import os
|
3 |
import re
|
4 |
import base64
|
|
|
5 |
from datetime import datetime, timedelta
|
6 |
from venv import logger
|
7 |
from ics import Calendar
|
@@ -166,7 +167,7 @@ def list_emails(service, messages):
|
|
166 |
},
|
167 |
)
|
168 |
)
|
169 |
-
ids.append(f"{metadata['msg_id']}
|
170 |
if os.path.exists(path):
|
171 |
os.remove(path)
|
172 |
for index, document in enumerate(attach_docs or []):
|
@@ -181,7 +182,7 @@ def list_emails(service, messages):
|
|
181 |
}
|
182 |
document.metadata.update(metadata)
|
183 |
documents.append(document)
|
184 |
-
ids.append(f"{metadata['msg_id']}
|
185 |
elif msg["payload"]["mimeType"] == "text/plain" and "data" in msg["payload"]["body"]:
|
186 |
body = base64.urlsafe_b64decode(msg["payload"]["body"]["data"]).decode("utf-8")
|
187 |
body = re.sub(r"<[^>]+>", "", body)
|
@@ -197,7 +198,10 @@ def list_emails(service, messages):
|
|
197 |
if "multipart/alternative" in mime_types and len(mime_types) == 1:
|
198 |
print("Only multipart/alternative found in the email.")
|
199 |
else:
|
200 |
-
|
|
|
|
|
|
|
201 |
|
202 |
|
203 |
def collect(service, query=(datetime.today() - timedelta(days=10)).strftime("after:%Y/%m/%d")):
|
|
|
2 |
import os
|
3 |
import re
|
4 |
import base64
|
5 |
+
import hashlib
|
6 |
from datetime import datetime, timedelta
|
7 |
from venv import logger
|
8 |
from ics import Calendar
|
|
|
167 |
},
|
168 |
)
|
169 |
)
|
170 |
+
ids.append(f"{metadata['msg_id']}-{part['filename']}-{hashlib.sha256(file_data).hexdigest()}")
|
171 |
if os.path.exists(path):
|
172 |
os.remove(path)
|
173 |
for index, document in enumerate(attach_docs or []):
|
|
|
182 |
}
|
183 |
document.metadata.update(metadata)
|
184 |
documents.append(document)
|
185 |
+
ids.append(f"{metadata['msg_id']}-{hashlib.sha256(file_data).hexdigest()}-{index}")
|
186 |
elif msg["payload"]["mimeType"] == "text/plain" and "data" in msg["payload"]["body"]:
|
187 |
body = base64.urlsafe_b64decode(msg["payload"]["body"]["data"]).decode("utf-8")
|
188 |
body = re.sub(r"<[^>]+>", "", body)
|
|
|
198 |
if "multipart/alternative" in mime_types and len(mime_types) == 1:
|
199 |
print("Only multipart/alternative found in the email.")
|
200 |
else:
|
201 |
+
try:
|
202 |
+
vectorstore.add_documents(documents=documents, ids=ids)
|
203 |
+
except Exception as e:
|
204 |
+
logger.error("Error adding documents to vectorstore: %s", e)
|
205 |
|
206 |
|
207 |
def collect(service, query=(datetime.today() - timedelta(days=10)).strftime("after:%Y/%m/%d")):
|