Add EmailQuery model and refactor email retrieval to support query parameters
Browse files- app/controllers/mail.py +38 -19
- app/router/mail.py +1 -1
- app/schema/__init__.py +20 -1
app/controllers/mail.py
CHANGED
@@ -22,7 +22,40 @@ EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
|
22 |
ATTACHMENTS_DIR = "cache"
|
23 |
os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
|
24 |
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def search_emails(service, query):
|
27 |
"""Search emails based on a query."""
|
28 |
result = service.users().messages().list(userId="me", q=query).execute()
|
@@ -79,10 +112,6 @@ def list_emails(service, messages):
|
|
79 |
metadata["threadId"] = msg["threadId"]
|
80 |
metadata["msgId"] = msg["id"]
|
81 |
msgId = f"{msg['threadId']}-{msg['id']}"
|
82 |
-
# for docstore_id in list(vectorstore.index_to_docstore_id.values()):
|
83 |
-
# if docstore_id.startswith(msgId):
|
84 |
-
# logger.info("Already indexed: %s", msgId)
|
85 |
-
# continue
|
86 |
for header in msg["payload"]["headers"]:
|
87 |
if header["name"] == "From":
|
88 |
metadata["from"] = header["value"]
|
@@ -97,7 +126,6 @@ def list_emails(service, messages):
|
|
97 |
"%d/%m/%Y %H:%M:%S"
|
98 |
)
|
99 |
metadata["userId"] = service.users().getProfile(userId="me").execute().get("emailAddress")
|
100 |
-
# print(metadata, msg["payload"]["mimeType"])
|
101 |
ids = []
|
102 |
documents = []
|
103 |
mime_types = []
|
@@ -231,7 +259,7 @@ def collect(service, query=(datetime.today() - timedelta(days=10)).strftime("aft
|
|
231 |
logger.info("No emails found after two weeks ago.")
|
232 |
|
233 |
|
234 |
-
def get_emails(service, query
|
235 |
"""
|
236 |
Retrieve a list of emails with subject, to, from, cc, and content.
|
237 |
|
@@ -244,19 +272,16 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
|
|
244 |
"""
|
245 |
try:
|
246 |
# List messages
|
247 |
-
|
|
|
|
|
248 |
messages = response.get('messages', [])
|
249 |
email_list = []
|
250 |
-
|
251 |
if not messages:
|
252 |
return email_list
|
253 |
-
|
254 |
for message in messages:
|
255 |
-
# Get detailed message data
|
256 |
msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
|
257 |
headers = msg['payload']['headers']
|
258 |
-
|
259 |
-
# Initialize email details
|
260 |
email_data = {
|
261 |
'subject': '',
|
262 |
'from': '',
|
@@ -265,8 +290,6 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
|
|
265 |
'content': '',
|
266 |
'snippet': msg['snippet'] if 'snippet' in msg else '',
|
267 |
}
|
268 |
-
|
269 |
-
# Extract headers
|
270 |
for header in headers:
|
271 |
name = header['name'].lower()
|
272 |
if name == 'subject':
|
@@ -277,8 +300,6 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
|
|
277 |
email_data['to'] = header['value']
|
278 |
elif name == 'cc':
|
279 |
email_data['cc'] = header['value']
|
280 |
-
|
281 |
-
# Extract content
|
282 |
if 'parts' in msg['payload']:
|
283 |
for part in msg['payload']['parts']:
|
284 |
if part['mimeType'] == 'text/plain':
|
@@ -289,9 +310,7 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
|
|
289 |
break
|
290 |
elif 'data' in msg['payload']['body']:
|
291 |
email_data['content'] = base64.urlsafe_b64decode(msg['payload']['body']['data']).decode('utf-8')
|
292 |
-
|
293 |
email_list.append(email_data)
|
294 |
-
|
295 |
return email_list
|
296 |
|
297 |
except Exception as e:
|
|
|
22 |
ATTACHMENTS_DIR = "cache"
|
23 |
os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
|
24 |
|
25 |
+
def build_query(params):
|
26 |
+
"""
|
27 |
+
Constructs a query string based on the provided parameters.
|
28 |
+
|
29 |
+
Args:
|
30 |
+
params (dict): A dictionary containing optional query parameters.
|
31 |
+
Supported keys include:
|
32 |
+
- 'subject' (str): The subject of the email.
|
33 |
+
- 'from' (str): The sender's email address.
|
34 |
+
- 'to' (str): The recipient's email address.
|
35 |
+
- 'cc' (str): The CC recipient's email address.
|
36 |
+
- 'after' (str): A date string to filter emails sent after this date.
|
37 |
+
- 'before' (str): A date string to filter emails sent before this date.
|
38 |
+
|
39 |
+
Returns:
|
40 |
+
str: A query string constructed from the provided parameters. Each parameter
|
41 |
+
is formatted as a key-value pair and joined by spaces. If a parameter is not
|
42 |
+
provided or is empty, it is excluded from the query string.
|
43 |
+
"""
|
44 |
+
query_parts = []
|
45 |
+
if 'subject' in params and params['subject']:
|
46 |
+
query_parts.append(f'subject:"{params["subject"]}"')
|
47 |
+
if 'from' in params and params['from']:
|
48 |
+
query_parts.append(f'from:{params["from"]}')
|
49 |
+
if 'to' in params and params['to']:
|
50 |
+
query_parts.append(f'to:{params["to"]}')
|
51 |
+
if 'cc' in params and params['cc']:
|
52 |
+
query_parts.append(f'cc:{params["cc"]}')
|
53 |
+
if 'after' in params and params['after']:
|
54 |
+
query_parts.append(f'after:{params["after"]}')
|
55 |
+
if 'before' in params and params['before']:
|
56 |
+
query_parts.append(f'before:{params["before"]}')
|
57 |
+
return ' '.join(query_parts)
|
58 |
+
|
59 |
def search_emails(service, query):
|
60 |
"""Search emails based on a query."""
|
61 |
result = service.users().messages().list(userId="me", q=query).execute()
|
|
|
112 |
metadata["threadId"] = msg["threadId"]
|
113 |
metadata["msgId"] = msg["id"]
|
114 |
msgId = f"{msg['threadId']}-{msg['id']}"
|
|
|
|
|
|
|
|
|
115 |
for header in msg["payload"]["headers"]:
|
116 |
if header["name"] == "From":
|
117 |
metadata["from"] = header["value"]
|
|
|
126 |
"%d/%m/%Y %H:%M:%S"
|
127 |
)
|
128 |
metadata["userId"] = service.users().getProfile(userId="me").execute().get("emailAddress")
|
|
|
129 |
ids = []
|
130 |
documents = []
|
131 |
mime_types = []
|
|
|
259 |
logger.info("No emails found after two weeks ago.")
|
260 |
|
261 |
|
262 |
+
def get_emails(service, query, max_results=10):
|
263 |
"""
|
264 |
Retrieve a list of emails with subject, to, from, cc, and content.
|
265 |
|
|
|
272 |
"""
|
273 |
try:
|
274 |
# List messages
|
275 |
+
query = build_query(query.dict())
|
276 |
+
response = service.users().messages().list(
|
277 |
+
userId='me', q=query, maxResults=max_results).execute()
|
278 |
messages = response.get('messages', [])
|
279 |
email_list = []
|
|
|
280 |
if not messages:
|
281 |
return email_list
|
|
|
282 |
for message in messages:
|
|
|
283 |
msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
|
284 |
headers = msg['payload']['headers']
|
|
|
|
|
285 |
email_data = {
|
286 |
'subject': '',
|
287 |
'from': '',
|
|
|
290 |
'content': '',
|
291 |
'snippet': msg['snippet'] if 'snippet' in msg else '',
|
292 |
}
|
|
|
|
|
293 |
for header in headers:
|
294 |
name = header['name'].lower()
|
295 |
if name == 'subject':
|
|
|
300 |
email_data['to'] = header['value']
|
301 |
elif name == 'cc':
|
302 |
email_data['cc'] = header['value']
|
|
|
|
|
303 |
if 'parts' in msg['payload']:
|
304 |
for part in msg['payload']['parts']:
|
305 |
if part['mimeType'] == 'text/plain':
|
|
|
310 |
break
|
311 |
elif 'data' in msg['payload']['body']:
|
312 |
email_data['content'] = base64.urlsafe_b64decode(msg['payload']['body']['data']).decode('utf-8')
|
|
|
313 |
email_list.append(email_data)
|
|
|
314 |
return email_list
|
315 |
|
316 |
except Exception as e:
|
app/router/mail.py
CHANGED
@@ -71,5 +71,5 @@ def get(query: MailReqData, request: Request):
|
|
71 |
scopes=cred_dict["scopes"],
|
72 |
)
|
73 |
mailservice = build("gmail", "v1", credentials=credentials)
|
74 |
-
result = mail.get_emails(mailservice, query.query)
|
75 |
return JSONResponse(content= result)
|
|
|
71 |
scopes=cred_dict["scopes"],
|
72 |
)
|
73 |
mailservice = build("gmail", "v1", credentials=credentials)
|
74 |
+
result = mail.get_emails(mailservice, query.query, query.query.max_results)
|
75 |
return JSONResponse(content= result)
|
app/schema/__init__.py
CHANGED
@@ -2,6 +2,25 @@
|
|
2 |
from typing import Optional, List
|
3 |
from pydantic import BaseModel, Field
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
class ReqData(BaseModel):
|
6 |
"""
|
7 |
RequestData is a Pydantic model that represents the data structure for a request.
|
@@ -28,7 +47,7 @@ class MailReqData(BaseModel):
|
|
28 |
query (str): The query or message content sent by the user.
|
29 |
"""
|
30 |
email: str
|
31 |
-
query:
|
32 |
|
33 |
class ReqFollowUp(BaseModel):
|
34 |
"""
|
|
|
2 |
from typing import Optional, List
|
3 |
from pydantic import BaseModel, Field
|
4 |
|
5 |
+
class EmailQuery(BaseModel):
|
6 |
+
"""
|
7 |
+
EmailQuery model representing the structure of an email query.
|
8 |
+
|
9 |
+
Attributes:
|
10 |
+
subject (Optional[str]): The subject of the email to search for.
|
11 |
+
from_email (Optional[str]): The sender's email address.
|
12 |
+
to_email (Optional[str]): The recipient's email address.
|
13 |
+
cc_email (Optional[str]): The CC email address.
|
14 |
+
after (Optional[str]): The date after which to search for emails.
|
15 |
+
max_results (Optional[int]): The maximum number of results to return.
|
16 |
+
"""
|
17 |
+
subject: Optional[str]
|
18 |
+
from_email: Optional[str] = Field(None, alias="from")
|
19 |
+
to_email: Optional[str] = Field(None, alias="to")
|
20 |
+
cc_email: Optional[str] = Field(None, alias="cc")
|
21 |
+
after: Optional[str]
|
22 |
+
max_results: Optional[int] = 10
|
23 |
+
|
24 |
class ReqData(BaseModel):
|
25 |
"""
|
26 |
RequestData is a Pydantic model that represents the data structure for a request.
|
|
|
47 |
query (str): The query or message content sent by the user.
|
48 |
"""
|
49 |
email: str
|
50 |
+
query: EmailQuery
|
51 |
|
52 |
class ReqFollowUp(BaseModel):
|
53 |
"""
|