gavinzli commited on
Commit
a111c12
·
1 Parent(s): bc5f7f1

Add EmailQuery model and refactor email retrieval to support query parameters

Browse files
app/controllers/mail.py CHANGED
@@ -22,7 +22,40 @@ EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
22
  ATTACHMENTS_DIR = "cache"
23
  os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
24
 
25
- # service = build_gmail_service()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def search_emails(service, query):
27
  """Search emails based on a query."""
28
  result = service.users().messages().list(userId="me", q=query).execute()
@@ -79,10 +112,6 @@ def list_emails(service, messages):
79
  metadata["threadId"] = msg["threadId"]
80
  metadata["msgId"] = msg["id"]
81
  msgId = f"{msg['threadId']}-{msg['id']}"
82
- # for docstore_id in list(vectorstore.index_to_docstore_id.values()):
83
- # if docstore_id.startswith(msgId):
84
- # logger.info("Already indexed: %s", msgId)
85
- # continue
86
  for header in msg["payload"]["headers"]:
87
  if header["name"] == "From":
88
  metadata["from"] = header["value"]
@@ -97,7 +126,6 @@ def list_emails(service, messages):
97
  "%d/%m/%Y %H:%M:%S"
98
  )
99
  metadata["userId"] = service.users().getProfile(userId="me").execute().get("emailAddress")
100
- # print(metadata, msg["payload"]["mimeType"])
101
  ids = []
102
  documents = []
103
  mime_types = []
@@ -231,7 +259,7 @@ def collect(service, query=(datetime.today() - timedelta(days=10)).strftime("aft
231
  logger.info("No emails found after two weeks ago.")
232
 
233
 
234
- def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("after:%Y/%m/%d"), max_results=10):
235
  """
236
  Retrieve a list of emails with subject, to, from, cc, and content.
237
 
@@ -244,19 +272,16 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
244
  """
245
  try:
246
  # List messages
247
- response = service.users().messages().list(userId='me', q=query, maxResults=max_results).execute()
 
 
248
  messages = response.get('messages', [])
249
  email_list = []
250
-
251
  if not messages:
252
  return email_list
253
-
254
  for message in messages:
255
- # Get detailed message data
256
  msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
257
  headers = msg['payload']['headers']
258
-
259
- # Initialize email details
260
  email_data = {
261
  'subject': '',
262
  'from': '',
@@ -265,8 +290,6 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
265
  'content': '',
266
  'snippet': msg['snippet'] if 'snippet' in msg else '',
267
  }
268
-
269
- # Extract headers
270
  for header in headers:
271
  name = header['name'].lower()
272
  if name == 'subject':
@@ -277,8 +300,6 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
277
  email_data['to'] = header['value']
278
  elif name == 'cc':
279
  email_data['cc'] = header['value']
280
-
281
- # Extract content
282
  if 'parts' in msg['payload']:
283
  for part in msg['payload']['parts']:
284
  if part['mimeType'] == 'text/plain':
@@ -289,9 +310,7 @@ def get_emails(service, query=(datetime.today() - timedelta(days=10)).strftime("
289
  break
290
  elif 'data' in msg['payload']['body']:
291
  email_data['content'] = base64.urlsafe_b64decode(msg['payload']['body']['data']).decode('utf-8')
292
-
293
  email_list.append(email_data)
294
-
295
  return email_list
296
 
297
  except Exception as e:
 
22
  ATTACHMENTS_DIR = "cache"
23
  os.makedirs(ATTACHMENTS_DIR, exist_ok=True)
24
 
25
+ def build_query(params):
26
+ """
27
+ Constructs a query string based on the provided parameters.
28
+
29
+ Args:
30
+ params (dict): A dictionary containing optional query parameters.
31
+ Supported keys include:
32
+ - 'subject' (str): The subject of the email.
33
+ - 'from' (str): The sender's email address.
34
+ - 'to' (str): The recipient's email address.
35
+ - 'cc' (str): The CC recipient's email address.
36
+ - 'after' (str): A date string to filter emails sent after this date.
37
+ - 'before' (str): A date string to filter emails sent before this date.
38
+
39
+ Returns:
40
+ str: A query string constructed from the provided parameters. Each parameter
41
+ is formatted as a key-value pair and joined by spaces. If a parameter is not
42
+ provided or is empty, it is excluded from the query string.
43
+ """
44
+ query_parts = []
45
+ if 'subject' in params and params['subject']:
46
+ query_parts.append(f'subject:"{params["subject"]}"')
47
+ if 'from' in params and params['from']:
48
+ query_parts.append(f'from:{params["from"]}')
49
+ if 'to' in params and params['to']:
50
+ query_parts.append(f'to:{params["to"]}')
51
+ if 'cc' in params and params['cc']:
52
+ query_parts.append(f'cc:{params["cc"]}')
53
+ if 'after' in params and params['after']:
54
+ query_parts.append(f'after:{params["after"]}')
55
+ if 'before' in params and params['before']:
56
+ query_parts.append(f'before:{params["before"]}')
57
+ return ' '.join(query_parts)
58
+
59
  def search_emails(service, query):
60
  """Search emails based on a query."""
61
  result = service.users().messages().list(userId="me", q=query).execute()
 
112
  metadata["threadId"] = msg["threadId"]
113
  metadata["msgId"] = msg["id"]
114
  msgId = f"{msg['threadId']}-{msg['id']}"
 
 
 
 
115
  for header in msg["payload"]["headers"]:
116
  if header["name"] == "From":
117
  metadata["from"] = header["value"]
 
126
  "%d/%m/%Y %H:%M:%S"
127
  )
128
  metadata["userId"] = service.users().getProfile(userId="me").execute().get("emailAddress")
 
129
  ids = []
130
  documents = []
131
  mime_types = []
 
259
  logger.info("No emails found after two weeks ago.")
260
 
261
 
262
+ def get_emails(service, query, max_results=10):
263
  """
264
  Retrieve a list of emails with subject, to, from, cc, and content.
265
 
 
272
  """
273
  try:
274
  # List messages
275
+ query = build_query(query.dict())
276
+ response = service.users().messages().list(
277
+ userId='me', q=query, maxResults=max_results).execute()
278
  messages = response.get('messages', [])
279
  email_list = []
 
280
  if not messages:
281
  return email_list
 
282
  for message in messages:
 
283
  msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
284
  headers = msg['payload']['headers']
 
 
285
  email_data = {
286
  'subject': '',
287
  'from': '',
 
290
  'content': '',
291
  'snippet': msg['snippet'] if 'snippet' in msg else '',
292
  }
 
 
293
  for header in headers:
294
  name = header['name'].lower()
295
  if name == 'subject':
 
300
  email_data['to'] = header['value']
301
  elif name == 'cc':
302
  email_data['cc'] = header['value']
 
 
303
  if 'parts' in msg['payload']:
304
  for part in msg['payload']['parts']:
305
  if part['mimeType'] == 'text/plain':
 
310
  break
311
  elif 'data' in msg['payload']['body']:
312
  email_data['content'] = base64.urlsafe_b64decode(msg['payload']['body']['data']).decode('utf-8')
 
313
  email_list.append(email_data)
 
314
  return email_list
315
 
316
  except Exception as e:
app/router/mail.py CHANGED
@@ -71,5 +71,5 @@ def get(query: MailReqData, request: Request):
71
  scopes=cred_dict["scopes"],
72
  )
73
  mailservice = build("gmail", "v1", credentials=credentials)
74
- result = mail.get_emails(mailservice, query.query)
75
  return JSONResponse(content= result)
 
71
  scopes=cred_dict["scopes"],
72
  )
73
  mailservice = build("gmail", "v1", credentials=credentials)
74
+ result = mail.get_emails(mailservice, query.query, query.query.max_results)
75
  return JSONResponse(content= result)
app/schema/__init__.py CHANGED
@@ -2,6 +2,25 @@
2
  from typing import Optional, List
3
  from pydantic import BaseModel, Field
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  class ReqData(BaseModel):
6
  """
7
  RequestData is a Pydantic model that represents the data structure for a request.
@@ -28,7 +47,7 @@ class MailReqData(BaseModel):
28
  query (str): The query or message content sent by the user.
29
  """
30
  email: str
31
- query: Optional[str] = ""
32
 
33
  class ReqFollowUp(BaseModel):
34
  """
 
2
  from typing import Optional, List
3
  from pydantic import BaseModel, Field
4
 
5
+ class EmailQuery(BaseModel):
6
+ """
7
+ EmailQuery model representing the structure of an email query.
8
+
9
+ Attributes:
10
+ subject (Optional[str]): The subject of the email to search for.
11
+ from_email (Optional[str]): The sender's email address.
12
+ to_email (Optional[str]): The recipient's email address.
13
+ cc_email (Optional[str]): The CC email address.
14
+ after (Optional[str]): The date after which to search for emails.
15
+ max_results (Optional[int]): The maximum number of results to return.
16
+ """
17
+ subject: Optional[str]
18
+ from_email: Optional[str] = Field(None, alias="from")
19
+ to_email: Optional[str] = Field(None, alias="to")
20
+ cc_email: Optional[str] = Field(None, alias="cc")
21
+ after: Optional[str]
22
+ max_results: Optional[int] = 10
23
+
24
  class ReqData(BaseModel):
25
  """
26
  RequestData is a Pydantic model that represents the data structure for a request.
 
47
  query (str): The query or message content sent by the user.
48
  """
49
  email: str
50
+ query: EmailQuery
51
 
52
  class ReqFollowUp(BaseModel):
53
  """