gavinzli committed on
Commit
a171580
·
1 Parent(s): 13884fc

Refactor mail handling: update collect function to use MailReqData model and remove unused code

Browse files
app/controllers/mail.py CHANGED
@@ -6,7 +6,6 @@ from datetime import datetime, timedelta
6
  from venv import logger
7
  from ics import Calendar
8
 
9
- # import pandas as pd
10
  from langchain_core.documents import Document
11
  from langchain_community.document_loaders import (
12
  PyPDFLoader,
@@ -16,7 +15,6 @@ from langchain_community.document_loaders import (
16
  )
17
 
18
  from models.db import vectorstore
19
- # from models.mails import build_gmail_service
20
 
21
  SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
22
  EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
@@ -78,6 +76,7 @@ def list_emails(service, messages):
78
  for message in messages:
79
  msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
80
  metadata = {}
 
81
  if msg["id"] in vectorstore.index_to_docstore_id:
82
  logger.info("Email already exists in the database.")
83
  continue
 
6
  from venv import logger
7
  from ics import Calendar
8
 
 
9
  from langchain_core.documents import Document
10
  from langchain_community.document_loaders import (
11
  PyPDFLoader,
 
15
  )
16
 
17
  from models.db import vectorstore
 
18
 
19
  SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
20
  EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
 
76
  for message in messages:
77
  msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
78
  metadata = {}
79
+ logger.info("vectorstore.index_to_docstore_id: %s", vectorstore.index_to_docstore_id)
80
  if msg["id"] in vectorstore.index_to_docstore_id:
81
  logger.info("Email already exists in the database.")
82
  continue
app/models/chroma/__init__.py DELETED
@@ -1,27 +0,0 @@
1
- # """Module for the Vector Database."""
2
- # from langchain_chroma import Chroma
3
- # from models.llm import EmbeddingsModel
4
-
5
- # vectorstore = Chroma(
6
- # embedding_function=EmbeddingsModel("all-MiniLM-L6-v2"),
7
- # collection_name="email",
8
- # persist_directory="models/chroma/data"
9
- # )
10
-
11
- # # def create_or_get_collection(collection_name: str):
12
- # # """
13
- # # Creates a new collection or gets an existing collection from the Vector Database.
14
-
15
- # # Args:
16
- # # collection_name (str): The name of the collection.
17
-
18
- # # Returns:
19
- # # chromadb.Collection: The collection associated with the provided name.
20
- # # """
21
- # # chroma_client = chromadb.PersistentClient(path="models/chroma/data")
22
- # # collection = chroma_client.get_or_create_collection(collection_name)
23
- # # # try:
24
- # # # collection = chroma_client.create_collection(collection_name)
25
- # # # except chromadb.errors.UniqueConstraintError:
26
- # # # collection = chroma_client.get_collection(collection_name)
27
- # # return collection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/models/mails/__init__.py CHANGED
@@ -3,7 +3,6 @@ import os.path
3
  import pickle
4
 
5
  from google.auth.transport.requests import Request
6
- # from google.oauth2.credentials import Credentials
7
  from google_auth_oauthlib.flow import InstalledAppFlow
8
  from googleapiclient.discovery import build
9
 
 
3
  import pickle
4
 
5
  from google.auth.transport.requests import Request
 
6
  from google_auth_oauthlib.flow import InstalledAppFlow
7
  from googleapiclient.discovery import build
8
 
app/playground/app.py DELETED
@@ -1,44 +0,0 @@
1
- """Streamlit app example."""
2
- import logging
3
- import uuid
4
- import streamlit as st
5
-
6
- from chain import RAGChain
7
- from retriever import DocRetriever
8
- from controllers import mail
9
-
10
- logging.basicConfig(
11
- format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
12
- logging.getLogger().setLevel(logging.ERROR)
13
-
14
- with st.sidebar:
15
- st.header("Controls")
16
- if st.button("Collect Data"):
17
- result = mail.collect()
18
- with st.chat_message("assistant"):
19
- response_content = st.markdown(result)
20
-
21
- if 'chat_id' not in st.session_state:
22
- st.session_state.chat_id = str(uuid.uuid4())
23
- st.session_state.user_id = str(uuid.uuid4())
24
-
25
- if "messages" not in st.session_state:
26
- st.session_state.messages = []
27
-
28
- for message in st.session_state.messages:
29
- with st.chat_message(message["role"]):
30
- st.markdown(message["content"])
31
-
32
- if prompt := st.chat_input("What is up?"):
33
-
34
- st.session_state.messages.append({"role": "user", "content": prompt})
35
- with st.chat_message("user"):
36
- st.markdown(prompt)
37
- req = {"query": prompt}
38
- chain = RAGChain(DocRetriever(req=req))
39
-
40
- result = chain.invoke({"input": req['query']},
41
- config={"configurable": {"session_id": st.session_state.chat_id}})
42
- with st.chat_message("assistant"):
43
- response_content = st.markdown(result['answer'])
44
- st.session_state.messages.append({"role": "assistant", "content": result['answer']})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/playground/phi-4-mini-instruct.py DELETED
@@ -1,23 +0,0 @@
1
- from langchain_huggingface import HuggingFacePipeline
2
-
3
- # Define the model ID
4
- model_id = "gpt2"
5
- model_id = "microsoft/Phi-4-mini-instruct"
6
- model_id = "Qwen/Qwen2.5-7B-Instruct"
7
- model_id = "microsoft/Phi-3-small-8k-instruct"
8
-
9
- # Create a pipeline for text generation
10
- llm = HuggingFacePipeline.from_model_id(
11
- model_id=model_id,
12
- task="text-generation",
13
- device=-1,
14
- # trust_remote_code=True,
15
- pipeline_kwargs={
16
- "max_new_tokens": 256,
17
- "top_k": 50
18
- },
19
- )
20
-
21
- # Use the model to generate text
22
- response = llm.invoke("Hello, how are you?")
23
- print(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/playground/test.py DELETED
@@ -1,5 +0,0 @@
1
- from controllers import mail
2
-
3
- if __name__ == "__main__":
4
- mail.collect()
5
- # mail.get_documents()
 
 
 
 
 
 
app/router/mail.py CHANGED
@@ -8,10 +8,12 @@ from controllers import mail
8
  from google.oauth2.credentials import Credentials
9
  from googleapiclient.discovery import build
10
 
 
 
11
  router = APIRouter(prefix="/mail", tags=["mail"])
12
 
13
  @router.post("")
14
- def collect(email: str, request: Request):
15
  """
16
  Handles the chat POST request.
17
 
@@ -22,8 +24,8 @@ def collect(email: str, request: Request):
22
  str: The generated response from the chat function.
23
  """
24
  try:
25
- if os.path.exists(f"cache/{email}.pickle"):
26
- with open(f"cache/{email}.pickle", "rb") as token:
27
  credentials = pickle.load(token)
28
  else:
29
  cred_dict = request.state.session.get("credential")
@@ -36,7 +38,7 @@ def collect(email: str, request: Request):
36
  scopes=cred_dict["scopes"],
37
  )
38
  mailservice = build("gmail", "v1", credentials=credentials)
39
- mail.collect(mailservice)
40
  return JSONResponse(content={"message": "Mail collected successfully."})
41
  except Exception as e:
42
  return JSONResponse(content={"error": str(e)}, status_code=500)
 
8
  from google.oauth2.credentials import Credentials
9
  from googleapiclient.discovery import build
10
 
11
+ from schema import MailReqData
12
+
13
  router = APIRouter(prefix="/mail", tags=["mail"])
14
 
15
  @router.post("")
16
+ def collect(query: MailReqData, request: Request):
17
  """
18
  Handles the chat POST request.
19
 
 
24
  str: The generated response from the chat function.
25
  """
26
  try:
27
+ if os.path.exists(f"cache/{query.email}.pickle"):
28
+ with open(f"cache/{query.email}.pickle", "rb") as token:
29
  credentials = pickle.load(token)
30
  else:
31
  cred_dict = request.state.session.get("credential")
 
38
  scopes=cred_dict["scopes"],
39
  )
40
  mailservice = build("gmail", "v1", credentials=credentials)
41
+ mail.collect(mailservice, query.query)
42
  return JSONResponse(content={"message": "Mail collected successfully."})
43
  except Exception as e:
44
  return JSONResponse(content={"error": str(e)}, status_code=500)
app/schema/__init__.py CHANGED
@@ -19,6 +19,17 @@ class ReqData(BaseModel):
19
  user_id: str
20
  web: Optional[bool] = False
21
 
 
 
 
 
 
 
 
 
 
 
 
22
  class ReqFollowUp(BaseModel):
23
  """
24
  RequestFollowUp is a Pydantic model that represents a request for follow-up.
 
19
  user_id: str
20
  web: Optional[bool] = False
21
 
22
+ class MailReqData(BaseModel):
23
+ """
24
+ MailReqData is a data model representing the structure of a mail request.
25
+
26
+ Attributes:
27
+ email (str): The email address of the sender.
28
+ query (str): The query or message content sent by the user.
29
+ """
30
+ email: str
31
+ query: str
32
+
33
  class ReqFollowUp(BaseModel):
34
  """
35
  RequestFollowUp is a Pydantic model that represents a request for follow-up.