Spaces:
Running
Running
Refactor mail handling: update collect function to use MailReqData model and remove unused code
Browse files
- app/controllers/mail.py +1 -2
- app/models/chroma/__init__.py +0 -27
- app/models/mails/__init__.py +0 -1
- app/playground/app.py +0 -44
- app/playground/phi-4-mini-instruct.py +0 -23
- app/playground/test.py +0 -5
- app/router/mail.py +6 -4
- app/schema/__init__.py +11 -0
app/controllers/mail.py
CHANGED
@@ -6,7 +6,6 @@ from datetime import datetime, timedelta
|
|
6 |
from venv import logger
|
7 |
from ics import Calendar
|
8 |
|
9 |
-
# import pandas as pd
|
10 |
from langchain_core.documents import Document
|
11 |
from langchain_community.document_loaders import (
|
12 |
PyPDFLoader,
|
@@ -16,7 +15,6 @@ from langchain_community.document_loaders import (
|
|
16 |
)
|
17 |
|
18 |
from models.db import vectorstore
|
19 |
-
# from models.mails import build_gmail_service
|
20 |
|
21 |
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
|
22 |
EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
@@ -78,6 +76,7 @@ def list_emails(service, messages):
|
|
78 |
for message in messages:
|
79 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
80 |
metadata = {}
|
|
|
81 |
if msg["id"] in vectorstore.index_to_docstore_id:
|
82 |
logger.info("Email already exists in the database.")
|
83 |
continue
|
|
|
6 |
from venv import logger
|
7 |
from ics import Calendar
|
8 |
|
|
|
9 |
from langchain_core.documents import Document
|
10 |
from langchain_community.document_loaders import (
|
11 |
PyPDFLoader,
|
|
|
15 |
)
|
16 |
|
17 |
from models.db import vectorstore
|
|
|
18 |
|
19 |
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
|
20 |
EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
|
|
|
76 |
for message in messages:
|
77 |
msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
|
78 |
metadata = {}
|
79 |
+
logger.info("vectorstore.index_to_docstore_id: %s", vectorstore.index_to_docstore_id)
|
80 |
if msg["id"] in vectorstore.index_to_docstore_id:
|
81 |
logger.info("Email already exists in the database.")
|
82 |
continue
|
app/models/chroma/__init__.py
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
# """Module for the Vector Database."""
|
2 |
-
# from langchain_chroma import Chroma
|
3 |
-
# from models.llm import EmbeddingsModel
|
4 |
-
|
5 |
-
# vectorstore = Chroma(
|
6 |
-
# embedding_function=EmbeddingsModel("all-MiniLM-L6-v2"),
|
7 |
-
# collection_name="email",
|
8 |
-
# persist_directory="models/chroma/data"
|
9 |
-
# )
|
10 |
-
|
11 |
-
# # def create_or_get_collection(collection_name: str):
|
12 |
-
# # """
|
13 |
-
# # Creates a new collection or gets an existing collection from the Vector Database.
|
14 |
-
|
15 |
-
# # Args:
|
16 |
-
# # collection_name (str): The name of the collection.
|
17 |
-
|
18 |
-
# # Returns:
|
19 |
-
# # chromadb.Collection: The collection associated with the provided name.
|
20 |
-
# # """
|
21 |
-
# # chroma_client = chromadb.PersistentClient(path="models/chroma/data")
|
22 |
-
# # collection = chroma_client.get_or_create_collection(collection_name)
|
23 |
-
# # # try:
|
24 |
-
# # # collection = chroma_client.create_collection(collection_name)
|
25 |
-
# # # except chromadb.errors.UniqueConstraintError:
|
26 |
-
# # # collection = chroma_client.get_collection(collection_name)
|
27 |
-
# # return collection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/models/mails/__init__.py
CHANGED
@@ -3,7 +3,6 @@ import os.path
|
|
3 |
import pickle
|
4 |
|
5 |
from google.auth.transport.requests import Request
|
6 |
-
# from google.oauth2.credentials import Credentials
|
7 |
from google_auth_oauthlib.flow import InstalledAppFlow
|
8 |
from googleapiclient.discovery import build
|
9 |
|
|
|
3 |
import pickle
|
4 |
|
5 |
from google.auth.transport.requests import Request
|
|
|
6 |
from google_auth_oauthlib.flow import InstalledAppFlow
|
7 |
from googleapiclient.discovery import build
|
8 |
|
app/playground/app.py
DELETED
@@ -1,44 +0,0 @@
|
|
1 |
-
"""Streamlit app example."""
|
2 |
-
import logging
|
3 |
-
import uuid
|
4 |
-
import streamlit as st
|
5 |
-
|
6 |
-
from chain import RAGChain
|
7 |
-
from retriever import DocRetriever
|
8 |
-
from controllers import mail
|
9 |
-
|
10 |
-
logging.basicConfig(
|
11 |
-
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
12 |
-
logging.getLogger().setLevel(logging.ERROR)
|
13 |
-
|
14 |
-
with st.sidebar:
|
15 |
-
st.header("Controls")
|
16 |
-
if st.button("Collect Data"):
|
17 |
-
result = mail.collect()
|
18 |
-
with st.chat_message("assistant"):
|
19 |
-
response_content = st.markdown(result)
|
20 |
-
|
21 |
-
if 'chat_id' not in st.session_state:
|
22 |
-
st.session_state.chat_id = str(uuid.uuid4())
|
23 |
-
st.session_state.user_id = str(uuid.uuid4())
|
24 |
-
|
25 |
-
if "messages" not in st.session_state:
|
26 |
-
st.session_state.messages = []
|
27 |
-
|
28 |
-
for message in st.session_state.messages:
|
29 |
-
with st.chat_message(message["role"]):
|
30 |
-
st.markdown(message["content"])
|
31 |
-
|
32 |
-
if prompt := st.chat_input("What is up?"):
|
33 |
-
|
34 |
-
st.session_state.messages.append({"role": "user", "content": prompt})
|
35 |
-
with st.chat_message("user"):
|
36 |
-
st.markdown(prompt)
|
37 |
-
req = {"query": prompt}
|
38 |
-
chain = RAGChain(DocRetriever(req=req))
|
39 |
-
|
40 |
-
result = chain.invoke({"input": req['query']},
|
41 |
-
config={"configurable": {"session_id": st.session_state.chat_id}})
|
42 |
-
with st.chat_message("assistant"):
|
43 |
-
response_content = st.markdown(result['answer'])
|
44 |
-
st.session_state.messages.append({"role": "assistant", "content": result['answer']})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/playground/phi-4-mini-instruct.py
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
from langchain_huggingface import HuggingFacePipeline
|
2 |
-
|
3 |
-
# Define the model ID
|
4 |
-
model_id = "gpt2"
|
5 |
-
model_id = "microsoft/Phi-4-mini-instruct"
|
6 |
-
model_id = "Qwen/Qwen2.5-7B-Instruct"
|
7 |
-
model_id = "microsoft/Phi-3-small-8k-instruct"
|
8 |
-
|
9 |
-
# Create a pipeline for text generation
|
10 |
-
llm = HuggingFacePipeline.from_model_id(
|
11 |
-
model_id=model_id,
|
12 |
-
task="text-generation",
|
13 |
-
device=-1,
|
14 |
-
# trust_remote_code=True,
|
15 |
-
pipeline_kwargs={
|
16 |
-
"max_new_tokens": 256,
|
17 |
-
"top_k": 50
|
18 |
-
},
|
19 |
-
)
|
20 |
-
|
21 |
-
# Use the model to generate text
|
22 |
-
response = llm.invoke("Hello, how are you?")
|
23 |
-
print(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/playground/test.py
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
from controllers import mail
|
2 |
-
|
3 |
-
if __name__ == "__main__":
|
4 |
-
mail.collect()
|
5 |
-
# mail.get_documents()
|
|
|
|
|
|
|
|
|
|
|
|
app/router/mail.py
CHANGED
@@ -8,10 +8,12 @@ from controllers import mail
|
|
8 |
from google.oauth2.credentials import Credentials
|
9 |
from googleapiclient.discovery import build
|
10 |
|
|
|
|
|
11 |
router = APIRouter(prefix="/mail", tags=["mail"])
|
12 |
|
13 |
@router.post("")
|
14 |
-
def collect(
|
15 |
"""
|
16 |
Handles the chat POST request.
|
17 |
|
@@ -22,8 +24,8 @@ def collect(email: str, request: Request):
|
|
22 |
str: The generated response from the chat function.
|
23 |
"""
|
24 |
try:
|
25 |
-
if os.path.exists(f"cache/{email}.pickle"):
|
26 |
-
with open(f"cache/{email}.pickle", "rb") as token:
|
27 |
credentials = pickle.load(token)
|
28 |
else:
|
29 |
cred_dict = request.state.session.get("credential")
|
@@ -36,7 +38,7 @@ def collect(email: str, request: Request):
|
|
36 |
scopes=cred_dict["scopes"],
|
37 |
)
|
38 |
mailservice = build("gmail", "v1", credentials=credentials)
|
39 |
-
mail.collect(mailservice)
|
40 |
return JSONResponse(content={"message": "Mail collected successfully."})
|
41 |
except Exception as e:
|
42 |
return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
|
8 |
from google.oauth2.credentials import Credentials
|
9 |
from googleapiclient.discovery import build
|
10 |
|
11 |
+
from schema import MailReqData
|
12 |
+
|
13 |
router = APIRouter(prefix="/mail", tags=["mail"])
|
14 |
|
15 |
@router.post("")
|
16 |
+
def collect(query: MailReqData, request: Request):
|
17 |
"""
|
18 |
Handles the chat POST request.
|
19 |
|
|
|
24 |
str: The generated response from the chat function.
|
25 |
"""
|
26 |
try:
|
27 |
+
if os.path.exists(f"cache/{query.email}.pickle"):
|
28 |
+
with open(f"cache/{query.email}.pickle", "rb") as token:
|
29 |
credentials = pickle.load(token)
|
30 |
else:
|
31 |
cred_dict = request.state.session.get("credential")
|
|
|
38 |
scopes=cred_dict["scopes"],
|
39 |
)
|
40 |
mailservice = build("gmail", "v1", credentials=credentials)
|
41 |
+
mail.collect(mailservice, query.query)
|
42 |
return JSONResponse(content={"message": "Mail collected successfully."})
|
43 |
except Exception as e:
|
44 |
return JSONResponse(content={"error": str(e)}, status_code=500)
|
app/schema/__init__.py
CHANGED
@@ -19,6 +19,17 @@ class ReqData(BaseModel):
|
|
19 |
user_id: str
|
20 |
web: Optional[bool] = False
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
class ReqFollowUp(BaseModel):
|
23 |
"""
|
24 |
RequestFollowUp is a Pydantic model that represents a request for follow-up.
|
|
|
19 |
user_id: str
|
20 |
web: Optional[bool] = False
|
21 |
|
22 |
+
class MailReqData(BaseModel):
|
23 |
+
"""
|
24 |
+
MailReqData is a data model representing the structure of a mail request.
|
25 |
+
|
26 |
+
Attributes:
|
27 |
+
email (str): The email address of the sender.
|
28 |
+
query (str): The query or message content sent by the user.
|
29 |
+
"""
|
30 |
+
email: str
|
31 |
+
query: str
|
32 |
+
|
33 |
class ReqFollowUp(BaseModel):
|
34 |
"""
|
35 |
RequestFollowUp is a Pydantic model that represents a request for follow-up.
|