Mdean77 committed
Commit 2d7499f · 1 Parent(s): b29e219

Refactoring

Files changed (3):
  1. .gitignore +2 -1
  2. app.py +28 -35
  3. prompts.py +26 -0
.gitignore CHANGED
@@ -1,2 +1,3 @@
 DS_Store
-.env
+.env
+cache/
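
The new cache/ entry lines up with the LocalFileStore/CacheBackedEmbeddings setup visible in app.py below: embedding vectors are cached to disk and shouldn't be committed. A minimal sketch of that pattern, assuming a ./cache/ store path (the hunks in this commit don't show the store construction):

# Sketch: disk-backed embedding cache whose directory .gitignore now excludes.
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
store = LocalFileStore("./cache/")  # assumed path, matching the new .gitignore entry
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings, store, namespace=core_embeddings.model
)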
app.py CHANGED
@@ -16,7 +16,7 @@ from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.storage import LocalFileStore
 from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
+
 from langchain_core.globals import set_llm_cache
 from langchain_openai import ChatOpenAI
 from langchain_core.caches import InMemoryCache
@@ -24,7 +24,7 @@ from operator import itemgetter
 from langchain_core.runnables.passthrough import RunnablePassthrough
 from langchain_core.runnables.config import RunnableConfig
 import uuid
-
+from prompts import chat_prompt
 
 load_dotenv()
 
@@ -36,8 +36,7 @@ load_dotenv()
 """
 GLOBAL CODE HERE
 """
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-Loader = PyMuPDFLoader
+
 # Typical Embedding Model
 core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 
@@ -61,43 +60,40 @@ vectorstore = QdrantVectorStore(
     collection_name=collection_name,
     embedding=cached_embedder)
 
-rag_system_prompt_template = """\
-You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
-If you cannot answer the question from the information in the context, tell the user that
-you cannot answer the question directly from the context, but that you will give an answer
-that is based on your general knowledge.
-"""
-
-rag_message_list = [
-    {"role" : "system", "content" : rag_system_prompt_template},
-]
-
-rag_user_prompt_template = """
-Question:
-{question}
-Context:
-{context}
-"""
-
-chat_prompt = ChatPromptTemplate.from_messages([
-    ("system", rag_system_prompt_template),
-    ("human", rag_user_prompt_template)
-])
+# rag_system_prompt_template = """\
+# You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+# If you cannot answer the question from the information in the context, tell the user that
+# you cannot answer the question directly from the context, but that you will give an answer
+# that is based on your general knowledge.
+# """
+
+# rag_message_list = [
+#     {"role" : "system", "content" : rag_system_prompt_template},
+# ]
+
+# rag_user_prompt_template = """
+# Question:
+# {question}
+# Context:
+# {context}
+# """
+
+# chat_prompt = ChatPromptTemplate.from_messages([
+#     ("system", rag_system_prompt_template),
+#     ("human", rag_user_prompt_template)
+# ])
 chat_model = ChatOpenAI(model="gpt-4o")
 set_llm_cache(InMemoryCache())
 
 def split_file(file: AskFileMessage):
     import tempfile
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+    Loader = PyMuPDFLoader
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
         with open(tempfile.name, "wb") as f:
             f.write(file.content)
-    # separate_pages = []
     loader = Loader(tempfile.name)
     documents = loader.load()
-    # separate_pages.extend(page)
-    # one_document = ""
-    # for page in separate_pages:
-    #     one_document+= page.page_content
     docs = text_splitter.split_documents(documents)
     for i, doc in enumerate(docs):
         doc.metadata["source"] = f"source_{id}"
@@ -125,13 +121,10 @@ async def on_chat_start():
     )
 
     await msg.send()
-    docs = split_file(file)
-
 
+    docs = split_file(file)
     vectorstore.add_documents(docs)
 
-
-
     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 15})
     retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
prompts.py ADDED
@@ -0,0 +1,26 @@
+## Contains prompts, welcome messages, etc.
+
+from langchain_core.prompts import ChatPromptTemplate
+
+rag_system_prompt_template = """\
+You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+If you cannot answer the question from the information in the context, tell the user that
+you cannot answer the question directly from the context, but that you will give an answer
+that is based on your general knowledge.
+"""
+
+rag_message_list = [
+    {"role" : "system", "content" : rag_system_prompt_template},
+]
+
+rag_user_prompt_template = """
+Question:
+{question}
+Context:
+{context}
+"""
+
+chat_prompt = ChatPromptTemplate.from_messages([
+    ("system", rag_system_prompt_template),
+    ("human", rag_user_prompt_template)
+])
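
Since chat_prompt is the only name app.py imports from this module (rag_message_list is carried over but unused by the template), the prompt can be sanity-checked in isolation by rendering it without an LLM call. A minimal sketch; the question and context strings are placeholders, not from the commit:

# Render the ChatPromptTemplate to verify the {question}/{context} wiring.
from prompts import chat_prompt

messages = chat_prompt.format_messages(
    question="What does the paper conclude?",
    context="(retrieved chunks would be injected here)",
)
for m in messages:
    print(f"{m.type}: {m.content[:60]}")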