HeRksTAn committed on
Commit f444263 · 1 Parent(s): ccc0624
Files changed (1)
  1. app.py +39 -39
app.py CHANGED
@@ -2,17 +2,17 @@ import chainlit as cl
 from chainlit.playground.providers import ChatOpenAI
 from dotenv import load_dotenv
 # from langchain_community.document_loaders import PyMuPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-import tiktoken
-from langchain.prompts import ChatPromptTemplate
-from operator import itemgetter
-from langchain_core.runnables import RunnablePassthrough
-from langchain import ChatOpenAI, OpenAIEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.retrievers import MultiQueryRetriever
-
-from langchain.prompts import ChatPromptTemplate
-from langchain.retrievers import MultiQueryRetriever
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# import tiktoken
+# from langchain.prompts import ChatPromptTemplate
+# from operator import itemgetter
+# from langchain_core.runnables import RunnablePassthrough
+# from langchain import ChatOpenAI, OpenAIEmbeddings
+# from langchain_community.vectorstores import FAISS
+# from langchain.retrievers import MultiQueryRetriever
+
+# from langchain.prompts import ChatPromptTemplate
+# from langchain.retrievers import MultiQueryRetriever
 
 template = """
 you can only answer questions related to what's in the context. If it's not in the context, then you would reply with
@@ -34,43 +34,43 @@ init_settings = {
     "presence_penalty": 0,
 }
 
-embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+# embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 
 
 load_dotenv()
 
-def tiktoken_len(text):
-    tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(
-        text,
-    )
-    return len(tokens)
+# def tiktoken_len(text):
+#     tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(
+#         text,
+#     )
+#     return len(tokens)
 
 
 @cl.on_chat_start
 async def main():
     model = ChatOpenAI(streaming=True)
 
-    prompt = ChatPromptTemplate.from_template(template)
+    # prompt = ChatPromptTemplate.from_template(template)
 
-    nvida_doc = PyMuPDFLoader('../docs/nvidia-document.pdf')
-    data = nvida_doc.load()
+    # nvida_doc = PyMuPDFLoader('../docs/nvidia-document.pdf')
+    # data = nvida_doc.load()
 
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = 1700,
-        chunk_overlap = 0,
-        length_function = tiktoken_len)
+    # text_splitter = RecursiveCharacterTextSplitter(
+    #     chunk_size = 1700,
+    #     chunk_overlap = 0,
+    #     length_function = tiktoken_len)
 
-    nvidia_doc_chunks = text_splitter.split_documents(data)
+    # nvidia_doc_chunks = text_splitter.split_documents(data)
 
-    vector_store = FAISS.from_documents(nvidia_doc_chunks, embedding=embeddings)
+    # vector_store = FAISS.from_documents(nvidia_doc_chunks, embedding=embeddings)
 
-    retriever = vector_store.as_retriever()
-    advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model)
+    # retriever = vector_store.as_retriever()
+    # advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model)
 
-    runnable = (
-        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-        | RunnablePassthrough.assign(context=itemgetter("context"))
-        | {"response": prompt | model, "context": itemgetter("context")})
+    # runnable = (
+    #     {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+    #     | RunnablePassthrough.assign(context=itemgetter("context"))
+    #     | {"response": prompt | model, "context": itemgetter("context")})
 
 # retrieval_qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
 # document_chain = create_stuff_documents_chain(model, retrieval_qa_prompt)
@@ -79,7 +79,7 @@ async def main():
 # cl.user_session.set("settings", init_settings)
 # cl.user_session.set("nvidia_doc", data)
 
-    cl.user_session.set("runnable", runnable)
+    # cl.user_session.set("runnable", runnable)
 
 
 
@@ -87,9 +87,9 @@ async def on_message(message: cl.Message):
 async def on_message(message: cl.Message):
 # settings = cl.user_session.get("settings")
 # nvida_doc = cl.user_session.get("nvidia_doc")
-    runnable = cl.user_session.get("runnable")
+    # runnable = cl.user_session.get("runnable")
 
-    msg = cl.Message(content="")
+    # msg = cl.Message(content="")
 
 
 # async for chunk in runnable.astream(
@@ -100,10 +100,10 @@ async def on_message(message: cl.Message):
 
 # await msg.send()
 
-    inputs = {"question": message.content}
-    result = await runnable.ainvoke(inputs)
-    msg = cl.Message(content=result["response"].content)
-    await msg.send()
+    # inputs = {"question": message.content}
+    # result = await runnable.ainvoke(inputs)
+    # msg = cl.Message(content=result["response"].content)
+    # await msg.send()
 
 
 
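For reference, here is a minimal sketch of the retrieval pipeline this commit disables, reassembled from the commented-out lines so the chain can be read end to end. It is a sketch, not the committed code: the langchain_openai import path for ChatOpenAI and OpenAIEmbeddings is an assumption based on current LangChain packaging (the removed line imported them from the top-level langchain package), and the prompt template below is a stand-in with the same {context} and {question} variables the chain feeds in.

# Sketch of the disabled RAG chain; logic mirrors the commented-out app.py lines.
import tiktoken
from operator import itemgetter

from langchain_openai import ChatOpenAI, OpenAIEmbeddings  # assumed import path
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers import MultiQueryRetriever
from langchain_core.runnables import RunnablePassthrough

# Stand-in for the `template` string defined at the top of app.py.
template = """Answer only from the context below; otherwise say you don't know.

Context: {context}
Question: {question}"""

def tiktoken_len(text):
    # Length function so chunk_size is measured in gpt-3.5-turbo tokens, not characters.
    return len(tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text))

model = ChatOpenAI(streaming=True)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
prompt = ChatPromptTemplate.from_template(template)

# Load the PDF, split it into token-sized chunks, and index them in FAISS.
data = PyMuPDFLoader("../docs/nvidia-document.pdf").load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1700, chunk_overlap=0, length_function=tiktoken_len)
chunks = splitter.split_documents(data)
vector_store = FAISS.from_documents(chunks, embedding=embeddings)
retriever = vector_store.as_retriever()

# Built but unused in the original chain, which wires in the plain retriever.
advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model)

# LCEL graph: retrieve context for the question, then answer with prompt | model,
# returning both the model response and the retrieved context.
runnable = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | model, "context": itemgetter("context")}
)

# Usage, as in on_message:
#     result = await runnable.ainvoke({"question": message.content})
#     await cl.Message(content=result["response"].content).send()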