added llm_qa_chain_with_memory.py
Files changed:

- .gitignore +1 -0
- Makefile +4 -1
- app.py +9 -4
- app_modules/init.py +10 -1
- app_modules/llm_qa_chain_with_memory.py +32 -0
- app_modules/utils.py +4 -0
- qa_chain_with_memory_test.py +104 -0
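Taken together, the changes add a conversation-memory variant of the QA chain plus a test/chat driver for it. A minimal end-to-end sketch of the intended flow, assuming `app_init()` returns `(llm_loader, qa_chain)` and that `call_chain()` tolerates `None` in place of the streaming callback handler (the test script below always passes a handler):

```python
import os

# The toggle must be set before app_modules.init is imported, because the
# QAChain implementation is chosen at module import time (see init.py below).
os.environ["USER_CONVERSATION_SUMMARY_BUFFER_MEMORY"] = "true"

from app_modules.init import app_init
from app_modules.utils import print_llm_response

llm_loader, qa_chain = app_init()

chat_history = []
result = qa_chain.call_chain(
    {"question": "What is PCI DSS?", "chat_history": chat_history},
    None,  # assumption: no streaming callback handler needed here
    None,
    True,
)
print_llm_response(result)
```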
.gitignore CHANGED

```diff
@@ -1,4 +1,5 @@
 *.out
+*.log
 pdfs/
 .vscode/
 
```
Makefile CHANGED

```diff
@@ -5,8 +5,11 @@ start:
 test:
 	python qa_chain_test.py
 
+long-test:
+	python qa_chain_with_memory_test.py 100
+
 chat:
-	python
+	python qa_chain_with_memory_test.py chat
 
 ingest:
 	python ingest.py
```
app.py CHANGED

```diff
@@ -8,6 +8,8 @@ from timeit import default_timer as timer
 
 import gradio as gr
 
+os.environ["USER_CONVERSATION_SUMMARY_BUFFER_MEMORY"] = "true"
+
 from app_modules.init import app_init
 from app_modules.utils import print_llm_response
 
@@ -29,10 +31,13 @@ href = (
 )
 
 title = "Chat with PCI DSS v4"
-
-
-
-
+
+questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
+
+# Open the file for reading
+with open(questions_file_path, "r") as file:
+    examples = file.readlines()
+    examples = [example.strip() for example in examples]
 
 description = f"""\
 <div align="left">
```
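The second hunk loads example questions from the file pointed to by `QUESTIONS_FILE_PATH`; the diff does not show how `examples` is consumed further down in app.py. A hedged sketch of one common way such a list feeds a Gradio UI (the component layout here is illustrative, not taken from the commit):

```python
import gradio as gr

# Placeholder list standing in for the questions read from QUESTIONS_FILE_PATH.
examples = ["What is PCI DSS?", "What are its key requirements?"]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Question")
    # Clicking an example copies it into the textbox; the wiring from the
    # textbox to the QA chain is omitted in this sketch.
    gr.Examples(examples=examples, inputs=msg)

# demo.launch()
```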
app_modules/init.py CHANGED

```diff
@@ -1,4 +1,5 @@
 """Main entrypoint for the app."""
+
 import os
 from timeit import default_timer as timer
 from typing import List, Optional
@@ -9,7 +10,6 @@ from langchain.vectorstores.chroma import Chroma
 from langchain.vectorstores.faiss import FAISS
 
 from app_modules.llm_loader import LLMLoader
-from app_modules.llm_qa_chain import QAChain
 from app_modules.utils import get_device_types, init_settings
 
 found_dotenv = find_dotenv(".env")
@@ -27,6 +27,15 @@ if os.environ.get("LANGCHAIN_DEBUG") == "true":
 
     langchain.debug = True
 
+if os.environ.get("USER_CONVERSATION_SUMMARY_BUFFER_MEMORY") == "true":
+    from app_modules.llm_qa_chain_with_memory import QAChain
+
+    print("using llm_qa_chain_with_memory")
+else:
+    from app_modules.llm_qa_chain import QAChain
+
+    print("using llm_qa_chain")
+
 
 def app_init():
     # https://github.com/huggingface/transformers/issues/17611
```
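Because the `QAChain` import is now resolved once at module level, flipping `USER_CONVERSATION_SUMMARY_BUFFER_MEMORY` after `app_modules.init` has already been imported has no effect. A quick sketch for checking which implementation was picked (assuming, as in the test script below, that `app_init()` returns `(llm_loader, qa_chain)`):

```python
import os

# Must be set before the first import of app_modules.init.
os.environ["USER_CONVERSATION_SUMMARY_BUFFER_MEMORY"] = "true"

from app_modules.init import app_init

llm_loader, qa_chain = app_init()
# Expected to print "app_modules.llm_qa_chain_with_memory" when the toggle is on,
# and "app_modules.llm_qa_chain" otherwise.
print(type(qa_chain).__module__)
```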
app_modules/llm_qa_chain_with_memory.py ADDED

```python
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.base import Chain
from langchain.memory import ConversationSummaryBufferMemory

from app_modules.llm_inference import LLMInference


class QAChain(LLMInference):
    def __init__(self, vectorstore, llm_loader):
        super().__init__(llm_loader)
        self.vectorstore = vectorstore

    def create_chain(self) -> Chain:
        memory = ConversationSummaryBufferMemory(
            llm=self.llm_loader.llm,
            output_key="answer",
            memory_key="chat_history",
            max_token_limit=1024,
            return_messages=True,
        )
        qa = ConversationalRetrievalChain.from_llm(
            self.llm_loader.llm,
            memory=memory,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(
                search_kwargs=self.llm_loader.search_kwargs
            ),
            get_chat_history=lambda h: h,
            return_source_documents=True,
        )

        return qa
```
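A hedged sketch of how the chain built by `create_chain()` behaves across turns: `ConversationSummaryBufferMemory` keeps recent exchanges verbatim and summarizes older ones once `max_token_limit` is exceeded, so a follow-up question can lean on the previous answer. This assumes a `vectorstore` and `llm_loader` already initialized elsewhere (e.g. by `app_init()`); it is not part of the commit:

```python
chain = QAChain(vectorstore, llm_loader).create_chain()

# First turn: answered from the retrieved PCI DSS passages and stored in the
# chain's memory under the "chat_history" key.
first = chain({"question": "What is PCI DSS?"})
print(first["answer"])

# Second turn: because memory is attached, only the new question is passed in;
# the follow-up pronoun is resolved against the stored history.
second = chain({"question": "What are its key requirements?"})
print(second["answer"])

# return_source_documents=True adds the retrieved chunks to each result.
print(len(second["source_documents"]))
```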
app_modules/utils.py CHANGED

```diff
@@ -85,6 +85,10 @@ def print_llm_response(llm_response):
             source["page_content"] if "page_content" in source else source.page_content
         )
 
+    if "chat_history" in llm_response:
+        print("\nChat History:")
+        print(llm_response["chat_history"])
+
 
 def get_device_types():
     print("Running on: ", platform.platform())
```
qa_chain_with_memory_test.py ADDED

```python
import os
import sys
from timeit import default_timer as timer

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import LLMResult

os.environ["USER_CONVERSATION_SUMMARY_BUFFER_MEMORY"] = "true"

from app_modules.init import app_init
from app_modules.utils import print_llm_response

llm_loader, qa_chain = app_init()


class MyCustomHandler(BaseCallbackHandler):
    def __init__(self):
        self.reset()

    def reset(self):
        self.texts = []

    def get_standalone_question(self) -> str:
        return self.texts[0].strip() if len(self.texts) > 0 else None

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        """Run when chain ends running."""
        print("\n<on_llm_end>")
        # print(response)
        self.texts.append(response.generations[0][0].text)


num_of_test_runs = 1
chatting = len(sys.argv) > 1 and sys.argv[1] == "chat"
if len(sys.argv) > 1 and not chatting:
    num_of_test_runs = int(sys.argv[1])

questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
chat_history_enabled = os.environ.get("CHAT_HISTORY_ENABLED") or "true"

custom_handler = MyCustomHandler()

# Chatbot loop
chat_history = []

# Open the file for reading
file = open(questions_file_path, "r")

# Read the contents of the file into a list of strings
questions = file.readlines()
for i in range(len(questions)):
    questions[i] = questions[i].strip()

if num_of_test_runs > 1:
    new_questions = []

    for i in range(num_of_test_runs):
        new_questions += questions

    questions = new_questions

# Close the file
file.close()

if __name__ == "__main__":
    questions.append("exit")

    chat_start = timer()

    while True:
        if chatting:
            query = input("Please enter your question: ")
        else:
            query = questions.pop(0)

        query = query.strip()
        if query.lower() == "exit":
            break

        print("\nQuestion: " + query)
        custom_handler.reset()

        start = timer()
        result = qa_chain.call_chain(
            {"question": query, "chat_history": chat_history},
            custom_handler,
            None,
            True,
        )
        end = timer()
        print(f"Completed in {end - start:.3f}s")

        if chat_history_enabled == "true":
            chat_history.append((query, result["answer"]))

        print_llm_response(result)

    chat_end = timer()
    total_time = chat_end - chat_start
    print(f"Total time used: {total_time:.3f} s")
    print(f"Number of tokens generated: {llm_loader.streamer.total_tokens}")
    print(
        f"Average generation speed: {llm_loader.streamer.total_tokens / total_time:.3f} tokens/s"
    )
```
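One note on `MyCustomHandler`: the script treats the first text captured by `on_llm_end` in a turn as the standalone question. This rests on the assumption that, when prior chat history exists, `ConversationalRetrievalChain` runs a question-rewriting LLM call before the answering call; on the very first turn there is no rewrite, so the first captured text is simply the answer. Under that assumption, the rewritten question could be inspected inside the loop right after `call_chain` returns:

```python
# Inside the while-loop above, after call_chain(...) has returned:
standalone = custom_handler.get_standalone_question()
if standalone:
    print("Standalone question:", standalone)
```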