Update app.py
app.py CHANGED
@@ -8,6 +8,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
+from langchain.prompts import PromptTemplate
 from pptx import Presentation
 from io import BytesIO
 import shutil
@@ -37,6 +38,29 @@ chat_history = []
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 PERSIST_DIRECTORY = tempfile.mkdtemp()  # Use temporary directory for ChromaDB
 
+# Custom prompt template
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
+    """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, incorporating relevant context from the conversation.
+
+Chat History:
+{chat_history}
+
+Follow-up Question: {question}
+
+Standalone Question:"""
+)
+
+QA_PROMPT = PromptTemplate.from_template(
+    """You are a precise and factual assistant. Using the provided context, answer the question by checking if the exact word or phrase asked about is present in the context. If the question asks if a word is mentioned, include cases where the word appears as part of a larger word or phrase (e.g., "hugging" in "hugging face"). Do not make assumptions beyond the context. If the word is not present, say so clearly.
+
+Context:
+{context}
+
+Question: {question}
+
+Answer:"""
+)
+
 # Custom PPTX loader
 class PPTXLoader:
     def __init__(self, file_path):
@@ -164,7 +188,9 @@ def initialize_qa_chain(temperature):
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vector_store.as_retriever(search_kwargs={"k": k}),
-        memory=memory
+        memory=memory,
+        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+        combine_docs_chain_kwargs={"prompt": QA_PROMPT}
     )
     logger.info(f"Initialized QA chain with {LLM_MODEL} and k={k}.")
     return "QA Doctor: QA chain initialized successfully.", None
@@ -261,7 +287,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as
         status = gr.Textbox(label="Status", interactive=False)
 
     with gr.Column(scale=1):
-        temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.
+        temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.1, label="Temperature")
         chunk_size = gr.Slider(minimum=500, maximum=2000, step=100, value=1000, label="Chunk Size")
         chunk_overlap = gr.Slider(minimum=0, maximum=500, step=50, value=100, label="Chunk Overlap")
         init_button = gr.Button("Initialize QA Chain")
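For anyone adapting this commit, a minimal sketch of what the new condense step actually sends to the LLM, runnable outside the Space. The template is copied verbatim from the diff; the chat history and question passed to format() are hypothetical, invented purely for illustration.

from langchain.prompts import PromptTemplate

# Same template the commit adds; formatting it shows the exact string
# the chain sends to the LLM when it rewrites a follow-up question.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
    """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, incorporating relevant context from the conversation.

Chat History:
{chat_history}

Follow-up Question: {question}

Standalone Question:"""
)

# Hypothetical exchange, purely for illustration.
print(CONDENSE_QUESTION_PROMPT.format(
    chat_history="Human: Is the word 'hugging' mentioned in the deck?\nAI: Yes, as part of 'hugging face'.",
    question="On which slide does it appear?",
))

In ConversationalRetrievalChain, condense_question_prompt controls only that rewriting step, while combine_docs_chain_kwargs={"prompt": QA_PROMPT} forwards the answer template to the underlying combine-documents chain, which is why QA_PROMPT must expose the {context} and {question} variables. The memory_key="chat_history" set earlier matches the {chat_history} placeholder, so the conversation buffer is injected automatically.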