Update app.py
app.py CHANGED
@@ -483,50 +483,103 @@ def setup_qa(db):
     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
 
 # --- Process Answer ---
-def process_answer(question, full_text):
-    # STEP 1: Chunk the PDF text
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    docs = text_splitter.create_documents([full_text])
-
-    # STEP 2: Create embeddings
-    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
-    db = Chroma.from_documents(docs, embeddings)
-
-    # STEP 3: Retrieve relevant chunks using the question
-    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-    relevant_docs = retriever.get_relevant_documents(question)
-
-    # STEP 4: Format the context
-    context = "\n\n".join([doc.page_content for doc in relevant_docs])
-
-    # STEP 5: Prompting
-    prompt_template = """
-    You are a helpful assistant that answers questions based on the context below.
-
-    Context:
-    {context}
-
-    Question: {question}
-
-    Answer:
-    """.strip()
-
-    prompt = prompt_template.format(context=context, question=question)
-
-    # STEP 6: Load the model and generate response
-    llm = HuggingFacePipeline.from_model_id(
-        model_id="MBZUAI/LaMini-T5-738M",
-        task="text2text-generation",
-        model_kwargs={"temperature": 0.3, "max_length": 256},
-    )
-
-    return llm.invoke(prompt)
+# def process_answer(question, full_text):
+#     # STEP 1: Chunk the PDF text
+#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+#     docs = text_splitter.create_documents([full_text])
+
+#     # STEP 2: Create embeddings
+#     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+#     db = Chroma.from_documents(docs, embeddings)
+
+#     # STEP 3: Retrieve relevant chunks using the question
+#     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+#     relevant_docs = retriever.get_relevant_documents(question)
+
+#     # STEP 4: Format the context
+#     context = "\n\n".join([doc.page_content for doc in relevant_docs])
+
+#     # STEP 5: Prompting
+#     prompt_template = """
+#     You are a helpful assistant that answers questions based on the context below.
+
+#     Context:
+#     {context}
+
+#     Question: {question}
+
+#     Answer:
+#     """.strip()
+
+#     prompt = prompt_template.format(context=context, question=question)
+
+#     # STEP 6: Load the model and generate response
+#     llm = HuggingFacePipeline.from_model_id(
+#         model_id="MBZUAI/LaMini-T5-738M",
+#         task="text2text-generation",
+#         model_kwargs={"temperature": 0.3, "max_length": 256},
+#     )
+
+#     return llm.invoke(prompt)
+
+def process_answer(question, full_text):
+    from langchain_community.document_loaders import TextLoader
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain.vectorstores import Chroma
+    from langchain_community.embeddings import SentenceTransformerEmbeddings
+    from langchain.chains import RetrievalQA
+    from langchain import HuggingFacePipeline
+    from transformers import pipeline
+    import os
+    import shutil
+
+    # Save to temp file and load it as document
+    with open("temp_text.txt", "w") as f:
+        f.write(full_text)
+
+    loader = TextLoader("temp_text.txt")
+    docs = loader.load()
+
+    # Chunking the docs
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+    splits = text_splitter.split_documents(docs)
+
+    # Embeddings
+    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    # Clean up old DB if exists
+    if os.path.exists("chroma_db"):
+        shutil.rmtree("chroma_db")
+
+    db = Chroma.from_documents(splits, embeddings, persist_directory="chroma_db")
+    retriever = db.as_retriever()
+
+    # Model pipeline
+    pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", max_length=512)
+    llm = HuggingFacePipeline(pipeline=pipe)
+
+    # Retrieval QA chain
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=retriever,
+        return_source_documents=False
+    )
+
+    # Check if question is about summarization
+    if "summarize" in question.lower() or "summary" in question.lower() or "tl;dr" in question.lower():
+        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"  # trimming to 3K chars for model
+        summary = llm(prompt)
+        return summary
+    else:
+        answer = qa_chain.run(question)
+        return answer
 
 
 # --- UI Layout ---
 with st.sidebar:
     st.header("📄 Upload PDF")
     uploaded_file = st.file_uploader("Choose a PDF", type=["pdf"])
+
 
 # --- Main Interface ---
 if uploaded_file:
@@ -551,6 +604,14 @@ if uploaded_file:
     st.markdown("---")
     st.markdown("**💡 Suggestions:**")
     st.caption("Try: \"Summarize this document\" or \"What is the key idea?\"")
+    with st.expander("💡 Suggestions", expanded=True):
+        st.markdown("""
+        - "Summarize this document"
+        - "Give a quick summary"
+        - "What are the main points?"
+        - "Explain this document in short"
+        """)
+
 
 else:
     st.error("⚠️ No text could be extracted from the PDF. Try another file.")
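
The diff leaves the body of the if uploaded_file: block (new lines 586-603) unchanged and therefore elided. For orientation, here is a minimal sketch of how process_answer is typically wired into a Streamlit interface of this shape; the extract_text_from_pdf helper and the widget labels are illustrative assumptions, not part of this commit.

# Hypothetical sketch of the elided main-interface block; not part of this commit.
# extract_text_from_pdf is an assumed helper defined elsewhere in app.py.
if uploaded_file:
    full_text = extract_text_from_pdf(uploaded_file)
    if full_text:
        question = st.text_input("Ask a question about the PDF")
        if question:
            with st.spinner("Thinking..."):
                answer = process_answer(question, full_text)
            st.write(answer)

The commit itself shifts process_answer from hand-formatting a prompt and calling llm.invoke to persisting the Chroma index on disk, answering ordinary questions through a RetrievalQA chain, and routing summary-style questions directly to the LLM with a truncated (3,000-character) slice of the raw text.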