Update app.py

app.py CHANGED
@@ -138,46 +138,77 @@
-import streamlit as st
+import streamlit as st
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain.llms import HuggingFaceHub
+import os
+
+# Set Hugging Face API Token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingfacehub_api_token_here"

-# Custom Prompt
+# Custom Prompt
+custom_prompt = PromptTemplate(
+    input_variables=["context", "question"],
+    template="""
+You are a helpful assistant. Use the context below to answer the question.
+If the answer is not in the context, say "I don't know."
+
+Context:
+{context}
+
+Question:
+{question}
+
+Answer:
+"""
+)
+
+# Load PDF and split into chunks
+def load_and_split_pdf(uploaded_file):
+    loader = PyPDFLoader(uploaded_file.name)
+    documents = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
+    chunks = text_splitter.split_documents(documents)
+    return chunks
+
+# Build vectorstore from document chunks
+def build_vectorstore(chunks):
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    db = FAISS.from_documents(chunks, embedding=embeddings)
+    return db
+
+# Build QA chain
+def build_qa_chain(vectorstore):
+    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.2, "max_length": 512})
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        chain_type="stuff",
+        chain_type_kwargs={"prompt": custom_prompt}
+    )
+    return qa_chain
+
+# Streamlit App
+st.set_page_config(page_title="Accurate PDF Chatbot", layout="centered")
+st.title("PDF QA Chatbot - RAG Powered")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

-if uploaded_file:
-    with st.spinner("Searching answer..."):
-        answer = qa_chain.run(question)
-    st.markdown(f"**Answer:** {answer}")
+if uploaded_file:
+    with st.spinner("Reading and processing PDF..."):
+        chunks = load_and_split_pdf(uploaded_file)
+        vectorstore = build_vectorstore(chunks)
+        qa_chain = build_qa_chain(vectorstore)
+    st.success("PDF processed. Ask your question below.")

+    question = st.text_input("Ask a question from the PDF:")

+    if question:
+        with st.spinner("Searching answer..."):
+            answer = qa_chain.run(question)
+        st.markdown(f"**Answer:** {answer}")
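A note on the new `load_and_split_pdf`: `st.file_uploader` returns an in-memory `UploadedFile`, while `PyPDFLoader` expects a filesystem path, so `PyPDFLoader(uploaded_file.name)` only resolves if a file with that name already happens to exist in the Space's working directory. A minimal sketch of a more robust variant, assuming the rest of the commit stays as-is (the temp-file handling is not part of this change):

```python
import os
import tempfile

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_and_split_pdf(uploaded_file):
    # UploadedFile is a file-like object held in memory; persist its bytes to
    # a temporary .pdf so PyPDFLoader has a real path to open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)  # remove the temp copy once pages are loaded
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    return splitter.split_documents(documents)
```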
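Separately, Streamlit reruns the whole script on every widget interaction, so as written the `if uploaded_file:` block re-chunks, re-embeds, and rebuilds the FAISS index each time a question is submitted. One way to avoid that is `st.cache_resource`; the sketch below is illustrative only (`get_qa_chain` and the bytes-based loader it calls are assumptions, not part of the commit):

```python
import streamlit as st

@st.cache_resource(show_spinner=False)
def get_qa_chain(pdf_bytes: bytes):
    # Cached per unique PDF content: a rerun with the same upload reuses the
    # already-built index instead of re-embedding the whole document.
    chunks = load_and_split_pdf_from_bytes(pdf_bytes)  # hypothetical bytes-based loader
    vectorstore = build_vectorstore(chunks)
    return build_qa_chain(vectorstore)

if uploaded_file:
    qa_chain = get_qa_chain(uploaded_file.getvalue())
```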
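Finally, `qa_chain.run(question)` works on the LangChain versions this code targets, but `Chain.run` is deprecated from LangChain 0.1 onward; if the Space's requirements are ever bumped, the equivalent call would look roughly like this:

```python
result = qa_chain.invoke({"query": question})  # RetrievalQA expects its input under "query"
answer = result["result"]                      # and returns the answer text under "result"
```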