pradeepsengarr committed
Commit 6e1c9c8 · verified
1 Parent(s): 2427008

Update app.py

Files changed (1)
app.py +67 -36
app.py CHANGED
@@ -138,46 +138,77 @@
 
 
 
-import streamlit as st from langchain_community.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import FAISS from langchain.embeddings import HuggingFaceEmbeddings from langchain.chains import RetrievalQA from langchain.prompts import PromptTemplate from langchain.llms import HuggingFaceHub import os
-
-Set Hugging Face API Token
-
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingfacehub_api_token_here"
 
-Custom Prompt
-
-custom_prompt = PromptTemplate( input_variables=["context", "question"], template=""" You are a helpful assistant. Use the context below to answer the question. If the answer is not in the context, say "I don't know."
-
-Context: {context}
-
-Question: {question}
-
-Answer: """ )
-
-Load PDF and split into chunks
-
-def load_and_split_pdf(uploaded_file): loader = PyPDFLoader(uploaded_file.name) documents = loader.load() text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100) chunks = text_splitter.split_documents(documents) return chunks
-
-Build vectorstore from document chunks
-
-def build_vectorstore(chunks): embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") db = FAISS.from_documents(chunks, embedding=embeddings) return db
-
-Build QA chain
-
-def build_qa_chain(vectorstore): llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.2, "max_length": 512}) qa_chain = RetrievalQA.from_chain_type( llm=llm, retriever=vectorstore.as_retriever(), chain_type="stuff", chain_type_kwargs={"prompt": custom_prompt} ) return qa_chain
-
-Streamlit App
-
-st.set_page_config(page_title="Accurate PDF Chatbot", layout="centered") st.title("PDF QA Chatbot - RAG Powered")
 
 uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
 
-if uploaded_file: with st.spinner("Reading and processing PDF..."): chunks = load_and_split_pdf(uploaded_file) vectorstore = build_vectorstore(chunks) qa_chain = build_qa_chain(vectorstore) st.success("PDF processed. Ask your question below.")
-
-question = st.text_input("Ask a question from the PDF:")
 
-if question:
-with st.spinner("Searching answer..."):
-answer = qa_chain.run(question)
-st.markdown(f"**Answer:** {answer}")
+import streamlit as st
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain.llms import HuggingFaceHub
+import os
+
+# Set Hugging Face API Token
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingfacehub_api_token_here"
 
+# Custom Prompt
+custom_prompt = PromptTemplate(
+    input_variables=["context", "question"],
+    template="""
+You are a helpful assistant. Use the context below to answer the question.
+If the answer is not in the context, say "I don't know."
+
+Context:
+{context}
+
+Question:
+{question}
+
+Answer:
+"""
+)
+
+# Load PDF and split into chunks
+def load_and_split_pdf(uploaded_file):
+    loader = PyPDFLoader(uploaded_file.name)
+    documents = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
+    chunks = text_splitter.split_documents(documents)
+    return chunks
+
+# Build vectorstore from document chunks
+def build_vectorstore(chunks):
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    db = FAISS.from_documents(chunks, embedding=embeddings)
+    return db
+
+# Build QA chain
+def build_qa_chain(vectorstore):
+    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.2, "max_length": 512})
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        chain_type="stuff",
+        chain_type_kwargs={"prompt": custom_prompt}
+    )
+    return qa_chain
+
+# Streamlit App
+st.set_page_config(page_title="Accurate PDF Chatbot", layout="centered")
+st.title("PDF QA Chatbot - RAG Powered")
 
 uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
 
+if uploaded_file:
+    with st.spinner("Reading and processing PDF..."):
+        chunks = load_and_split_pdf(uploaded_file)
+        vectorstore = build_vectorstore(chunks)
+        qa_chain = build_qa_chain(vectorstore)
+    st.success("PDF processed. Ask your question below.")
 
+question = st.text_input("Ask a question from the PDF:")
 
+if question:
+    with st.spinner("Searching answer..."):
+        answer = qa_chain.run(question)
+        st.markdown(f"**Answer:** {answer}")
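
Review note: `PyPDFLoader` expects a filesystem path, but Streamlit's `st.file_uploader` returns an in-memory `UploadedFile`, so `PyPDFLoader(uploaded_file.name)` only works if a file with that name already happens to exist in the working directory. Below is a minimal sketch of one way to bridge the two by writing the upload to a temporary file first; the helper name matches the commit, while the temp-file handling is illustrative:

import os
import tempfile

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_and_split_pdf(uploaded_file):
    # Persist the in-memory upload so PyPDFLoader can open it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        os.unlink(tmp_path)  # remove the temporary copy
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    return splitter.split_documents(documents)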
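
Review note: `question` and the `if question:` block sit outside the `if uploaded_file:` guard, so asking a question before a PDF has been processed raises a `NameError` on `qa_chain`, and because Streamlit reruns the script top to bottom on every interaction, the index is also rebuilt on each rerun. Here is a hedged sketch of the bottom of the script using `st.session_state` to address both; it assumes the helpers defined in this commit:

# Bottom of app.py; st, uploaded_file, and the helper functions are defined above.
if uploaded_file and "qa_chain" not in st.session_state:
    with st.spinner("Reading and processing PDF..."):
        chunks = load_and_split_pdf(uploaded_file)
        vectorstore = build_vectorstore(chunks)
        st.session_state["qa_chain"] = build_qa_chain(vectorstore)
    st.success("PDF processed. Ask your question below.")

question = st.text_input("Ask a question from the PDF:")

if question:
    if "qa_chain" not in st.session_state:
        st.warning("Please upload a PDF first.")
    else:
        with st.spinner("Searching answer..."):
            answer = st.session_state["qa_chain"].run(question)
        st.markdown(f"**Answer:** {answer}")

Keying the cache only on presence means a newly uploaded PDF will not replace the old index; keying on the file name or a content hash would fix that at the cost of a little extra bookkeeping.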
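
Review note: the import block mixes new-style `langchain_community` paths with legacy `langchain.embeddings` and `langchain.llms` ones, and the API token is hardcoded in the source. Recent LangChain releases deprecate the legacy paths in favor of the community variants, and reading the token from the environment keeps it out of the repo. A sketch, assuming `langchain-community` is installed:

import os

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub

# Set HUGGINGFACEHUB_API_TOKEN in the Space settings (or host environment)
# instead of committing it to the repository.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")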