pradeepsengarr committed on
Commit
7afdcd2
·
verified ·
1 Parent(s): 44a599e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -59
app.py CHANGED
@@ -137,88 +137,89 @@
137
  # st.info("Upload a PDF to begin.")
138
 
139
 
140
-
141
  import streamlit as st
142
  from langchain_community.document_loaders import PyPDFLoader
143
- from langchain.text_splitter import RecursiveCharacterTextSplitter
 
144
  from langchain_community.vectorstores import FAISS
145
- from langchain.embeddings import HuggingFaceEmbeddings
146
  from langchain.chains import RetrievalQA
147
  from langchain.prompts import PromptTemplate
148
  from langchain.llms import HuggingFaceHub
149
- import os
150
 
151
- # Set Hugging Face API Token
152
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingfacehub_api_token_here"
 
 
 
 
 
 
 
 
 
 
 
153
 
154
- # Custom Prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  custom_prompt = PromptTemplate(
156
  input_variables=["context", "question"],
157
  template="""
158
- You are a helpful assistant. Use the context below to answer the question.
159
- If the answer is not in the context, say "I don't know."
160
 
161
  Context:
162
  {context}
163
 
164
- Question:
165
- {question}
166
 
167
- Answer:
168
- """
169
  )
170
 
171
- # Load PDF and split into chunks
172
-
173
- from langchain_community.document_loaders import PyPDFLoader
174
- import tempfile
175
-
176
- def load_and_split_pdf(uploaded_file):
177
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
178
- tmp_file.write(uploaded_file.read())
179
- tmp_file_path = tmp_file.name
180
-
181
- loader = PyPDFLoader(tmp_file_path)
182
- documents = loader.load()
183
-
184
- # Then your text splitting logic follows
185
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
186
- chunks = text_splitter.split_documents(documents)
187
- return chunks
188
-
189
- # Build vectorstore from document chunks
190
- def build_vectorstore(chunks):
191
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
192
- db = FAISS.from_documents(chunks, embedding=embeddings)
193
- return db
194
-
195
  # Build QA chain
196
  def build_qa_chain(vectorstore):
197
- llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.2, "max_length": 512})
198
  qa_chain = RetrievalQA.from_chain_type(
199
  llm=llm,
200
- retriever=vectorstore.as_retriever(),
201
- chain_type="stuff",
202
  chain_type_kwargs={"prompt": custom_prompt}
203
  )
204
  return qa_chain
205
 
206
- # Streamlit App
207
- st.set_page_config(page_title="Accurate PDF Chatbot", layout="centered")
208
- st.title("PDF QA Chatbot - RAG Powered")
209
-
210
- uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
211
-
212
- if uploaded_file:
213
- with st.spinner("Reading and processing PDF..."):
214
- chunks = load_and_split_pdf(uploaded_file)
215
- vectorstore = build_vectorstore(chunks)
216
- qa_chain = build_qa_chain(vectorstore)
217
- st.success("PDF processed. Ask your question below.")
218
-
219
- question = st.text_input("Ask a question from the PDF:")
220
-
221
- if question:
222
- with st.spinner("Searching answer..."):
223
- answer = qa_chain.run(question)
224
- st.markdown(f"**Answer:** {answer}")
 
 
 
 
 
 
137
  # st.info("Upload a PDF to begin.")
138
 
139
 
140
+ import os
141
  import streamlit as st
142
  from langchain_community.document_loaders import PyPDFLoader
143
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
144
+ from langchain_community.embeddings import HuggingFaceEmbeddings
145
  from langchain_community.vectorstores import FAISS
 
146
  from langchain.chains import RetrievalQA
147
  from langchain.prompts import PromptTemplate
148
  from langchain.llms import HuggingFaceHub
 
149
 
150
# Hugging Face API token. Keep a token that is already present in the
# environment (for example a deployment secret) and only fall back to the
# placeholder when none is set, instead of overwriting it unconditionally.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "your_hf_token_here")
152
+
153
+ # Load and split PDF
154
def load_and_split_pdf(uploaded_file):
    """Persist an uploaded PDF to a temporary file, load it, and split it.

    Args:
        uploaded_file: Binary file-like object exposing ``read()`` (the value
            passed in from ``st.file_uploader`` by the caller).

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        the loaded PDF, using ``chunk_size=500`` and ``chunk_overlap=100``.
    """
    import os
    import tempfile

    # Rewind first so a stream that has already been read still yields the
    # whole file rather than an empty byte string.
    if hasattr(uploaded_file, "seek"):
        uploaded_file.seek(0)

    # PyPDFLoader is given a filesystem path, so the bytes must hit disk.
    # A unique temporary file is used instead of a fixed "temp.pdf" so that
    # concurrent sessions cannot overwrite each other's upload.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.read())
        tmp_path = tmp_file.name

    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        # The file is only needed while loading; do not leave it behind.
        os.remove(tmp_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    return text_splitter.split_documents(documents)
163
 
164
+ # Build vectorstore
165
def build_vectorstore(chunks):
    """Index ``chunks`` in a FAISS store using MiniLM sentence embeddings.

    Args:
        chunks: Documents to embed, as produced by ``load_and_split_pdf``.

    Returns:
        The FAISS vector store built from ``chunks``.
    """
    return FAISS.from_documents(
        chunks,
        embedding=HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        ),
    )
169
+
170
+ # Load Lamini or other HF model
171
def get_llm():
    """Return the Hugging Face Hub model used to generate answers."""
    generation_settings = {"temperature": 0.2, "max_new_tokens": 512}
    hub_model = HuggingFaceHub(
        repo_id="lamini/lamini-13b-chat",
        model_kwargs=generation_settings,
    )
    return hub_model
176
+
177
+ # Prompt template: answer only from the retrieved context, otherwise reply "Not found in the document."
178
  custom_prompt = PromptTemplate(
179
  input_variables=["context", "question"],
180
  template="""
181
+ You are a helpful assistant. Use the following context to answer the question as accurately as possible.
182
+ If the answer is not in the context, respond with "Not found in the document."
183
 
184
  Context:
185
  {context}
186
 
187
+ Question: {question}
 
188
 
189
+ Answer:"""
 
190
  )
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # Build QA chain
193
  def build_qa_chain(vectorstore):
194
+ llm = get_llm()
195
  qa_chain = RetrievalQA.from_chain_type(
196
  llm=llm,
197
+ retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
 
198
  chain_type_kwargs={"prompt": custom_prompt}
199
  )
200
  return qa_chain
201
 
202
+ # Streamlit UI
203
def main():
    """Render the Streamlit page: upload a PDF, index it, answer questions.

    Streamlit re-executes the script on every widget interaction, so the QA
    chain is stored in ``st.session_state`` and rebuilt only when a different
    file is uploaded. Without this, each typed question would re-parse and
    re-embed the whole PDF.
    """
    st.set_page_config(page_title="PDF Chatbot", layout="wide")
    st.title("Chat with your PDF")

    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if not uploaded_file:
        return

    st.success("PDF uploaded successfully!")

    # Name plus size is used as a cheap identity for the current upload.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            chunks = load_and_split_pdf(uploaded_file)
            if not chunks:
                # Nothing to index (e.g. a PDF with no extractable text);
                # stop here instead of failing while building the index.
                st.error("No readable text was found in this PDF.")
                return
            vectorstore = build_vectorstore(chunks)
            st.session_state["qa_chain"] = build_qa_chain(vectorstore)
            st.session_state["qa_file_key"] = file_key

    qa_chain = st.session_state["qa_chain"]
    st.success("Ready to chat!")

    user_question = st.text_input("Ask a question based on the PDF:")
    if user_question:
        with st.spinner("Generating answer..."):
            result = qa_chain.run(user_question)
        st.markdown("**Answer:**")
        st.write(result)


if __name__ == "__main__":
    main()