pradeepsengarr committed on
Commit 9dd6815 · verified · 1 Parent(s): fb66ccc

Update app.py

Files changed (1)
  1. app.py +92 -0
app.py CHANGED
@@ -138,3 +138,95 @@
+import streamlit as st
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import PyPDFLoader
+from langchain.chains import RetrievalQA
+from langchain.llms import HuggingFaceHub
+from langchain.prompts import PromptTemplate  # needed to wrap the custom prompt below
+import tempfile
+import os
+
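+# NOTE: these import paths match the older LangChain releases this app was
+# written against; LangChain 0.1+ moved these classes into langchain_community.
+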
+# Constants
+EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"
+LLM_MODEL_REPO = "mistralai/Mistral-7B-Instruct-v0.1"
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 300
+
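+# Note: an overlap of 300 on 500-character chunks repeats roughly 60% of each
+# chunk in the next one; a smaller overlap (e.g. 50-100) is the more common choice.
+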
+# Load and split documents
+def load_and_split_pdf(pdf_file):
+    # Persist the uploaded bytes to a temp file so PyPDFLoader can read a path
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+        tmp_file.write(pdf_file.read())
+        tmp_file_path = tmp_file.name
+
+    loader = PyPDFLoader(tmp_file_path)
+    documents = loader.load()
+    os.unlink(tmp_file_path)  # clean up the temp file once the pages are loaded
+    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
+    chunks = splitter.split_documents(documents)
+    return chunks
+
+# Create FAISS vectorstore
+def build_vectorstore(chunks):
+    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
+    db = FAISS.from_documents(chunks, embedding=embeddings)
+    return db
+
+# Initialize LLM from Hugging Face Hub
+# (reads the HUGGINGFACEHUB_API_TOKEN environment variable for authentication)
+def get_llm():
+    return HuggingFaceHub(
+        repo_id=LLM_MODEL_REPO,
+        model_kwargs={"temperature": 0.3, "max_new_tokens": 512, "top_k": 10}
+    )
+
+# Custom prompt for better accuracy; RetrievalQA expects a PromptTemplate here,
+# not a plain string
+CUSTOM_PROMPT = PromptTemplate(
+    template="""
+You are a professional resume chatbot. Use the context below to accurately and concisely answer the user's question. If the information is not available in the context, say "Not found in the document."
+
+Context:
+{context}
+
+Question:
+{question}
+
+Answer:
+""",
+    input_variables=["context", "question"],
+)
+
+# Build QA chain
+def build_qa_chain(vectorstore):
+    return RetrievalQA.from_chain_type(
+        llm=get_llm(),
+        retriever=vectorstore.as_retriever(),
+        chain_type="stuff",
+        chain_type_kwargs={
+            "prompt": CUSTOM_PROMPT
+        }
+    )
+
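+# Note: chain_type="stuff" packs every retrieved chunk into a single prompt,
+# which suits a short resume but can overflow the model's context window on
+# longer documents.
+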
+# Streamlit UI
+def main():
+    st.set_page_config(page_title="Resume Q&A Bot", layout="wide")
+    st.title("Resume Chatbot - Ask Anything About the Uploaded PDF")
+
+    uploaded_file = st.file_uploader("Upload your resume (PDF)", type="pdf")
+
+    if uploaded_file is not None:
+        st.success("PDF uploaded successfully!")
+        with st.spinner("Processing document and creating knowledge base..."):
+            chunks = load_and_split_pdf(uploaded_file)
+            vectorstore = build_vectorstore(chunks)
+            qa_chain = build_qa_chain(vectorstore)
+
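+        # Note: Streamlit reruns this script on every widget interaction, so the
+        # PDF is re-split and re-embedded before each question is answered; caching
+        # the chain (e.g. in st.session_state) is a possible refinement not made here.
+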
+        st.success("Knowledge base ready! Ask your question below:")
+
+        question = st.text_input("Your Question:")
+
+        if question:
+            with st.spinner("Generating answer..."):
+                response = qa_chain.run(question)
+            st.markdown(f"**Answer:** {response}")
+
+if __name__ == '__main__':
+    main()
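+
+# Running locally (a sketch; dependencies are inferred from the imports above):
+#   pip install streamlit langchain faiss-cpu sentence-transformers pypdf
+#   export HUGGINGFACEHUB_API_TOKEN=<your-token>
+#   streamlit run app.py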