pradeepsengarr commited on
Commit
cd88a48
·
verified ·
1 Parent(s): c46f62c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py CHANGED
@@ -135,3 +135,71 @@
135
  # st.error("⚠️ No text could be extracted from the PDF. Try another file.")
136
  # else:
137
  # st.info("Upload a PDF to begin.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # st.error("⚠️ No text could be extracted from the PDF. Try another file.")
136
  # else:
137
  # st.info("Upload a PDF to begin.")
138
+
139
+
140
+
141
+ Filename: app.py
142
+
143
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
144
+
145
# Constants: model identifiers and text-chunking parameters.
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"  # sentence-embedding model backing the FAISS index
LLM_MODEL_REPO = "mistralai/Mistral-7B-Instruct-v0.1"  # hosted LLM used to generate answers
CHUNK_SIZE = 500  # characters per text chunk
CHUNK_OVERLAP = 300  # NOTE(review): overlap is 60% of chunk size — unusually high; confirm intended
148
+
149
# Load and split documents
def load_and_split_pdf(pdf_file):
    """Load an uploaded PDF and split it into overlapping text chunks.

    Args:
        pdf_file: File-like object (e.g. a Streamlit ``UploadedFile``)
            containing the PDF bytes; read once via ``.read()``.

    Returns:
        list: Document chunks produced by ``RecursiveCharacterTextSplitter``
        using the module-level CHUNK_SIZE / CHUNK_OVERLAP settings.
    """
    # PyPDFLoader needs a real filesystem path, so spill the upload to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.read())
        tmp_file_path = tmp_file.name

    try:
        loader = PyPDFLoader(tmp_file_path)
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
        )
        return splitter.split_documents(documents)
    finally:
        # Fix: the original created the file with delete=False and never
        # removed it, leaking one temp file per upload.
        os.unlink(tmp_file_path)
158
+
159
# Create FAISS vectorstore
def build_vectorstore(chunks):
    """Embed the document chunks and index them in an in-memory FAISS store."""
    embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return FAISS.from_documents(chunks, embedding=embedding_model)
162
+
163
# Initialize LLM from Hugging Face Hub
def get_llm():
    """Return the Hugging Face Hub LLM configured for focused, low-temperature answers."""
    generation_kwargs = {"temperature": 0.3, "max_new_tokens": 512, "top_k": 10}
    return HuggingFaceHub(repo_id=LLM_MODEL_REPO, model_kwargs=generation_kwargs)
166
+
167
# Custom prompt for better accuracy: plain template text with {context} and
# {question} placeholders, consumed by the "stuff" RetrievalQA chain.
CUSTOM_PROMPT = """ You are a professional resume chatbot. Use the context below to accurately and concisely answer the user's question. If the information is not available in the context, say "Not found in the document.".

Context: {context}

Question: {question}

Answer: """
176
+
177
# Build QA chain
def build_qa_chain(vectorstore):
    """Build a RetrievalQA chain over *vectorstore* using the custom prompt.

    Args:
        vectorstore: A FAISS vectorstore providing ``as_retriever()``.

    Returns:
        RetrievalQA: A "stuff"-type chain wired to the Hub LLM.

    Fix: ``chain_type_kwargs["prompt"]`` must be a ``PromptTemplate`` — the
    original passed the raw CUSTOM_PROMPT string, which fails when the chain
    is constructed.
    """
    prompt = PromptTemplate(
        template=CUSTOM_PROMPT,
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=get_llm(),
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
    )
180
+
181
# Streamlit UI
def main():
    """Streamlit entry point: upload a resume PDF, then answer questions about it."""
    st.set_page_config(page_title="Resume Q&A Bot", layout="wide")
    st.title("Resume Chatbot - Ask Anything About the Uploaded PDF")

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type="pdf")

    if uploaded_file is not None:
        st.success("PDF uploaded successfully!")

        # Fix: Streamlit reruns the whole script on every widget interaction,
        # so the original re-chunked, re-embedded and re-indexed the PDF for
        # every question asked. Cache the chain in session_state, keyed by the
        # uploaded file's identity, and rebuild only when the file changes.
        file_key = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("qa_file_key") != file_key:
            with st.spinner("Processing document and creating knowledge base..."):
                chunks = load_and_split_pdf(uploaded_file)
                vectorstore = build_vectorstore(chunks)
                st.session_state["qa_chain"] = build_qa_chain(vectorstore)
                st.session_state["qa_file_key"] = file_key

        st.success("Knowledge base ready! Ask your question below:")

        question = st.text_input("Your Question:")

        if question:
            with st.spinner("Generating answer..."):
                response = st.session_state["qa_chain"].run(question)
            st.markdown(f"**Answer:** {response}")
202
+
203
# Fix: the guard must compare the __name__ dunder against '__main__';
# the original `if name == 'main':` raises NameError at import time.
if __name__ == '__main__':
    main()
204
+
205
+