Spaces:

pradeepsengarr
/

Bot_RAG

Sleeping

App Files Files Community

pradeepsengarr committed 15 days ago

Commit

cd88a48

verified ·

1 Parent(s): c46f62c

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -0

app.py CHANGED Viewed

@@ -135,3 +135,71 @@
 #         st.error("⚠️ No text could be extracted from the PDF. Try another file.")
 # else:
 #     st.info("Upload a PDF to begin.")

 #         st.error("⚠️ No text could be extracted from the PDF. Try another file.")
 # else:
 #     st.info("Upload a PDF to begin.")
# Filename: app.py
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# Constants
# Model name handed to HuggingFaceEmbeddings when building the vector store.
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"
# Hugging Face Hub repo id handed to HuggingFaceHub for answer generation.
LLM_MODEL_REPO = "mistralai/Mistral-7B-Instruct-v0.1"
# Passed as chunk_size / chunk_overlap to RecursiveCharacterTextSplitter.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 300
# Load and split documents
def load_and_split_pdf(pdf_file):
    """Load an uploaded PDF and split it into overlapping chunks.

    Args:
        pdf_file: Binary file-like object; its ``read()`` result is written
            verbatim to a temporary ``.pdf`` file.

    Returns:
        The chunks returned by
        ``RecursiveCharacterTextSplitter.split_documents`` for the loaded
        PDF, using ``CHUNK_SIZE`` and ``CHUNK_OVERLAP``.
    """
    # PyPDFLoader is given a filesystem path, so the upload is first spooled
    # to a named temp file. The file is closed (by leaving the ``with``)
    # before the loader opens it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.read())
        tmp_file_path = tmp_file.name

    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # delete=False means nothing else cleans this up; remove it even when
        # loading fails so repeated uploads do not accumulate temp files.
        os.remove(tmp_file_path)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    return splitter.split_documents(documents)
# Create FAISS vectorstore
def build_vectorstore(chunks):
    """Embed document chunks and index them in a FAISS vector store.

    Args:
        chunks: Sequence of document chunks, as produced by
            ``load_and_split_pdf``.

    Returns:
        The store returned by ``FAISS.from_documents`` for ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    # Fail fast with an actionable message instead of an obscure error from
    # deep inside the index construction (e.g. an image-only PDF yields no text).
    if not chunks:
        raise ValueError("Cannot build a vector store from an empty list of chunks.")

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return FAISS.from_documents(chunks, embedding=embeddings)
# Initialize LLM from Hugging Face Hub
def get_llm():
    """Return a ``HuggingFaceHub`` LLM for ``LLM_MODEL_REPO``.

    The generation settings (temperature, max_new_tokens, top_k) are fixed
    here and forwarded through ``model_kwargs``.
    """
    generation_settings = {"temperature": 0.3, "max_new_tokens": 512, "top_k": 10}
    return HuggingFaceHub(repo_id=LLM_MODEL_REPO, model_kwargs=generation_settings)
# Custom prompt for better accuracy
# Template text used as the prompt of the QA chain built in build_qa_chain.
# {context} and {question} are the two placeholders the template exposes.
CUSTOM_PROMPT = """ You are a professional resume chatbot. Use the context below to accurately and concisely answer the user's question. If the information is not available in the context, say "Not found in the document.".
Context: {context}
Question: {question}
Answer: """
# Build QA chain
def build_qa_chain(vectorstore):
    """Build a ``RetrievalQA`` "stuff" chain over ``vectorstore``.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever supplies
            the context documents for each question.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``, using the LLM
        from ``get_llm()`` and ``CUSTOM_PROMPT`` as its prompt.
    """
    # CUSTOM_PROMPT is a plain string; the chain expects a prompt-template
    # object, so wrap it and declare the two placeholders it contains.
    prompt = PromptTemplate(
        template=CUSTOM_PROMPT,
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=get_llm(),
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
    )
# Streamlit UI
def main():
    """Render the resume Q&A page.

    Flow: configure the page, accept a PDF upload, build (or reuse) the QA
    chain for that upload, then answer the question typed by the user.
    """
    st.set_page_config(page_title="Resume Q&A Bot", layout="wide")
    st.title("Resume Chatbot - Ask Anything About the Uploaded PDF")

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type="pdf")
    if uploaded_file is None:
        return

    st.success("PDF uploaded successfully!")

    # Streamlit re-executes this script on every interaction, including each
    # submitted question. Keep the chain in session state, keyed by the
    # upload's name and size, so the PDF is only parsed and embedded once.
    cache_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_chain_key") != cache_key:
        with st.spinner("Processing document and creating knowledge base..."):
            chunks = load_and_split_pdf(uploaded_file)
            if not chunks:
                # Nothing to index (e.g. no extractable text): report it
                # instead of failing while building the vector store.
                st.error("⚠️ No text could be extracted from the PDF. Try another file.")
                return
            vectorstore = build_vectorstore(chunks)
            st.session_state["qa_chain"] = build_qa_chain(vectorstore)
            st.session_state["qa_chain_key"] = cache_key

    qa_chain = st.session_state["qa_chain"]
    st.success("Knowledge base ready! Ask your question below:")

    question = st.text_input("Your Question:")
    if question:
        with st.spinner("Generating answer..."):
            response = qa_chain.run(question)
            st.markdown(f"**Answer:** {response}")
# Script entry point: only start the UI when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()