Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -138,3 +138,95 @@
|
|
138 |
|
139 |
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
|
140 |
|
141 |
+
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
|
150 |
+
|
151 |
+
# Constants
# Embedding model used to vectorize resume chunks (BGE base, English).
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"
# LLM served remotely via the Hugging Face Hub inference API.
LLM_MODEL_REPO = "mistralai/Mistral-7B-Instruct-v0.1"
# Character-based chunking parameters for RecursiveCharacterTextSplitter.
# NOTE(review): an overlap of 300 on 500-char chunks duplicates ~60% of every
# chunk in its neighbor — confirm this is intentional (index size vs. recall).
CHUNK_SIZE = 500
CHUNK_OVERLAP = 300
|
156 |
+
|
157 |
+
# Load and split documents
def load_and_split_pdf(pdf_file):
    """Persist the uploaded PDF to a temp file, load it, and split into chunks.

    Args:
        pdf_file: A file-like object (e.g. a Streamlit UploadedFile) whose
            bytes are the PDF; it is read exactly once.

    Returns:
        list: Document chunks produced by RecursiveCharacterTextSplitter
        using CHUNK_SIZE / CHUNK_OVERLAP.
    """
    # PyPDFLoader needs a filesystem path, so spill the upload to disk first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.read())
        tmp_file_path = tmp_file.name

    try:
        loader = PyPDFLoader(tmp_file_path)
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
        )
        return splitter.split_documents(documents)
    finally:
        # delete=False means the OS will not clean this up for us; remove the
        # temp file explicitly so repeated uploads don't leak files on disk.
        os.unlink(tmp_file_path)
|
168 |
+
|
169 |
+
# Create FAISS vectorstore
def build_vectorstore(chunks):
    """Embed *chunks* and index them in an in-memory FAISS vector store."""
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return FAISS.from_documents(chunks, embedding=embedder)
|
174 |
+
|
175 |
+
# Initialize LLM from Hugging Face Hub
def get_llm():
    """Return a HuggingFaceHub LLM configured for focused, short answers."""
    generation_kwargs = {
        "temperature": 0.3,
        "max_new_tokens": 512,
        "top_k": 10,
    }
    return HuggingFaceHub(repo_id=LLM_MODEL_REPO, model_kwargs=generation_kwargs)
|
181 |
+
|
182 |
+
# Custom prompt for better accuracy
# Template with {context} and {question} placeholders; consumed by the
# RetrievalQA "stuff" chain built in build_qa_chain.
CUSTOM_PROMPT = """
You are a professional resume chatbot. Use the context below to accurately and concisely answer the user's question. If the information is not available in the context, say "Not found in the document.".

Context:
{context}

Question:
{question}

Answer:
"""
|
194 |
+
|
195 |
+
# Build QA chain
def build_qa_chain(vectorstore):
    """Assemble a RetrievalQA chain over *vectorstore* with the custom prompt.

    Fix: chain_type_kwargs["prompt"] must be a PromptTemplate — the original
    passed the raw CUSTOM_PROMPT string, which fails LangChain's chain
    validation. Wrap the template and declare its input variables, which must
    match the {context}/{question} placeholders in CUSTOM_PROMPT.

    Args:
        vectorstore: A FAISS (or any LangChain) vector store to retrieve from.

    Returns:
        RetrievalQA: A "stuff"-type QA chain ready for .run(question).
    """
    prompt = PromptTemplate(
        template=CUSTOM_PROMPT,
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=get_llm(),
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
    )
|
205 |
+
|
206 |
+
# Streamlit UI
def main():
    """Streamlit entry point: upload a resume PDF, then ask questions about it.

    Fix: Streamlit reruns this entire script on every widget interaction, so
    the original rebuilt the chunk/embed/index pipeline each time a question
    was typed. The QA chain is now cached in st.session_state, keyed by the
    uploaded file's name, and only rebuilt when a different file is uploaded.
    """
    st.set_page_config(page_title="Resume Q&A Bot", layout="wide")
    st.title("Resume Chatbot - Ask Anything About the Uploaded PDF")

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type="pdf")

    if uploaded_file is not None:
        st.success("PDF uploaded successfully!")

        # Rebuild the knowledge base only when the uploaded file changes.
        if st.session_state.get("qa_file_name") != uploaded_file.name:
            with st.spinner("Processing document and creating knowledge base..."):
                chunks = load_and_split_pdf(uploaded_file)
                vectorstore = build_vectorstore(chunks)
                st.session_state["qa_chain"] = build_qa_chain(vectorstore)
                st.session_state["qa_file_name"] = uploaded_file.name
        qa_chain = st.session_state["qa_chain"]

        st.success("Knowledge base ready! Ask your question below:")

        question = st.text_input("Your Question:")

        if question:
            with st.spinner("Generating answer..."):
                response = qa_chain.run(question)
            st.markdown(f"**Answer:** {response}")
|
228 |
+
|
229 |
+
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
231 |
+
|
232 |
+
|