Spaces:

rahideer
/

dataset

Sleeping

App Files Files Community

rahideer committed 16 days ago

Commit

743388b

verified ·

1 Parent(s): d28742c

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -71

app.py CHANGED Viewed

@@ -1,98 +1,64 @@
 import streamlit as st
 import PyPDF2
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 from transformers import pipeline
-st.set_page_config(page_title="📘 PDF QA RAG App", layout="wide")
 # Custom styles
 st.markdown("""
     <style>
     .main {background-color: #f7faff;}
-    .block-container {padding-top: 2rem;}
-    h1 {color: #4051b5;}
     .stTextInput>div>div>input {border: 2px solid #d0d7ff;}
-    .stButton button {background-color: #4051b5; color: white; border-radius: 6px;}
-    .stSidebar {background-color: #eaf0ff;}
-    .sample-dropdown label {font-weight: bold;}
     </style>
 """, unsafe_allow_html=True)
-st.title("📘 Ask Me Anything From Your PDF")
-st.caption("Built using RAG (Retrieval-Augmented Generation) ✨")
-st.sidebar.header("📁 Upload PDF")
-uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=["pdf"])
-default_questions = [
-    "What is machine learning?",
-    "Explain generalization in ML.",
-    "What are different types of ML?",
-    "How is ML used in computer vision?",
-    "Describe the importance of training data."
-]
-@st.cache_data
-def load_pdf(file):
-    reader = PyPDF2.PdfReader(file)
-    return [page.extract_text() for page in reader.pages]
 def chunk_text(pages, max_len=1000):
     """Merge the page texts and split them into chunks of at most ``max_len`` words.

     The pages are joined with single spaces and tokenised on whitespace;
     the words of each chunk are re-joined with single spaces. Every chunk
     except possibly the last holds exactly ``max_len`` words.
     """
     tokens = " ".join(pages).split()
     starts = range(0, len(tokens), max_len)
     return [" ".join(tokens[start:start + max_len]) for start in starts]
-def create_faiss_index(chunks, model):
     embeddings = model.encode(chunks)
     index = faiss.IndexFlatL2(embeddings.shape[1])
     index.add(np.array(embeddings))
-    return index, embeddings
-def retrieve_context(question, chunks, index, model, k=6):
-    q_embedding = model.encode([question])
-    _, I = index.search(np.array(q_embedding), k)
-    return "\n\n".join([chunks[i] for i in I[0]])
-if uploaded_file:
-    st.success("✅ PDF uploaded successfully!")
-    pages = load_pdf(uploaded_file)
-    chunks = chunk_text(pages)
-    model = SentenceTransformer('all-MiniLM-L6-v2')
-    index, _ = create_faiss_index(chunks, model)
-    qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
-    st.subheader("💬 Ask a question")
-    col1, col2 = st.columns([3, 1])
-    with col1:
-        question = st.text_input("Enter your question here...", placeholder="e.g. What is deep learning?")
-    with col2:
-        if st.button("Ask"):
-            with st.spinner("🧠 Thinking..."):
-                context = retrieve_context(question, chunks, index, model)
-                result = qa_pipeline(question=question, context=context)
-                with st.expander("📖 Answer", expanded=True):
-                    st.markdown(result['answer'])
-    st.divider()
-    st.subheader("✨ Sample Questions")
-    selected_q = st.selectbox("Pick one to try:", default_questions, key="sample-dropdown")
-    if st.button("Try Selected Question"):
-        with st.spinner("⏳ Searching..."):
-            context = retrieve_context(selected_q, chunks, index, model)
-            result = qa_pipeline(question=selected_q, context=context)
-            with st.expander(f"💡 Answer to: '{selected_q}'", expanded=True):
-                st.markdown(result['answer'])
-    st.divider()
-    st.subheader("📄 Preview PDF Pages")
-    for i, page in enumerate(pages[:3]):
-        st.markdown(f"**Page {i+1}**")
-        st.code(page[:800] + "..." if len(page) > 800 else page)
-else:
-    st.info("Upload a PDF from the sidebar to begin.")

 import streamlit as st
 import PyPDF2
+import os
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 from transformers import pipeline
+st.set_page_config(page_title="📘 PDF RAG QA", layout="wide")
 # Custom styles
 st.markdown("""
     <style>
     .main {background-color: #f7faff;}
+    h1 {color: #4a4a8a;}
     .stTextInput>div>div>input {border: 2px solid #d0d7ff;}
+    .stButton button {background-color: #4a4a8a; color: white;}
     </style>
 """, unsafe_allow_html=True)
+st.title("📘 Ask Me Anything About Machine Learning")
+st.caption("Using RAG (Retrieval-Augmented Generation) and a preloaded PDF")
+# Path of the preloaded PDF that setup_rag() reads; it is relative, so it is
+# resolved against the process's working directory.
+PDF_FILE = "data.pdf"
+def load_pdf(file_path):
+    with open(file_path, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        return [page.extract_text() for page in reader.pages]
 def chunk_text(pages, max_len=1000):
     """Merge the page texts and split them into chunks of at most ``max_len`` words.

     The pages are joined with single spaces and tokenised on whitespace;
     the words of each chunk are re-joined with single spaces. Every chunk
     except possibly the last holds exactly ``max_len`` words.
     """
     tokens = " ".join(pages).split()
     starts = range(0, len(tokens), max_len)
     return [" ".join(tokens[start:start + max_len]) for start in starts]
+# Wrapped in st.cache_resource so the models and index can be reused across
+# Streamlit reruns instead of being rebuilt on every interaction.
+@st.cache_resource
+def setup_rag():
+    """Build every object the app needs to answer questions.
+
+    Loads and chunks ``PDF_FILE``, embeds the chunks with the
+    ``all-MiniLM-L6-v2`` sentence-transformer, adds the embeddings to a
+    FAISS ``IndexFlatL2`` index, and creates the question-answering pipeline.
+
+    Returns:
+        Tuple ``(chunks, model, index, qa)``.
+    """
+    pages = load_pdf(PDF_FILE)
+    chunks = chunk_text(pages)
+    model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
+    # The index dimensionality is taken from the embedding width.
+    # NOTE(review): presumably fails when the PDF yields no text chunks;
+    # confirm and add a guard if that can happen.
     index = faiss.IndexFlatL2(embeddings.shape[1])
     index.add(np.array(embeddings))
+    qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
+    return chunks, model, index, qa
+def retrieve_answer(question, chunks, model, index, qa_pipeline, k=6):
+    q_embed = model.encode([question])
+    _, I = index.search(np.array(q_embed), k)
+    context = "\n\n".join([chunks[i] for i in I[0]])
+    result = qa_pipeline(question=question, context=context)
+    return result['answer']
+# Obtain the chunks, embedding model, FAISS index and QA pipeline before
+# drawing the question UI.
+chunks, embed_model, faiss_index, qa_model = setup_rag()
+st.subheader("💬 Ask a Question")
+question = st.text_input("Enter your question:", placeholder="e.g., What is supervised learning?")
+# Only run retrieval when the text input holds a non-empty value.
+if question:
+    with st.spinner("🧠 Searching for the answer..."):
+        answer = retrieve_answer(question, chunks, embed_model, faiss_index, qa_model)
+        st.markdown("#### 📖 Answer:")
+        st.write(answer)