Spaces:

rahideer
/

dataset

Sleeping

App Files Files Community

rahideer committed on Apr 18

Commit

31a529e

verified ·

1 Parent(s): 1309017

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -20

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ from transformers import pipeline
 import faiss
 import numpy as np
-# Load PDF and extract text
-@st.cache_data
 def load_pdf_text(pdf_path):
     reader = PdfReader(pdf_path)
     text = ''
@@ -14,7 +12,6 @@ def load_pdf_text(pdf_path):
         text += page.extract_text()
     return text
-# Split text into chunks
 def chunk_text(text, max_len=500):
     sentences = text.split('. ')
     chunks, chunk = [], ''
@@ -27,14 +24,12 @@ def chunk_text(text, max_len=500):
     chunks.append(chunk.strip())
     return chunks
-# Embed text using SentenceTransformer
 # NOTE(review): `st` is presumably the Streamlit module (its import is outside
 # this diff view); the decorator is applied to the whole function, so the
 # returned (embeddings, model) pair is what gets cached — confirm.
 @st.cache_resource
 def embed_chunks(chunks):
     """Encode text chunks with a SentenceTransformer model.

     Args:
         chunks: The text chunks to embed; passed as-is to ``model.encode``.

     Returns:
         A ``(embeddings, model)`` tuple: the result of ``model.encode(chunks)``
         and the ``SentenceTransformer`` instance that produced it, so callers
         can reuse the same model to encode queries.
     """
     # The model name is hard-coded; it is instantiated inside the function.
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
     return embeddings, model
-# RAG-style QA using FAISS and Transformers
 def answer_query(query, embeddings, chunks, model, qa_pipeline):
     query_embedding = model.encode([query])
     index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -44,20 +39,16 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
     result = qa_pipeline(question=query, context=context)
     return result['answer']
-# Streamlit UI
-st.title("📄 PDF QA with RAG")
-uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
-if uploaded_file:
-    with open("document.pdf", "wb") as f:
-        f.write(uploaded_file.read())
-    raw_text = load_pdf_text("document.pdf")
-    chunks = chunk_text(raw_text)
-    embeddings, embedder = embed_chunks(chunks)
-    qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
-    query = st.text_input("Ask a question about the PDF:")
-    if query:
-        answer = answer_query(query, embeddings, chunks, embedder, qa)
-        st.success(f"Answer: {answer}")

 import faiss
 import numpy as np
 def load_pdf_text(pdf_path):
     reader = PdfReader(pdf_path)
     text = ''
         text += page.extract_text()
     return text
 def chunk_text(text, max_len=500):
     sentences = text.split('. ')
     chunks, chunk = [], ''
     chunks.append(chunk.strip())
     return chunks
 # NOTE(review): `st` is presumably the Streamlit module (its import is outside
 # this diff view); the decorator wraps the whole function, so the returned
 # (embeddings, model) pair is what gets cached — confirm.
 @st.cache_resource
 def embed_chunks(chunks):
     """Embed the given text chunks and return them with the encoder.

     Args:
         chunks: The text chunks to embed; forwarded unchanged to
             ``model.encode``.

     Returns:
         A ``(embeddings, model)`` tuple, where ``embeddings`` is the output of
         ``model.encode(chunks)`` and ``model`` is the ``SentenceTransformer``
         created here.
     """
     # Fixed model identifier; a new SentenceTransformer is built on each
     # uncached call.
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
     return embeddings, model
 def answer_query(query, embeddings, chunks, model, qa_pipeline):
     query_embedding = model.encode([query])
     index = faiss.IndexFlatL2(embeddings.shape[1])
     result = qa_pipeline(question=query, context=context)
     return result['answer']
+st.title("🤖 RAG PDF QA App")
+st.markdown("Ask questions about the preloaded PDF dataset.")
+pdf_path = "ml_dataset_25_pages.pdf"
+raw_text = load_pdf_text(pdf_path)
+chunks = chunk_text(raw_text)
+embeddings, embedder = embed_chunks(chunks)
+qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+query = st.text_input("Enter your question:")
+if query:
+    answer = answer_query(query, embeddings, chunks, embedder, qa)
+    st.success(f"Answer: {answer}")