Spaces:

rahideer
/

dataset

Sleeping

App Files Files Community

rahideer committed on Apr 18

Commit

1f717c0

verified ·

1 Parent(s): 31a529e

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from transformers import pipeline
 import faiss
 import numpy as np
 def load_pdf_text(pdf_path):
     reader = PdfReader(pdf_path)
     text = ''
@@ -12,6 +13,7 @@ def load_pdf_text(pdf_path):
         text += page.extract_text()
     return text
 def chunk_text(text, max_len=500):
     sentences = text.split('. ')
     chunks, chunk = [], ''
@@ -24,12 +26,14 @@ def chunk_text(text, max_len=500):
     chunks.append(chunk.strip())
     return chunks
 @st.cache_resource
 def embed_chunks(chunks):
     """Encode text chunks with a SentenceTransformer model.

     Loads the 'all-MiniLM-L6-v2' SentenceTransformer, encodes ``chunks``
     with it, and returns both the result and the model so callers can
     encode queries with the same model.

     Args:
         chunks: The text chunks to embed (passed straight to
             ``model.encode``).

     Returns:
         A ``(embeddings, model)`` tuple: the output of
         ``model.encode(chunks)`` and the SentenceTransformer instance.
     """
     # NOTE(review): st.cache_resource presumably keeps the loaded model and
     # embeddings across Streamlit reruns -- confirm against Streamlit docs.
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
     return embeddings, model
 def answer_query(query, embeddings, chunks, model, qa_pipeline):
     query_embedding = model.encode([query])
     index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -39,16 +43,31 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
     result = qa_pipeline(question=query, context=context)
     return result['answer']
 st.title("🤖 RAG PDF QA App")
 st.markdown("Ask questions about the preloaded PDF dataset.")
 pdf_path = "ml_dataset_25_pages.pdf"
 raw_text = load_pdf_text(pdf_path)
 chunks = chunk_text(raw_text)
 embeddings, embedder = embed_chunks(chunks)
 qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
-query = st.text_input("Enter your question:")
 if query:
     answer = answer_query(query, embeddings, chunks, embedder, qa)
     st.success(f"Answer: {answer}")

 import faiss
 import numpy as np
+# Load and extract text from local PDF
 def load_pdf_text(pdf_path):
     reader = PdfReader(pdf_path)
     text = ''
         text += page.extract_text()
     return text
+# Split text into chunks
 def chunk_text(text, max_len=500):
     sentences = text.split('. ')
     chunks, chunk = [], ''
     chunks.append(chunk.strip())
     return chunks
+# Embed text using SentenceTransformer
 @st.cache_resource
 def embed_chunks(chunks):
     """Encode text chunks with a SentenceTransformer model.

     Loads the 'all-MiniLM-L6-v2' SentenceTransformer, encodes ``chunks``
     with it, and returns both the result and the model so callers can
     encode queries with the same model.

     Args:
         chunks: The text chunks to embed (passed straight to
             ``model.encode``).

     Returns:
         A ``(embeddings, model)`` tuple: the output of
         ``model.encode(chunks)`` and the SentenceTransformer instance.
     """
     # NOTE(review): st.cache_resource presumably keeps the loaded model and
     # embeddings across Streamlit reruns -- confirm against Streamlit docs.
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
     return embeddings, model
+# RAG QA using FAISS index and QA pipeline
 def answer_query(query, embeddings, chunks, model, qa_pipeline):
     query_embedding = model.encode([query])
     index = faiss.IndexFlatL2(embeddings.shape[1])
     result = qa_pipeline(question=query, context=context)
     return result['answer']
+# Main app
 st.title("🤖 RAG PDF QA App")
 st.markdown("Ask questions about the preloaded PDF dataset.")
+# Load and process the PDF
 pdf_path = "ml_dataset_25_pages.pdf"
 raw_text = load_pdf_text(pdf_path)
 chunks = chunk_text(raw_text)
 embeddings, embedder = embed_chunks(chunks)
 qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+# Show sample questions
+st.subheader("Ask a Question")
+st.markdown("Here are some questions you can try:")
+st.markdown("""
+- What is supervised learning?
+- Explain the difference between regression and classification.
+- What are the applications of machine learning?
+- How does decision tree algorithm work?
+- What is overfitting in machine learning?
+""")
+# User input
+query = st.text_input("Enter your question below:")
 if query:
     answer = answer_query(query, embeddings, chunks, embedder, qa)
     st.success(f"Answer: {answer}")