Update app.py
app.py
CHANGED
@@ -5,7 +5,45 @@ from transformers import pipeline
 import faiss
 import numpy as np

-#
+# ---------- Custom CSS ----------
+def apply_custom_style():
+    st.markdown("""
+    <style>
+    html, body, [class*="css"] {
+        font-family: 'Segoe UI', sans-serif;
+        background-color: #f0f4ff;
+    }
+    .title {
+        background: linear-gradient(to right, #4a90e2, #00c6ff);
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+        font-size: 2.5em;
+        font-weight: bold;
+    }
+    .subtitle {
+        color: #444;
+        font-size: 1.2em;
+        margin-bottom: 1rem;
+    }
+    .question-box {
+        background-color: #fff;
+        padding: 1rem;
+        border-radius: 10px;
+        box-shadow: 0px 2px 10px rgba(0,0,0,0.1);
+        margin-bottom: 1rem;
+    }
+    .example {
+        color: #444;
+        background: #e9f0ff;
+        padding: 0.5rem;
+        border-radius: 8px;
+        margin: 3px 0;
+        cursor: pointer;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+
+# ---------- PDF Reading ----------
 def load_pdf_text(pdf_path):
     reader = PdfReader(pdf_path)
     text = ''
@@ -13,7 +51,6 @@ def load_pdf_text(pdf_path):
         text += page.extract_text()
     return text

-# Split text into chunks
 def chunk_text(text, max_len=500):
     sentences = text.split('. ')
     chunks, chunk = [], ''
@@ -26,14 +63,12 @@ def chunk_text(text, max_len=500):
     chunks.append(chunk.strip())
     return chunks

-# Embed text using SentenceTransformer
 @st.cache_resource
 def embed_chunks(chunks):
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(chunks)
     return embeddings, model

-# RAG QA using FAISS index and QA pipeline
 def answer_query(query, embeddings, chunks, model, qa_pipeline):
     query_embedding = model.encode([query])
     index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -43,31 +78,34 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
     result = qa_pipeline(question=query, context=context)
     return result['answer']

-#
-
-st.markdown("
+# ---------- App Layout ----------
+apply_custom_style()
+st.markdown('<div class="title">🤖 RAG PDF Q&A App</div>', unsafe_allow_html=True)
+st.markdown('<div class="subtitle">Ask questions about a machine learning PDF. Powered by Transformers!</div>', unsafe_allow_html=True)

-# Load and process
+# Load and process PDF
 pdf_path = "ml_dataset_25_pages.pdf"
 raw_text = load_pdf_text(pdf_path)
 chunks = chunk_text(raw_text)
 embeddings, embedder = embed_chunks(chunks)
 qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

-#
-st.
-
-
-
-
-
-
-
-
-
-
-query = st.text_input("Enter your question below:")
+# Sample questions
+st.markdown('<div class="question-box"><strong>💡 Sample Questions:</strong>', unsafe_allow_html=True)
+sample_questions = [
+    "What is supervised learning?",
+    "Explain the difference between regression and classification.",
+    "What are the applications of machine learning?",
+    "How does decision tree algorithm work?",
+    "What is overfitting in machine learning?"
+]
+for q in sample_questions:
+    st.markdown(f'<div class="example">{q}</div>', unsafe_allow_html=True)
+st.markdown('</div>', unsafe_allow_html=True)

+# User Input
+query = st.text_input("🔍 Ask your question:")
 if query:
-
-
+    with st.spinner("Thinking..."):
+        answer = answer_query(query, embeddings, chunks, embedder, qa)
+    st.success(f"🧠 Answer: {answer}")
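The hunks above skip new lines 75-77 of answer_query, the step that selects the nearest chunks and builds the context string passed to the QA pipeline. A minimal sketch of what that elided step presumably does, using only the FAISS calls already visible in the diff (the helper name retrieve_context, the k value, and the float32 casts are illustrative assumptions, not taken from the commit):

import faiss
import numpy as np

def retrieve_context(query_embedding, embeddings, chunks, k=3):
    # Flat L2 index over the chunk embeddings, as in answer_query.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))
    # Find the k chunks closest to the query embedding.
    _, ids = index.search(np.asarray(query_embedding, dtype="float32"), k)
    # Concatenate the retrieved chunks into one context string for the QA pipeline.
    return " ".join(chunks[i] for i in ids[0])

Note that the committed answer_query rebuilds the FAISS index on every question; only embed_chunks is cached with @st.cache_resource. With the imports above the shown hunks in place (streamlit, PdfReader, SentenceTransformer, and the transformers pipeline) and ml_dataset_25_pages.pdf alongside the script, the app is started with streamlit run app.py.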