rahideer committed on
Commit
c0a9dbe
·
verified ·
1 Parent(s): 1f717c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -23
app.py CHANGED
@@ -5,7 +5,45 @@ from transformers import pipeline
5
  import faiss
6
  import numpy as np
7
 
8
- # Load and extract text from local PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def load_pdf_text(pdf_path):
10
  reader = PdfReader(pdf_path)
11
  text = ''
@@ -13,7 +51,6 @@ def load_pdf_text(pdf_path):
13
  text += page.extract_text()
14
  return text
15
 
16
- # Split text into chunks
17
  def chunk_text(text, max_len=500):
18
  sentences = text.split('. ')
19
  chunks, chunk = [], ''
@@ -26,14 +63,12 @@ def chunk_text(text, max_len=500):
26
  chunks.append(chunk.strip())
27
  return chunks
28
 
29
- # Embed text using SentenceTransformer
30
  @st.cache_resource
31
  def embed_chunks(chunks):
32
  model = SentenceTransformer('all-MiniLM-L6-v2')
33
  embeddings = model.encode(chunks)
34
  return embeddings, model
35
 
36
- # RAG QA using FAISS index and QA pipeline
37
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
38
  query_embedding = model.encode([query])
39
  index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -43,31 +78,34 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
43
  result = qa_pipeline(question=query, context=context)
44
  return result['answer']
45
 
46
- # Main app
47
- st.title("🤖 RAG PDF QA App")
48
- st.markdown("Ask questions about the preloaded PDF dataset.")
 
49
 
50
- # Load and process the PDF
51
  pdf_path = "ml_dataset_25_pages.pdf"
52
  raw_text = load_pdf_text(pdf_path)
53
  chunks = chunk_text(raw_text)
54
  embeddings, embedder = embed_chunks(chunks)
55
  qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
56
 
57
- # Show sample questions
58
- st.subheader("Ask a Question")
59
- st.markdown("Here are some questions you can try:")
60
- st.markdown("""
61
- - What is supervised learning?
62
- - Explain the difference between regression and classification.
63
- - What are the applications of machine learning?
64
- - How does decision tree algorithm work?
65
- - What is overfitting in machine learning?
66
- """)
67
-
68
- # User input
69
- query = st.text_input("Enter your question below:")
70
 
 
 
71
  if query:
72
- answer = answer_query(query, embeddings, chunks, embedder, qa)
73
- st.success(f"Answer: {answer}")
 
 
5
  import faiss
6
  import numpy as np
7
 
8
+ # ---------- Custom CSS ----------
9
+ def apply_custom_style():
10
+ st.markdown("""
11
+ <style>
12
+ html, body, [class*="css"] {
13
+ font-family: 'Segoe UI', sans-serif;
14
+ background-color: #f0f4ff;
15
+ }
16
+ .title {
17
+ background: linear-gradient(to right, #4a90e2, #00c6ff);
18
+ -webkit-background-clip: text;
19
+ -webkit-text-fill-color: transparent;
20
+ font-size: 2.5em;
21
+ font-weight: bold;
22
+ }
23
+ .subtitle {
24
+ color: #444;
25
+ font-size: 1.2em;
26
+ margin-bottom: 1rem;
27
+ }
28
+ .question-box {
29
+ background-color: #fff;
30
+ padding: 1rem;
31
+ border-radius: 10px;
32
+ box-shadow: 0px 2px 10px rgba(0,0,0,0.1);
33
+ margin-bottom: 1rem;
34
+ }
35
+ .example {
36
+ color: #444;
37
+ background: #e9f0ff;
38
+ padding: 0.5rem;
39
+ border-radius: 8px;
40
+ margin: 3px 0;
41
+ cursor: pointer;
42
+ }
43
+ </style>
44
+ """, unsafe_allow_html=True)
45
+
46
+ # ---------- PDF Reading ----------
47
  def load_pdf_text(pdf_path):
48
  reader = PdfReader(pdf_path)
49
  text = ''
 
51
  text += page.extract_text()
52
  return text
53
 
 
54
  def chunk_text(text, max_len=500):
55
  sentences = text.split('. ')
56
  chunks, chunk = [], ''
 
63
  chunks.append(chunk.strip())
64
  return chunks
65
 
 
66
  @st.cache_resource
67
  def embed_chunks(chunks):
68
  model = SentenceTransformer('all-MiniLM-L6-v2')
69
  embeddings = model.encode(chunks)
70
  return embeddings, model
71
 
 
72
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
73
  query_embedding = model.encode([query])
74
  index = faiss.IndexFlatL2(embeddings.shape[1])
 
78
  result = qa_pipeline(question=query, context=context)
79
  return result['answer']
80
 
81
+ # ---------- App Layout ----------
82
+ apply_custom_style()
83
+ st.markdown('<div class="title">🤖 RAG PDF Q&A App</div>', unsafe_allow_html=True)
84
+ st.markdown('<div class="subtitle">Ask questions about a machine learning PDF. Powered by Transformers!</div>', unsafe_allow_html=True)
85
 
86
+ # Load and process PDF
87
  pdf_path = "ml_dataset_25_pages.pdf"
88
  raw_text = load_pdf_text(pdf_path)
89
  chunks = chunk_text(raw_text)
90
  embeddings, embedder = embed_chunks(chunks)
91
  qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
92
 
93
+ # Sample questions
94
+ st.markdown('<div class="question-box"><strong>💡 Sample Questions:</strong>', unsafe_allow_html=True)
95
+ sample_questions = [
96
+ "What is supervised learning?",
97
+ "Explain the difference between regression and classification.",
98
+ "What are the applications of machine learning?",
99
+ "How does decision tree algorithm work?",
100
+ "What is overfitting in machine learning?"
101
+ ]
102
+ for q in sample_questions:
103
+ st.markdown(f'<div class="example">{q}</div>', unsafe_allow_html=True)
104
+ st.markdown('</div>', unsafe_allow_html=True)
 
105
 
106
+ # User Input
107
+ query = st.text_input("🔎 Ask your question:")
108
  if query:
109
+ with st.spinner("Thinking..."):
110
+ answer = answer_query(query, embeddings, chunks, embedder, qa)
111
+ st.success(f"🧠 Answer: {answer}")