rahideer committed on
Commit
1f717c0
·
verified ·
1 Parent(s): 31a529e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -5,6 +5,7 @@ from transformers import pipeline
5
  import faiss
6
  import numpy as np
7
 
 
8
  def load_pdf_text(pdf_path):
9
  reader = PdfReader(pdf_path)
10
  text = ''
@@ -12,6 +13,7 @@ def load_pdf_text(pdf_path):
12
  text += page.extract_text()
13
  return text
14
 
 
15
  def chunk_text(text, max_len=500):
16
  sentences = text.split('. ')
17
  chunks, chunk = [], ''
@@ -24,12 +26,14 @@ def chunk_text(text, max_len=500):
24
  chunks.append(chunk.strip())
25
  return chunks
26
 
 
27
  @st.cache_resource
28
  def embed_chunks(chunks):
29
  model = SentenceTransformer('all-MiniLM-L6-v2')
30
  embeddings = model.encode(chunks)
31
  return embeddings, model
32
 
 
33
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
34
  query_embedding = model.encode([query])
35
  index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -39,16 +43,31 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
39
  result = qa_pipeline(question=query, context=context)
40
  return result['answer']
41
 
 
42
  st.title("🤖 RAG PDF QA App")
43
  st.markdown("Ask questions about the preloaded PDF dataset.")
44
 
 
45
  pdf_path = "ml_dataset_25_pages.pdf"
46
  raw_text = load_pdf_text(pdf_path)
47
  chunks = chunk_text(raw_text)
48
  embeddings, embedder = embed_chunks(chunks)
49
  qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
50
 
51
- query = st.text_input("Enter your question:")
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  if query:
53
  answer = answer_query(query, embeddings, chunks, embedder, qa)
54
  st.success(f"Answer: {answer}")
 
5
  import faiss
6
  import numpy as np
7
 
8
+ # Load and extract text from local PDF
9
  def load_pdf_text(pdf_path):
10
  reader = PdfReader(pdf_path)
11
  text = ''
 
13
  text += page.extract_text()
14
  return text
15
 
16
+ # Split text into chunks
17
  def chunk_text(text, max_len=500):
18
  sentences = text.split('. ')
19
  chunks, chunk = [], ''
 
26
  chunks.append(chunk.strip())
27
  return chunks
28
 
29
+ # Embed text using SentenceTransformer
30
  @st.cache_resource
31
  def embed_chunks(chunks):
32
  model = SentenceTransformer('all-MiniLM-L6-v2')
33
  embeddings = model.encode(chunks)
34
  return embeddings, model
35
 
36
+ # RAG QA using FAISS index and QA pipeline
37
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
38
  query_embedding = model.encode([query])
39
  index = faiss.IndexFlatL2(embeddings.shape[1])
 
43
  result = qa_pipeline(question=query, context=context)
44
  return result['answer']
45
 
46
+ # Main app
47
  st.title("🤖 RAG PDF QA App")
48
  st.markdown("Ask questions about the preloaded PDF dataset.")
49
 
50
+ # Load and process the PDF
51
  pdf_path = "ml_dataset_25_pages.pdf"
52
  raw_text = load_pdf_text(pdf_path)
53
  chunks = chunk_text(raw_text)
54
  embeddings, embedder = embed_chunks(chunks)
55
  qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
56
 
57
+ # Show sample questions
58
+ st.subheader("Ask a Question")
59
+ st.markdown("Here are some questions you can try:")
60
+ st.markdown("""
61
+ - What is supervised learning?
62
+ - Explain the difference between regression and classification.
63
+ - What are the applications of machine learning?
64
+ - How does decision tree algorithm work?
65
+ - What is overfitting in machine learning?
66
+ """)
67
+
68
+ # User input
69
+ query = st.text_input("Enter your question below:")
70
+
71
  if query:
72
  answer = answer_query(query, embeddings, chunks, embedder, qa)
73
  st.success(f"Answer: {answer}")