rahideer committed on
Commit
31a529e
·
verified ·
1 Parent(s): 1309017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -20
app.py CHANGED
@@ -5,8 +5,6 @@ from transformers import pipeline
5
  import faiss
6
  import numpy as np
7
 
8
- # Load PDF and extract text
9
- @st.cache_data
10
  def load_pdf_text(pdf_path):
11
  reader = PdfReader(pdf_path)
12
  text = ''
@@ -14,7 +12,6 @@ def load_pdf_text(pdf_path):
14
  text += page.extract_text()
15
  return text
16
 
17
- # Split text into chunks
18
  def chunk_text(text, max_len=500):
19
  sentences = text.split('. ')
20
  chunks, chunk = [], ''
@@ -27,14 +24,12 @@ def chunk_text(text, max_len=500):
27
  chunks.append(chunk.strip())
28
  return chunks
29
 
30
- # Embed text using SentenceTransformer
31
  @st.cache_resource
32
  def embed_chunks(chunks):
33
  model = SentenceTransformer('all-MiniLM-L6-v2')
34
  embeddings = model.encode(chunks)
35
  return embeddings, model
36
 
37
- # RAG-style QA using FAISS and Transformers
38
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
39
  query_embedding = model.encode([query])
40
  index = faiss.IndexFlatL2(embeddings.shape[1])
@@ -44,20 +39,16 @@ def answer_query(query, embeddings, chunks, model, qa_pipeline):
44
  result = qa_pipeline(question=query, context=context)
45
  return result['answer']
46
 
47
- # Streamlit UI
48
- st.title("📄 PDF QA with RAG")
49
- uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
50
 
51
- if uploaded_file:
52
- with open("document.pdf", "wb") as f:
53
- f.write(uploaded_file.read())
 
 
54
 
55
- raw_text = load_pdf_text("document.pdf")
56
- chunks = chunk_text(raw_text)
57
- embeddings, embedder = embed_chunks(chunks)
58
- qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
59
-
60
- query = st.text_input("Ask a question about the PDF:")
61
- if query:
62
- answer = answer_query(query, embeddings, chunks, embedder, qa)
63
- st.success(f"Answer: {answer}")
 
5
  import faiss
6
  import numpy as np
7
 
 
 
8
  def load_pdf_text(pdf_path):
9
  reader = PdfReader(pdf_path)
10
  text = ''
 
12
  text += page.extract_text()
13
  return text
14
 
 
15
  def chunk_text(text, max_len=500):
16
  sentences = text.split('. ')
17
  chunks, chunk = [], ''
 
24
  chunks.append(chunk.strip())
25
  return chunks
26
 
 
27
  @st.cache_resource
28
  def embed_chunks(chunks):
29
  model = SentenceTransformer('all-MiniLM-L6-v2')
30
  embeddings = model.encode(chunks)
31
  return embeddings, model
32
 
 
33
  def answer_query(query, embeddings, chunks, model, qa_pipeline):
34
  query_embedding = model.encode([query])
35
  index = faiss.IndexFlatL2(embeddings.shape[1])
 
39
  result = qa_pipeline(question=query, context=context)
40
  return result['answer']
41
 
42
+ st.title("🤖 RAG PDF QA App")
43
+ st.markdown("Ask questions about the preloaded PDF dataset.")
 
44
 
45
+ pdf_path = "ml_dataset_25_pages.pdf"
46
+ raw_text = load_pdf_text(pdf_path)
47
+ chunks = chunk_text(raw_text)
48
+ embeddings, embedder = embed_chunks(chunks)
49
+ qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
50
 
51
+ query = st.text_input("Enter your question:")
52
+ if query:
53
+ answer = answer_query(query, embeddings, chunks, embedder, qa)
54
+ st.success(f"Answer: {answer}")