Spaces:

pradeepsengarr
/

Bot_RAG

Sleeping

pradeepsengarr committed on Apr 16

Commit

e428e3e

verified ·

1 Parent(s): 920b3d6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -417,7 +417,6 @@
 # else:
 #     st.sidebar.info("Upload PDFs to begin your QA journey.")
 import os
 import streamlit as st
 import fitz  # PyMuPDF
@@ -449,13 +448,14 @@ def load_model():
     pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
     return HuggingFacePipeline(pipeline=pipe)
-# --- Extract PDF Text ---
 def read_pdf(file):
     try:
         doc = fitz.open(stream=file.read(), filetype="pdf")
         text = ""
         for page in doc:
-            text += page.get_text()
         return text.strip()
     except Exception as e:
         logging.error(f"Failed to extract text: {e}")
@@ -558,3 +558,4 @@ if uploaded_file:
         st.error("⚠️ No text could be extracted from the PDF. Try another file.")
 else:
     st.info("Upload a PDF to begin.")

 # else:
 #     st.sidebar.info("Upload PDFs to begin your QA journey.")
 import os
 import streamlit as st
 import fitz  # PyMuPDF
     pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
     return HuggingFacePipeline(pipeline=pipe)
+# --- Extract PDF Text (Improved) ---
 def read_pdf(file):
     try:
         doc = fitz.open(stream=file.read(), filetype="pdf")
         text = ""
+        # Extract text from each page
         for page in doc:
+            text += page.get_text("text")  # You can also use "dict" for structured text or "html"
         return text.strip()
     except Exception as e:
         logging.error(f"Failed to extract text: {e}")
         st.error("⚠️ No text could be extracted from the PDF. Try another file.")
 else:
     st.info("Upload a PDF to begin.")