Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -444,7 +444,6 @@
|
|
444 |
# 🔥 Run Streamlit App
|
445 |
# if __name__ == '__main__':
|
446 |
# main()
|
447 |
-
|
448 |
import streamlit as st
|
449 |
import os
|
450 |
import re
|
@@ -479,24 +478,34 @@ lora_config = LoraConfig(
|
|
479 |
model = get_peft_model(model, lora_config)
|
480 |
model.eval()
|
481 |
|
482 |
-
# π Function to Read & Extract Text from PDFs
|
483 |
-
def read_files(
|
484 |
-
st.write("π Processing uploaded file...") # Debugging
|
485 |
-
file_context = ""
|
486 |
try:
|
487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
for page in reader.pages:
|
489 |
text = page.extract_text()
|
490 |
if text:
|
491 |
file_context += text + "\n"
|
492 |
|
|
|
|
|
|
|
493 |
if not file_context.strip():
|
494 |
st.error("⚠️ No text found. The document might be scanned or encrypted.")
|
495 |
return ""
|
496 |
|
497 |
st.write(f"✅ Extracted {len(file_context)} characters.") # Debugging
|
498 |
return file_context.strip()
|
499 |
-
|
500 |
except Exception as e:
|
501 |
st.error(f"⚠️ Error reading PDF: {e}")
|
502 |
return ""
|
@@ -595,6 +604,7 @@ if __name__ == '__main__':
|
|
595 |
|
596 |
|
597 |
|
|
|
598 |
# import streamlit as st
|
599 |
# from PyPDF2 import PdfReader
|
600 |
|
|
|
444 |
# 🔥 Run Streamlit App
|
445 |
# if __name__ == '__main__':
|
446 |
# main()
|
|
|
447 |
import streamlit as st
|
448 |
import os
|
449 |
import re
|
|
|
478 |
model = get_peft_model(model, lora_config)
|
479 |
model.eval()
|
480 |
|
481 |
+
# π Function to Read & Extract Text from PDFs
|
482 |
+
def read_files(uploaded_file):
|
|
|
|
|
483 |
try:
|
484 |
+
# 🔥 Step 1: Save file to disk first
|
485 |
+
temp_pdf_path = "temp_uploaded_file.pdf"
|
486 |
+
with open(temp_pdf_path, "wb") as f:
|
487 |
+
f.write(uploaded_file.getbuffer()) # Save the file
|
488 |
+
|
489 |
+
# 🔥 Step 2: Open the saved file and extract text
|
490 |
+
st.write("π Processing saved PDF file...") # Debugging
|
491 |
+
file_context = ""
|
492 |
+
reader = PdfReader(temp_pdf_path)
|
493 |
+
|
494 |
for page in reader.pages:
|
495 |
text = page.extract_text()
|
496 |
if text:
|
497 |
file_context += text + "\n"
|
498 |
|
499 |
+
# 🔥 Step 3: Delete the temp file after reading
|
500 |
+
os.remove(temp_pdf_path)
|
501 |
+
|
502 |
if not file_context.strip():
|
503 |
st.error("⚠️ No text found. The document might be scanned or encrypted.")
|
504 |
return ""
|
505 |
|
506 |
st.write(f"✅ Extracted {len(file_context)} characters.") # Debugging
|
507 |
return file_context.strip()
|
508 |
+
|
509 |
except Exception as e:
|
510 |
st.error(f"⚠️ Error reading PDF: {e}")
|
511 |
return ""
|
|
|
604 |
|
605 |
|
606 |
|
607 |
+
|
608 |
# import streamlit as st
|
609 |
# from PyPDF2 import PdfReader
|
610 |
|