TabasumDev committed on
Commit
90aae5b
·
verified ·
1 Parent(s): 2952b2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -444,7 +444,6 @@
444
  # 🔥 Run Streamlit App
445
  # if __name__ == '__main__':
446
  # main()
447
-
448
  import streamlit as st
449
  import os
450
  import re
@@ -479,24 +478,34 @@ lora_config = LoraConfig(
479
  model = get_peft_model(model, lora_config)
480
  model.eval()
481
 
482
- # 🛠 Function to Read & Extract Text from PDFs (With Debugging)
483
- def read_files(file):
484
- st.write("πŸ“‚ Processing uploaded file...") # Debugging
485
- file_context = ""
486
  try:
487
- reader = PdfReader(file)
 
 
 
 
 
 
 
 
 
488
  for page in reader.pages:
489
  text = page.extract_text()
490
  if text:
491
  file_context += text + "\n"
492
 
 
 
 
493
  if not file_context.strip():
494
  st.error("⚠️ No text found. The document might be scanned or encrypted.")
495
  return ""
496
 
497
  st.write(f"βœ… Extracted {len(file_context)} characters.") # Debugging
498
  return file_context.strip()
499
-
500
  except Exception as e:
501
  st.error(f"⚠️ Error reading PDF: {e}")
502
  return ""
@@ -595,6 +604,7 @@ if __name__ == '__main__':
595
 
596
 
597
 
 
598
  # import streamlit as st
599
  # from PyPDF2 import PdfReader
600
 
 
444
  # 🔥 Run Streamlit App
445
  # if __name__ == '__main__':
446
  # main()
 
447
  import streamlit as st
448
  import os
449
  import re
 
478
  model = get_peft_model(model, lora_config)
479
  model.eval()
480
 
481
+ # 🛠 Function to Read & Extract Text from PDFs
482
+ def read_files(uploaded_file):
 
 
483
  try:
484
+ # 🔥 Step 1: Save file to disk first
485
+ temp_pdf_path = "temp_uploaded_file.pdf"
486
+ with open(temp_pdf_path, "wb") as f:
487
+ f.write(uploaded_file.getbuffer()) # Save the file
488
+
489
+ # 🔥 Step 2: Open the saved file and extract text
490
+ st.write("πŸ“‚ Processing saved PDF file...") # Debugging
491
+ file_context = ""
492
+ reader = PdfReader(temp_pdf_path)
493
+
494
  for page in reader.pages:
495
  text = page.extract_text()
496
  if text:
497
  file_context += text + "\n"
498
 
499
+ # 🔥 Step 3: Delete the temp file after reading
500
+ os.remove(temp_pdf_path)
501
+
502
  if not file_context.strip():
503
  st.error("⚠️ No text found. The document might be scanned or encrypted.")
504
  return ""
505
 
506
  st.write(f"βœ… Extracted {len(file_context)} characters.") # Debugging
507
  return file_context.strip()
508
+
509
  except Exception as e:
510
  st.error(f"⚠️ Error reading PDF: {e}")
511
  return ""
 
604
 
605
 
606
 
607
+
608
  # import streamlit as st
609
  # from PyPDF2 import PdfReader
610