ikraamkb committed on
Commit
61ab252
·
verified ·
1 Parent(s): 3403b3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -133,6 +133,7 @@ from starlette.responses import RedirectResponse
133
  from tika import parser
134
  from openpyxl import load_workbook
135
  import os
 
136
  # Initialize Tika for DOCX & PPTX parsing
137
  tika.initVM()
138
 
@@ -159,10 +160,11 @@ def validate_file_type(file):
159
  return None
160
  return "❌ Invalid file format!"
161
 
 
162
  # ✅ Extract Text from PDF
163
  def extract_text_from_pdf(file_bytes):
164
  try:
165
- doc = fitz.open(stream=file_bytes, filetype="pdf")
166
  return "\n".join([page.get_text() for page in doc])
167
  except Exception as e:
168
  print(f"❌ PDF Extraction Error: {e}") # Log error
 
133
  from tika import parser
134
  from openpyxl import load_workbook
135
  import os
136
+ import pymupdf
137
  # Initialize Tika for DOCX & PPTX parsing
138
  tika.initVM()
139
 
 
160
  return None
161
  return "❌ Invalid file format!"
162
 
163
+ # ✅ Extract Text from PDF
164
  # ✅ Extract Text from PDF
165
  def extract_text_from_pdf(file_bytes):
166
  try:
167
+ doc = pymupdf.open(stream=file_bytes, filetype="pdf") # Use pymupdf.open()
168
  return "\n".join([page.get_text() for page in doc])
169
  except Exception as e:
170
  print(f"❌ PDF Extraction Error: {e}") # Log error