import pytesseract
from pdf2image import convert_from_path


def pdf_to_text(pdf_path, output_path, dpi=300):
    """Run OCR over every page of a PDF and save the text to a file.

    The PDF is rasterized into one image per page, each image is passed
    through Tesseract, and the per-page results are concatenated in page
    order (with no extra separator) and written to ``output_path``.

    Args:
        pdf_path: Path of the PDF file to read.
        output_path: Path of the text file to write; it is opened in
            ``"w"`` mode with UTF-8 encoding, so existing content is replaced.
        dpi: Resolution passed to ``convert_from_path`` when rendering the
            pages. Defaults to 300, the value previously hard-coded.

    Returns:
        None. The text is written to ``output_path`` and a confirmation
        message is printed to stdout.
    """
    # Convert the PDF into a list of page images.
    pages = convert_from_path(pdf_path, dpi=dpi)

    # Join once instead of growing a string with ``+=`` on every page.
    text = "".join(pytesseract.image_to_string(page) for page in pages)

    # Write the extracted text to the output file.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(text)

    print(f"OCR completed. Text saved to {output_path}")


# Usage
# NOTE: this runs at module level, so it executes whenever the file is run
# or imported.
pdf_path = 'input.pdf'
output_path = 'output.txt'
pdf_to_text(pdf_path, output_path)