import os
import tempfile
from typing import Optional

import gradio as gr
import ocrmypdf


def _is_temp_path(path: str) -> bool:
    """Return True if *path* lies inside the system temporary directory."""
    temp_root = os.path.normcase(os.path.realpath(tempfile.gettempdir()))
    candidate = os.path.normcase(os.path.realpath(path))
    try:
        return os.path.commonpath([temp_root, candidate]) == temp_root
    except ValueError:
        # commonpath() refuses paths that cannot share a root (for example
        # different drives on Windows); such a path is not a temp file.
        return False


def _remove_quietly(path: str, label: str) -> None:
    """Delete *path* on a best-effort basis, reporting failures on stdout."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone: nothing to clean up.
        pass
    except OSError as e:
        print(f"Error deleting temporary {label} file: {e}")


def ocr_pdf(input_pdf_path: Optional[str], language: str) -> Optional[str]:
    """Add an OCR text layer to a PDF and return the path of the result.

    Args:
        input_pdf_path: Path of the PDF to process, or None when nothing was
            uploaded.
        language: Language code forwarded to ``ocrmypdf.ocr`` (the UI below
            offers "ita" and "eng").

    Returns:
        The path of a newly created temporary PDF holding the OCR result, or
        None when ``input_pdf_path`` is None. The file is created with
        ``delete=False``, so it is not removed by this function on success.

    Raises:
        gr.Error: If anything goes wrong while preparing or running the OCR.
            The message is "Error during OCR: <cause>". Raising (rather than
            returning the message) keeps a plain string from being handed to
            the ``gr.File`` output as if it were a file path.

    Side effects:
        The input file is deleted afterwards, but only when it is located
        inside the system temporary directory, so files elsewhere on disk are
        never removed.
        NOTE(review): uploads stored outside ``tempfile.gettempdir()`` (e.g.
        via a custom Gradio temp dir) are left in place - confirm that this
        is acceptable for the deployment.
    """
    if input_pdf_path is None:
        return None

    output_path = None
    try:
        input_base, input_ext = os.path.splitext(os.path.basename(input_pdf_path))
        # Only reserve a unique file name here; the handle is closed before
        # OCR runs so the file can be reopened for writing on every platform.
        with tempfile.NamedTemporaryFile(
            suffix=input_ext,
            prefix=f"{input_base}_ocr_",
            delete=False,
        ) as tmp_output:
            output_path = tmp_output.name

        ocrmypdf.ocr(
            input_pdf_path,
            output_path,
            deskew=True,
            clean=True,
            language=language,
            force_ocr=True,
        )
    except Exception as e:
        # Do not leave a useless (empty or partial) output file behind.
        if output_path is not None:
            _remove_quietly(output_path, "output")
        raise gr.Error(f"Error during OCR: {e}") from e
    finally:
        if isinstance(input_pdf_path, str) and _is_temp_path(input_pdf_path):
            _remove_quietly(input_pdf_path, "input")

    return output_path


def main() -> None:
    """Build the Gradio interface around ``ocr_pdf`` and launch it."""
    app = gr.Interface(
        fn=ocr_pdf,
        inputs=[
            gr.File(label="Upload PDF to OCR"),
            gr.Dropdown(
                choices=["ita", "eng"],
                value="ita",
                label="OCR Language",
            ),
        ],
        outputs=gr.File(label="PDF with OCR"),
        title="OCR my PDF",
        description="Add a text layer to your PDF file",
    )
    app.launch()


if __name__ == "__main__":
    main()