# ocr-my-pdf / app.py
# Giovanni Spadaro
# init
# 30d6a12
import os
import tempfile
import gradio as gr
import ocrmypdf
def _remove_file(path, description):
    """Delete *path*, printing (rather than raising) any OS-level failure."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone: nothing to clean up.
        pass
    except OSError as e:
        print(f"Error deleting {description}: {e}")


def ocr_pdf(input_pdf_path, language):
    """Run OCR on a PDF and return the path of the processed copy.

    The result is written to a new named temporary file whose name is built
    from the input's base name (``<base>_ocr_<random><ext>``). The file is
    created with ``delete=False`` so it still exists when its path is
    returned.

    Args:
        input_pdf_path: Path of the file to process, or ``None`` when nothing
            was supplied.
        language: Language code forwarded to ``ocrmypdf.ocr`` (the UI in this
            file offers ``"ita"`` and ``"eng"``).

    Returns:
        ``None`` if ``input_pdf_path`` is ``None``; the output file path on
        success; otherwise a string of the form ``"Error during OCR: ..."``.

    Side effects:
        Whether OCR succeeds or fails, the input file is deleted afterwards
        when its path is a string containing ``"tmp"``. On failure the
        partially created output file is deleted as well, so failed runs do
        not accumulate empty files in the temp directory.
    """
    if input_pdf_path is None:
        return None

    # Tracked outside the ``try`` so the error handler knows whether an output
    # file has already been created and needs removing.
    output_path = None
    try:
        input_base, input_ext = os.path.splitext(os.path.basename(input_pdf_path))
        # Only reserve a unique name here; the handle is closed before OCR
        # runs so the file is not held open while it is being written.
        with tempfile.NamedTemporaryFile(
            suffix=input_ext, prefix=f"{input_base}_ocr_", delete=False
        ) as tmp_output:
            output_path = tmp_output.name
        ocrmypdf.ocr(
            input_pdf_path,
            output_path,
            deskew=True,
            clean=True,
            language=language,
            force_ocr=True,
        )
        return output_path
    except Exception as e:
        # The output file was created with delete=False, so nothing else will
        # remove it if OCR fails.
        if output_path is not None:
            _remove_file(output_path, "temporary output file")
        # NOTE(review): the Interface in this file wires this return value to
        # a gr.File output, so this message is presumably interpreted as a
        # file path by the UI -- confirm; raising gr.Error may be preferable.
        return f"Error during OCR: {e}"
    finally:
        # Heuristic: only paths containing "tmp" are treated as disposable
        # uploads; any other input is left in place.
        if (
            isinstance(input_pdf_path, str)
            and os.path.exists(input_pdf_path)
            and "tmp" in input_pdf_path
        ):
            _remove_file(input_pdf_path, "temporary input file")
if __name__ == "__main__":
    # Build the widgets first, in the positional order ocr_pdf expects
    # (file, then language), followed by the download slot for the result.
    pdf_upload = gr.File(label="Upload PDF to OCR")
    language_picker = gr.Dropdown(
        choices=["ita", "eng"],
        value="ita",
        label="OCR Language",
    )
    ocr_download = gr.File(label="PDF with OCR")

    # Wire the widgets to ocr_pdf and start the web UI.
    app = gr.Interface(
        fn=ocr_pdf,
        inputs=[pdf_upload, language_picker],
        outputs=ocr_download,
        title="OCR my PDF",
        description="Add a text layer to your PDF file",
    )
    app.launch()