Spaces:
Running
Running
File size: 1,506 Bytes
30d6a12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import os
import tempfile
import gradio as gr
import ocrmypdf
def _resolve_upload_path(file_or_path):
    """Return the filesystem path behind an upload given as a path or file-like object."""
    if isinstance(file_or_path, (str, os.PathLike)):
        return os.fspath(file_or_path)
    # File-like upload wrappers expose their on-disk location as ``.name``.
    # Anything else is returned unchanged and fails inside ocr_pdf's try block.
    return getattr(file_or_path, "name", file_or_path)


def _is_under_temp_dir(path):
    """Return True when *path* lies inside the system or Gradio temp directory."""
    roots = [tempfile.gettempdir()]
    gradio_root = os.environ.get("GRADIO_TEMP_DIR")
    if gradio_root:
        roots.append(gradio_root)

    # Resolve symlinks so that e.g. /var vs /private/var compare equal.
    real_path = os.path.normcase(os.path.realpath(path))
    for root in roots:
        real_root = os.path.normcase(os.path.realpath(root))
        try:
            if os.path.commonpath([real_path, real_root]) == real_root:
                return True
        except ValueError:
            # Raised for paths on different drives: not inside this root.
            continue
    return False


def _remove_temp_file(path, role):
    """Delete *path* if it exists, reporting (not raising) any OS error."""
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError as e:
        print(f"Error deleting temporary {role} file: {e}")


def ocr_pdf(input_pdf_path, language):
    """Run OCRmyPDF on an uploaded PDF and return the path of the result.

    Args:
        input_pdf_path: Path of the PDF to process (``str`` or path-like), a
            file-like object exposing the path as ``.name``, or ``None``
            when nothing was uploaded.
        language: OCR language code forwarded to ``ocrmypdf.ocr`` (the UI
            offers ``"ita"`` and ``"eng"``).

    Returns:
        ``None`` when no file was given, the path of the newly written PDF on
        success, or a string starting with ``"Error during OCR: "`` when
        anything fails. This function does not raise.

    Side effects:
        The input file is deleted afterwards, but only when it lives inside
        the system temp directory or ``GRADIO_TEMP_DIR``; files elsewhere are
        never removed. The output is a temp file created with
        ``delete=False``; it is removed again if OCR fails, otherwise the
        caller owns it.
    """
    if input_pdf_path is None:
        return None

    source_path = _resolve_upload_path(input_pdf_path)
    output_path = None
    try:
        input_base, input_ext = os.path.splitext(os.path.basename(source_path))
        # Only reserve a unique output name here; OCRmyPDF writes the content.
        with tempfile.NamedTemporaryFile(
            suffix=input_ext, prefix=f"{input_base}_ocr_", delete=False
        ) as tmp_output:
            output_path = tmp_output.name
        ocrmypdf.ocr(
            source_path,
            output_path,
            deskew=True,
            clean=True,
            language=language,
            force_ocr=True,
        )
        return output_path
    except Exception as e:
        # The placeholder output was created with delete=False, so it would
        # otherwise pile up in the temp directory on every failed run.
        if output_path is not None:
            _remove_temp_file(output_path, "output")
        # NOTE(review): this message is returned to a gr.File output, which
        # presumably expects a file path; consider raising gr.Error instead.
        return f"Error during OCR: {e}"
    finally:
        # Only ever delete uploads that really are temp files; a substring
        # test on "tmp" could remove a caller's own file (e.g. tmp_scan.pdf).
        if isinstance(source_path, str) and _is_under_temp_dir(source_path):
            _remove_temp_file(source_path, "input")
if __name__ == "__main__":
    # Build the UI pieces first, in the same order the original constructed
    # them, then wire them to ocr_pdf and start the server.
    pdf_upload = gr.File(label="Upload PDF to OCR")
    language_choice = gr.Dropdown(
        choices=["ita", "eng"],
        value="ita",
        label="OCR Language",
    )
    ocr_result = gr.File(label="PDF with OCR")

    app = gr.Interface(
        fn=ocr_pdf,
        inputs=[pdf_upload, language_choice],
        outputs=ocr_result,
        title="OCR my PDF",
        description="Add a text layer to your PDF file",
    )
    app.launch()