Spaces:
Runtime error
Runtime error
File size: 1,723 Bytes
b46e716 152a9ff 95cb17f b46e716 95cb17f b46e716 152a9ff b46e716 152a9ff b46e716 152a9ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling_core.types.doc import ImageRefMode
from pathlib import Path
import os
# Build the PDF pipeline configuration: OCR, table-structure detection and
# picture-image generation are all switched on.
pipeline_options = PdfPipelineOptions(
    do_ocr=True,
    do_table_structure=True,
    generate_picture_images=True,
)
# Cell matching lives on the nested table-structure options object.
pipeline_options.table_structure_options.do_cell_matching = True

# Module-level converter, created once at import time and used by
# convert_to_markdown for every request.
doc_converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    },
)
def _resolve_upload_path(file):
    """Return the filesystem path of an uploaded file as a ``Path``."""
    # A plain path (str or os.PathLike) is used directly; anything else is
    # treated as a file-like wrapper that exposes its path via ``.name``.
    if isinstance(file, (str, os.PathLike)):
        return Path(file)
    return Path(file.name)


def convert_to_markdown(file):
    """Convert an uploaded document to Markdown and return the Markdown text.

    Args:
        file: The upload handed over by the Gradio ``File`` input. May be a
            path (``str`` / ``os.PathLike``), an object exposing the path
            as ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        The Markdown text read back from ``output/<stem>-with-images.md``,
        or a short prompt message when ``file`` is ``None``.
    """
    # Submitting the form without a file passes None; answer with a hint
    # instead of failing with AttributeError on ``None.name``.
    if file is None:
        return "Please upload a document to convert."

    input_path = _resolve_upload_path(file)

    # Convert the document with the module-level converter.
    result = doc_converter.convert(str(input_path))

    # Prepare the output directory (relative to the working directory).
    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save the result as Markdown. REFERENCED image mode is kept from the
    # original code (presumably pictures are referenced rather than inlined
    # -- confirm against the docling documentation).
    doc_filename = result.input.file.stem
    md_filename = output_dir / f"{doc_filename}-with-images.md"
    result.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED)

    # Return the Markdown that was just written.
    return md_filename.read_text(encoding="utf-8")
# Create the Gradio interface: one file upload in, rendered Markdown out.
# The File input deliberately leaves `type` at its default: the value "file"
# is rejected by Gradio 4 and later (only "filepath" and "binary" are
# accepted there), while older releases default to "file" anyway.
iface = gr.Interface(
    fn=convert_to_markdown,
    inputs=gr.File(label="Upload your document"),
    outputs="markdown",
    title="Document to Markdown Converter",
    description="Upload a document (e.g., PDF, DOCX, PPTX) and get its Markdown version.",
)
# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()