# Spaces: Runtime error  (hosting-page status banner captured with this file; not part of the program)
import os
from pathlib import Path

import gradio as gr
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling_core.types.doc import ImageRefMode
def _build_pdf_pipeline_options():
    """Return the PDF pipeline options used by the shared converter.

    Turns on OCR, table-structure recognition (with cell matching) and
    generation of picture images.
    """
    options = PdfPipelineOptions()
    options.do_ocr = True
    options.do_table_structure = True
    options.table_structure_options.do_cell_matching = True
    options.generate_picture_images = True
    return options


# Module-level converter, built once and reused for every conversion request.
pipeline_options = _build_pdf_pipeline_options()
_pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
doc_converter = DocumentConverter(format_options={InputFormat.PDF: _pdf_format_option})
def convert_to_markdown(file):
    """Convert an uploaded document to Markdown and return the Markdown text.

    Args:
        file: The upload as handed over by Gradio. Accepted forms are a
            filesystem path (``str`` or ``os.PathLike``) or an object that
            exposes the path through a ``.name`` attribute (the temp-file
            wrapper older Gradio releases pass). ``None`` means nothing
            was uploaded.

    Returns:
        The Markdown produced for the document, as a string.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        # Submitting the form without a file yields None; report it to the
        # user instead of failing with an AttributeError on ``None.name``.
        raise gr.Error("Please upload a document first.")

    # Path objects also have a ``.name`` attribute (the basename only), so
    # path-like inputs must be checked before falling back to ``file.name``.
    if isinstance(file, (str, os.PathLike)):
        input_path = Path(file)
    else:
        input_path = Path(file.name)

    # Convert the document with the shared module-level converter.
    result = doc_converter.convert(str(input_path))

    # Prepare the output directory (relative to the working directory).
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)

    # Save the result as Markdown, named after the converted input's stem.
    doc_filename = result.input.file.stem
    md_filename = output_dir / f"{doc_filename}-with-images.md"
    result.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED)

    # Read the Markdown back so it can be returned to the UI.
    return md_filename.read_text(encoding="utf-8")


# Create the Gradio interface.
# The File component's ``type`` is deliberately left at the library default:
# current Gradio releases reject type="file" when the component is built
# (only "filepath" / "binary" are accepted), while older releases default to
# a temp-file object. convert_to_markdown handles both forms of input.
iface = gr.Interface(
    fn=convert_to_markdown,
    inputs=gr.File(label="Upload your document"),
    outputs="markdown",
    title="Document to Markdown Converter",
    description="Upload a document (e.g., PDF, DOCX, PPTX) and get its Markdown version.",
)

if __name__ == "__main__":
    iface.launch()