"""Streamlit front end for Docling.

Tab 1 converts a single PDF (uploaded or fetched from a URL) to Markdown with
``DocumentConverter``.  Tab 2 runs a vision-language model through vLLM over a
batch of uploaded images and returns the DocTags and Markdown outputs as a ZIP.
"""

import io
import logging
import os
import posixpath
import tempfile
import time
import zipfile
from urllib.parse import unquote, urlparse

import streamlit as st
from docling.document_converter import DocumentConverter
from PIL import Image

# vLLM and docling_core imports for batch processing.  They are optional: the
# batch tab degrades to an installation hint when they are missing.
try:
    from vllm import LLM, SamplingParams
    from docling_core.types.doc import DoclingDocument
    from docling_core.types.doc.document import DocTagsDocument
    from pathlib import Path
    VLLM_AVAILABLE = True
except ImportError:
    VLLM_AVAILABLE = False

IMG_DIR = "img"
OUT_DIR = "out"

# Create necessary directories
os.makedirs(IMG_DIR, exist_ok=True)
os.makedirs(OUT_DIR, exist_ok=True)

# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _markdown_filename_from_url(url):
    """Return a ``.md`` download name derived from the last path segment of *url*.

    Only a trailing ``.pdf`` extension is removed, so dotted names such as
    ``2408.09869`` or ``paper.v2.pdf`` keep their full stem (this mirrors the
    ``os.path.splitext`` handling used for uploaded files).  Query strings and
    fragments are ignored, and ``document.md`` is used when the URL has no
    usable path segment.
    """
    path = urlparse(url).path
    stem = unquote(posixpath.basename(path.rstrip("/")))
    if stem.lower().endswith(".pdf"):
        stem = stem[:-len(".pdf")]
    return (stem or "document") + ".md"


def _unique_stem(stem, used):
    """Return *stem*, suffixed with ``_1``, ``_2``, ... if already in *used*.

    The chosen name is added to the *used* set so later calls see it.
    """
    candidate = stem
    suffix = 1
    while candidate in used:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used.add(candidate)
    return candidate


def _offer_markdown_download(markdown_text, output_filename):
    """Show the success message and the Markdown download button."""
    st.success("Conversion completed!")
    st.download_button(
        label="Download Markdown file",
        data=markdown_text,
        file_name=output_filename,
        mime="text/markdown",
    )


# Custom CSS for better layout
# NOTE(review): the style block is blank in this copy of the file - confirm
# whether CSS rules are supposed to be here.
st.markdown(""" """, unsafe_allow_html=True)

# Create tabs for different functionalities
tab1, tab2 = st.tabs(["PDF to Markdown", "Batch Image Processing"])

with tab1:
    st.title("PDF to Markdown Converter")

    # Initialize session state if it doesn't exist.  The converter is kept in
    # session state so it is built once per session, not on every rerun.
    if 'converter' not in st.session_state:
        try:
            st.session_state.converter = DocumentConverter()
            logger.debug("Converter successfully created")
        except Exception as e:
            logger.exception("Error creating converter")
            st.error(f"Error creating converter: {str(e)}")
            st.stop()

    # Main upload area
    uploaded_file = st.file_uploader(
        "Upload your PDF file",
        type=['pdf'],
        key='pdf_uploader',
        help="Drag and drop or click to select a PDF file (max 200MB)",
    )

    # URL input area with spacing (raw <br> requires unsafe_allow_html)
    st.markdown("<br>", unsafe_allow_html=True)
    url = st.text_input("Or enter a PDF URL")

    # Unified convert button
    convert_clicked = st.button("Convert to Markdown", type="primary")

    # Process either uploaded file or URL; an uploaded file takes precedence.
    if convert_clicked:
        if uploaded_file is not None:
            # Bound before the file is created so the cleanup below runs even
            # when writing the upload to disk fails.
            tmp_path = None
            try:
                with st.spinner('Converting file...'):
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                        tmp_path = tmp_file.name
                        tmp_file.write(uploaded_file.getvalue())
                    logger.debug("Temporary file created at: %s", tmp_path)

                    try:
                        result = st.session_state.converter.convert(tmp_path)
                        markdown_text = result.document.export_to_markdown()
                        output_filename = os.path.splitext(uploaded_file.name)[0] + '.md'
                        _offer_markdown_download(markdown_text, output_filename)
                    except Exception as e:
                        logger.exception("Error converting file")
                        st.error(f"Error converting file: {str(e)}")
            except Exception as e:
                logger.exception("Error processing file")
                st.error(f"Error processing file: {str(e)}")
            finally:
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)
                    logger.debug("Temporary file deleted")

        elif url:
            try:
                with st.spinner('Converting from URL...'):
                    logger.debug("Converting from URL: %s", url)
                    result = st.session_state.converter.convert(url)
                    markdown_text = result.document.export_to_markdown()
                    output_filename = _markdown_filename_from_url(url)
                    _offer_markdown_download(markdown_text, output_filename)
            except Exception as e:
                logger.exception("Error converting from URL")
                st.error(f"Error converting from URL: {str(e)}")
        else:
            st.warning("Please upload a file or enter a URL first")

# Batch processing tab
with tab2:
    st.title("Batch Image Processing with vLLM")

    if not VLLM_AVAILABLE:
        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
    else:
        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")

        # Ensure directories exist
        img_dir = IMG_DIR
        out_dir = OUT_DIR
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): the code below reads the uploaded files and builds the
        # ZIP in memory; nothing is read from or written to these directories.
        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")

        # Model configuration
        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")

        # File uploader for multiple images
        uploaded_images = st.file_uploader(
            "Upload image files",
            type=['png', 'jpg', 'jpeg'],
            accept_multiple_files=True,
            key='image_uploader',
            help="Drag and drop or click to select image files",
        )

        # Process button
        process_clicked = st.button("Process Images", type="primary", key="process_button")

        if process_clicked and uploaded_images:
            try:
                with st.spinner('Processing images...'):
                    # Initialize LLM (one image per prompt)
                    llm = LLM(model=MODEL_PATH, limit_mm_per_prompt={"image": 1})
                    sampling_params = SamplingParams(
                        temperature=0.0,
                        max_tokens=8192,
                    )
                    # "<image>" marks where the picture from multi_modal_data is
                    # inserted and "<end_of_utterance>" closes the user turn;
                    # this is the prompt format published for SmolDocling.
                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"

                    start_time = time.time()
                    total_images = len(uploaded_images)
                    # Stems already written to the archive, so that uploads
                    # such as "a.png" and "a.jpg" do not collide.
                    used_stems = set()

                    # Create a ZIP file in memory to store all outputs
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                        progress_bar = st.progress(0)
                        status_text = st.empty()

                        for idx, img_file in enumerate(uploaded_images):
                            img_name = img_file.name
                            status_text.text(f"Processing {img_name} ({idx+1}/{total_images})")

                            # Open image
                            image = Image.open(img_file).convert("RGB")

                            # Process with vLLM
                            llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
                            output = llm.generate([llm_input], sampling_params=sampling_params)[0]

                            doctags = output.outputs[0].text
                            img_fn = _unique_stem(os.path.splitext(img_name)[0], used_stems)

                            # Add doctags to zip
                            zip_file.writestr(f"{img_fn}.dt", doctags)

                            # Convert to Docling Document
                            doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
                            doc = DoclingDocument(name=img_fn)
                            doc.load_from_doctags(doctags_doc)

                            # Export as markdown and add to zip
                            md_content = doc.export_to_markdown()
                            zip_file.writestr(f"{img_fn}.md", md_content)

                            # Update progress
                            progress_bar.progress((idx + 1) / total_images)

                    total_time = time.time() - start_time

                    # Offer the ZIP file for download
                    st.success(f"Processing completed in {total_time:.2f} seconds!")
                    zip_buffer.seek(0)
                    st.download_button(
                        label="Download All Results",
                        data=zip_buffer,
                        file_name="processed_images.zip",
                        mime="application/zip",
                    )

            except Exception as e:
                logger.exception("Error in batch processing")
                st.error(f"Error in batch processing: {str(e)}")
        elif process_clicked:
            st.warning("Please upload at least one image file")