Spaces:

feras-vbrl/pdf-to-markdown-converter

Running

App Files Community

feras-vbrl committed on 24 days ago

Commit

195dd9b

verified ·

1 Parent(s): ad3cd87

Upload 4 files

Browse files

Files changed (3) hide show

README.md +31 -5
app.py +128 -2
requirements.txt +4 -1

README.md CHANGED Viewed

@@ -17,21 +17,47 @@ This application converts PDF documents to Markdown format. It uses the `docling
 - Upload PDF files directly
 - Convert PDFs from URLs
-- Download the resulting Markdown file
 - Clean, user-friendly interface
 ## How to Use
-1. Upload a PDF file using the file uploader or enter a URL to a PDF document
-2. Click the "Convert to Markdown" button
-3. Once conversion is complete, download the Markdown file
 ## Technical Details
 Built with:
 - Streamlit 1.29.0
 - Docling 2.7.0
 ## Deployment
-This application is deployed on Hugging Face Spaces.

 - Upload PDF files directly
 - Convert PDFs from URLs
+- Batch process multiple images using vLLM
+- Download the resulting Markdown files
 - Clean, user-friendly interface
 ## How to Use
+### PDF to Markdown
+1. Select the "PDF to Markdown" tab
+2. Upload a PDF file using the file uploader or enter a URL to a PDF document
+3. Click the "Convert to Markdown" button
+4. Once conversion is complete, download the Markdown file
+### Batch Image Processing
+1. Select the "Batch Image Processing" tab
+2. Upload multiple image files (PNG, JPG, JPEG)
+3. Optionally customize the model path and prompt text
+4. Click the "Process Images" button
+5. Once processing is complete, download the ZIP file containing all results
 ## Technical Details
 Built with:
 - Streamlit 1.29.0
 - Docling 2.7.0
+- docling_core
+- vLLM (for batch processing)
+- Python 3.12
 ## Deployment
+This application is deployed on Hugging Face Spaces.
+To deploy this application:
+1. Create a new Space on Hugging Face (https://huggingface.co/spaces)
+2. Choose "Streamlit" as the SDK
+3. Upload all these files to the Space repository:
+   - app.py
+   - requirements.txt
+   - README.md
+   - runtime.txt
+The application will automatically create any necessary directories when it starts.
+Note: The vLLM functionality requires significant computational resources, so you may need to select a more powerful hardware configuration for your Space.

app.py CHANGED Viewed

@@ -3,6 +3,24 @@ from docling.document_converter import DocumentConverter
 import tempfile
 import os
 import logging
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
@@ -43,7 +61,11 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
-st.title("PDF to Markdown Converter")
 # Initialize session state if it doesn't exist
 if 'converter' not in st.session_state:
@@ -128,4 +150,108 @@ if convert_clicked:
             logger.error(f"Error converting from URL: {str(e)}")
             st.error(f"Error converting from URL: {str(e)}")
     else:
-        st.warning("Please upload a file or enter a URL first")

 import tempfile
 import os
 import logging
+import time
+from PIL import Image
+import zipfile
+import io
+# vLLM and docling_core imports for batch processing
+try:
+    from vllm import LLM, SamplingParams
+    from docling_core.types.doc import DoclingDocument
+    from docling_core.types.doc.document import DocTagsDocument
+    from pathlib import Path
+    VLLM_AVAILABLE = True
+except ImportError:
+    VLLM_AVAILABLE = False
+# Create necessary directories
+os.makedirs("img", exist_ok=True)
+os.makedirs("out", exist_ok=True)
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
     </style>
 """, unsafe_allow_html=True)
+# Create tabs for different functionalities
+tab1, tab2 = st.tabs(["PDF to Markdown", "Batch Image Processing"])
+with tab1:
+    st.title("PDF to Markdown Converter")
 # Initialize session state if it doesn't exist
 if 'converter' not in st.session_state:
             logger.error(f"Error converting from URL: {str(e)}")
             st.error(f"Error converting from URL: {str(e)}")
     else:
+        st.warning("Please upload a file or enter a URL first")
+# Batch processing tab
+with tab2:
+    st.title("Batch Image Processing with vLLM")
+    if not VLLM_AVAILABLE:
+        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
+    else:
+        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")
+        # Ensure directories exist
+        img_dir = "img"
+        out_dir = "out"
+        os.makedirs(img_dir, exist_ok=True)
+        os.makedirs(out_dir, exist_ok=True)
+        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")
+        # Model configuration
+        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
+        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")
+        # File uploader for multiple images
+        uploaded_images = st.file_uploader(
+            "Upload image files",
+            type=['png', 'jpg', 'jpeg'],
+            accept_multiple_files=True,
+            key='image_uploader',
+            help="Drag and drop or click to select image files"
+        )
+        # Process button
+        process_clicked = st.button("Process Images", type="primary", key="process_button")
+        if process_clicked and uploaded_images:
+            try:
+                with st.spinner('Processing images...'):
+                    # Initialize LLM
+                    llm = LLM(model=MODEL_PATH, limit_mm_per_prompt={"image": 1})
+                    sampling_params = SamplingParams(
+                        temperature=0.0,
+                        max_tokens=8192
+                    )
+                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"
+                    start_time = time.time()
+                    # Create a ZIP file in memory to store all outputs
+                    zip_buffer = io.BytesIO()
+                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
+                        progress_bar = st.progress(0)
+                        status_text = st.empty()
+                        for idx, img_file in enumerate(uploaded_images):
+                            img_name = img_file.name
+                            status_text.text(f"Processing {img_name} ({idx+1}/{len(uploaded_images)})")
+                            # Open image
+                            image = Image.open(img_file).convert("RGB")
+                            # Process with vLLM
+                            llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
+                            output = llm.generate([llm_input], sampling_params=sampling_params)[0]
+                            doctags = output.outputs[0].text
+                            img_fn = os.path.splitext(img_name)[0]
+                            # Add doctags to zip
+                            zip_file.writestr(f"{img_fn}.dt", doctags)
+                            # Convert to Docling Document
+                            doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
+                            doc = DoclingDocument(name=img_fn)
+                            doc.load_from_doctags(doctags_doc)
+                            # Export as markdown and add to zip
+                            md_content = doc.export_to_markdown()
+                            zip_file.writestr(f"{img_fn}.md", md_content)
+                            # Update progress
+                            progress_bar.progress((idx + 1) / len(uploaded_images))
+                    total_time = time.time() - start_time
+                    # Offer the ZIP file for download
+                    st.success(f"Processing completed in {total_time:.2f} seconds!")
+                    zip_buffer.seek(0)
+                    st.download_button(
+                        label="Download All Results",
+                        data=zip_buffer,
+                        file_name="processed_images.zip",
+                        mime="application/zip"
+                    )
+            except Exception as e:
+                logger.error(f"Error in batch processing: {str(e)}")
+                st.error(f"Error in batch processing: {str(e)}")
+        elif process_clicked:
+            st.warning("Please upload at least one image file")

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 streamlit==1.29.0
 docling==2.7.0
-watchdog==2.3.1

 streamlit==1.29.0
 docling==2.7.0
+docling_core
+vllm
+watchdog==2.3.1
+pillow