feras-vbrl committed on
Commit
195dd9b
·
verified ·
1 Parent(s): ad3cd87

Upload 4 files

Browse files
Files changed (3) hide show
  1. README.md +31 -5
  2. app.py +128 -2
  3. requirements.txt +4 -1
README.md CHANGED
@@ -17,21 +17,47 @@ This application converts PDF documents to Markdown format. It uses the `docling
17
 
18
  - Upload PDF files directly
19
  - Convert PDFs from URLs
20
- - Download the resulting Markdown file
 
21
  - Clean, user-friendly interface
22
 
23
  ## How to Use
24
 
25
- 1. Upload a PDF file using the file uploader or enter a URL to a PDF document
26
- 2. Click the "Convert to Markdown" button
27
- 3. Once conversion is complete, download the Markdown file
 
 
 
 
 
 
 
 
 
28
 
29
  ## Technical Details
30
 
31
  Built with:
32
  - Streamlit 1.29.0
33
  - Docling 2.7.0
 
 
 
34
 
35
  ## Deployment
36
 
37
- This application is deployed on Hugging Face Spaces.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  - Upload PDF files directly
19
  - Convert PDFs from URLs
20
+ - Batch process multiple images using vLLM
21
+ - Download the resulting Markdown files
22
  - Clean, user-friendly interface
23
 
24
  ## How to Use
25
 
26
+ ### PDF to Markdown
27
+ 1. Select the "PDF to Markdown" tab
28
+ 2. Upload a PDF file using the file uploader or enter a URL to a PDF document
29
+ 3. Click the "Convert to Markdown" button
30
+ 4. Once conversion is complete, download the Markdown file
31
+
32
+ ### Batch Image Processing
33
+ 1. Select the "Batch Image Processing" tab
34
+ 2. Upload multiple image files (PNG, JPG, JPEG)
35
+ 3. Optionally customize the model path and prompt text
36
+ 4. Click the "Process Images" button
37
+ 5. Once processing is complete, download the ZIP file containing all results
38
 
39
  ## Technical Details
40
 
41
  Built with:
42
  - Streamlit 1.29.0
43
  - Docling 2.7.0
44
+ - docling_core
45
+ - vLLM (for batch processing)
46
+ - Python 3.12
47
 
48
  ## Deployment
49
 
50
+ This application is deployed on Hugging Face Spaces.
51
+
52
+ To deploy this application:
53
+ 1. Create a new Space on Hugging Face (https://huggingface.co/spaces)
54
+ 2. Choose "Streamlit" as the SDK
55
+ 3. Upload all these files to the Space repository:
56
+ - app.py
57
+ - requirements.txt
58
+ - README.md
59
+ - runtime.txt
60
+
61
+ The application will automatically create any necessary directories when it starts.
62
+
63
+ Note: The vLLM functionality requires significant computational resources, so you may need to select a more powerful hardware configuration for your Space.
app.py CHANGED
@@ -3,6 +3,24 @@ from docling.document_converter import DocumentConverter
3
  import tempfile
4
  import os
5
  import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Configure logging
8
  logging.basicConfig(level=logging.DEBUG)
@@ -43,7 +61,11 @@ st.markdown("""
43
  </style>
44
  """, unsafe_allow_html=True)
45
 
46
- st.title("PDF to Markdown Converter")
 
 
 
 
47
 
48
  # Initialize session state if it doesn't exist
49
  if 'converter' not in st.session_state:
@@ -128,4 +150,108 @@ if convert_clicked:
128
  logger.error(f"Error converting from URL: {str(e)}")
129
  st.error(f"Error converting from URL: {str(e)}")
130
  else:
131
- st.warning("Please upload a file or enter a URL first")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import tempfile
4
  import os
5
  import logging
6
+ import time
7
+ from PIL import Image
8
+ import zipfile
9
+ import io
10
+
11
+ # vLLM and docling_core imports for batch processing
12
+ try:
13
+ from vllm import LLM, SamplingParams
14
+ from docling_core.types.doc import DoclingDocument
15
+ from docling_core.types.doc.document import DocTagsDocument
16
+ from pathlib import Path
17
+ VLLM_AVAILABLE = True
18
+ except ImportError:
19
+ VLLM_AVAILABLE = False
20
+
21
+ # Create necessary directories
22
+ os.makedirs("img", exist_ok=True)
23
+ os.makedirs("out", exist_ok=True)
24
 
25
  # Configure logging
26
  logging.basicConfig(level=logging.DEBUG)
 
61
  </style>
62
  """, unsafe_allow_html=True)
63
 
64
+ # Create tabs for different functionalities
65
+ tab1, tab2 = st.tabs(["PDF to Markdown", "Batch Image Processing"])
66
+
67
+ with tab1:
68
+ st.title("PDF to Markdown Converter")
69
 
70
  # Initialize session state if it doesn't exist
71
  if 'converter' not in st.session_state:
 
150
  logger.error(f"Error converting from URL: {str(e)}")
151
  st.error(f"Error converting from URL: {str(e)}")
152
  else:
153
+ st.warning("Please upload a file or enter a URL first")
154
+
155
+ # Batch processing tab
156
+ with tab2:
157
+ st.title("Batch Image Processing with vLLM")
158
+
159
+ if not VLLM_AVAILABLE:
160
+ st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
161
+ else:
162
+ st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")
163
+
164
+ # Ensure directories exist
165
+ img_dir = "img"
166
+ out_dir = "out"
167
+ os.makedirs(img_dir, exist_ok=True)
168
+ os.makedirs(out_dir, exist_ok=True)
169
+
170
+ st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")
171
+
172
+ # Model configuration
173
+ MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
174
+ PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")
175
+
176
+ # File uploader for multiple images
177
+ uploaded_images = st.file_uploader(
178
+ "Upload image files",
179
+ type=['png', 'jpg', 'jpeg'],
180
+ accept_multiple_files=True,
181
+ key='image_uploader',
182
+ help="Drag and drop or click to select image files"
183
+ )
184
+
185
+ # Process button
186
+ process_clicked = st.button("Process Images", type="primary", key="process_button")
187
+
188
+ if process_clicked and uploaded_images:
189
+ try:
190
+ with st.spinner('Processing images...'):
191
+ # Initialize LLM
192
+ llm = LLM(model=MODEL_PATH, limit_mm_per_prompt={"image": 1})
193
+
194
+ sampling_params = SamplingParams(
195
+ temperature=0.0,
196
+ max_tokens=8192
197
+ )
198
+
199
+ chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"
200
+
201
+ start_time = time.time()
202
+
203
+ # Create a ZIP file in memory to store all outputs
204
+ zip_buffer = io.BytesIO()
205
+ with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
206
+
207
+ progress_bar = st.progress(0)
208
+ status_text = st.empty()
209
+
210
+ for idx, img_file in enumerate(uploaded_images):
211
+ img_name = img_file.name
212
+ status_text.text(f"Processing {img_name} ({idx+1}/{len(uploaded_images)})")
213
+
214
+ # Open image
215
+ image = Image.open(img_file).convert("RGB")
216
+
217
+ # Process with vLLM
218
+ llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
219
+ output = llm.generate([llm_input], sampling_params=sampling_params)[0]
220
+
221
+ doctags = output.outputs[0].text
222
+ img_fn = os.path.splitext(img_name)[0]
223
+
224
+ # Add doctags to zip
225
+ zip_file.writestr(f"{img_fn}.dt", doctags)
226
+
227
+ # Convert to Docling Document
228
+ doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
229
+ doc = DoclingDocument(name=img_fn)
230
+ doc.load_from_doctags(doctags_doc)
231
+
232
+ # Export as markdown and add to zip
233
+ md_content = doc.export_to_markdown()
234
+ zip_file.writestr(f"{img_fn}.md", md_content)
235
+
236
+ # Update progress
237
+ progress_bar.progress((idx + 1) / len(uploaded_images))
238
+
239
+ total_time = time.time() - start_time
240
+
241
+ # Offer the ZIP file for download
242
+ st.success(f"Processing completed in {total_time:.2f} seconds!")
243
+
244
+ zip_buffer.seek(0)
245
+ st.download_button(
246
+ label="Download All Results",
247
+ data=zip_buffer,
248
+ file_name="processed_images.zip",
249
+ mime="application/zip"
250
+ )
251
+
252
+ except Exception as e:
253
+ logger.error(f"Error in batch processing: {str(e)}")
254
+ st.error(f"Error in batch processing: {str(e)}")
255
+
256
+ elif process_clicked:
257
+ st.warning("Please upload at least one image file")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  streamlit==1.29.0
2
  docling==2.7.0
3
- watchdog==2.3.1
 
 
 
 
1
  streamlit==1.29.0
2
  docling==2.7.0
3
+ docling_core
4
+ vllm
5
+ watchdog==2.3.1
6
+ pillow