|
import streamlit as st |
|
from docling.document_converter import DocumentConverter |
|
import tempfile |
|
import os |
|
import logging |
|
import time |
|
from PIL import Image |
|
import zipfile |
|
import io |
|
|
|
|
|
# Optional dependencies for the batch image tab. If any of these imports
# fails, the tab shows an installation hint instead of the processing UI.
try:
    from vllm import LLM, SamplingParams
    from docling_core.types.doc import DoclingDocument
    from docling_core.types.doc.document import DocTagsDocument
    from pathlib import Path
except ImportError:
    VLLM_AVAILABLE = False
else:
    # Reached only when every import above succeeded.
    VLLM_AVAILABLE = True
|
|
|
|
|
# Make sure the working directories exist (relative to the current working
# directory); exist_ok=True makes this safe to repeat on every script run.
for _workdir in ("img", "out"):
    os.makedirs(_workdir, exist_ok=True)

# Configure logging at DEBUG level and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
|
|
|
|
# Stylesheet injected into the page: restyles the file uploader, its primary
# button, regular buttons (including hover state) and the upload dropzone.
_CUSTOM_CSS = """
<style>
.stFileUploader {
    padding: 1rem;
}

button[data-testid="stFileUploaderButtonPrimary"] {
    background-color: #000660 !important;
    border: none !important;
    color: white !important;
}

.stButton button {
    background-color: #006666;
    border: none !important;
    color: white;
    padding: 0.5rem 2rem !important;
}
.stButton button:hover {
    background-color: #008080 !important;
    color: white !important;
    border-color: #008080 !important;
}
.upload-text {
    font-size: 1.2rem;
    margin-bottom: 1rem;
}
div[data-testid="stFileUploadDropzone"]:hover {
    border-color: #006666 !important;
    background-color: rgba(0, 102, 102, 0.05) !important;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is rendered as HTML
# rather than shown as text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# One tab per workflow; the names tab1/tab2 are used by the sections below.
_TAB_LABELS = ["PDF to Markdown", "Batch Image Processing"]
tab1, tab2 = st.tabs(_TAB_LABELS)
|
|
|
def _markdown_name_from_url(url: str) -> str:
    """Derive a ``.md`` download filename from the last path segment of *url*.

    The query string and fragment are discarded and only the final extension
    is removed, so ``https://host/my.report.pdf?dl=1`` yields
    ``my.report.md``. Falls back to ``document.md`` when no name is left.
    """
    path_only = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    stem = os.path.splitext(path_only.rsplit('/', 1)[-1])[0]
    return (stem or 'document') + '.md'


def _offer_markdown_download(markdown_text: str, output_filename: str) -> None:
    """Show the success message and a download button for the Markdown text."""
    st.success("Conversion completed!")
    st.download_button(
        label="Download Markdown file",
        data=markdown_text,
        file_name=output_filename,
        mime="text/markdown"
    )


with tab1:
    st.title("PDF to Markdown Converter")

    # Create the converter once and keep it in session state so later script
    # reruns reuse the same instance.
    if 'converter' not in st.session_state:
        try:
            st.session_state.converter = DocumentConverter()
            logger.debug("Converter successfully created")
        except Exception as e:
            # logger.exception records the traceback along with the message.
            logger.exception("Error creating converter")
            st.error(f"Error creating converter: {str(e)}")
            st.stop()

    uploaded_file = st.file_uploader(
        "Upload your PDF file",
        type=['pdf'],
        key='pdf_uploader',
        help="Drag and drop or click to select a PDF file (max 200MB)"
    )

    st.markdown("<br>", unsafe_allow_html=True)
    url = st.text_input("Or enter a PDF URL")
    # Whitespace-only input is treated as "no URL".
    source_url = url.strip()

    convert_clicked = st.button("Convert to Markdown", type="primary")

    if convert_clicked:
        if uploaded_file is not None:
            # The upload takes precedence over the URL when both are given.
            tmp_path = None
            try:
                with st.spinner('Converting file...'):
                    # delete=False keeps the file on disk after the handle is
                    # closed, so the converter can open it by path.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                        # Record the path first so cleanup also covers a
                        # failed write.
                        tmp_path = tmp_file.name
                        tmp_file.write(uploaded_file.getvalue())
                    logger.debug(f"Temporary file created at: {tmp_path}")

                    try:
                        result = st.session_state.converter.convert(tmp_path)
                        markdown_text = result.document.export_to_markdown()

                        output_filename = os.path.splitext(uploaded_file.name)[0] + '.md'
                        _offer_markdown_download(markdown_text, output_filename)
                    except Exception as e:
                        logger.exception("Error converting file")
                        st.error(f"Error converting file: {str(e)}")
            except Exception as e:
                logger.exception("Error processing file")
                st.error(f"Error processing file: {str(e)}")
            finally:
                # Always remove the temporary copy, whichever step failed.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)
                    logger.debug("Temporary file deleted")

        elif source_url:
            try:
                with st.spinner('Converting from URL...'):
                    logger.debug(f"Converting from URL: {source_url}")
                    result = st.session_state.converter.convert(source_url)
                    markdown_text = result.document.export_to_markdown()

                    _offer_markdown_download(
                        markdown_text, _markdown_name_from_url(source_url)
                    )
            except Exception as e:
                logger.exception("Error converting from URL")
                st.error(f"Error converting from URL: {str(e)}")
        else:
            st.warning("Please upload a file or enter a URL first")
|
|
|
|
|
with tab2:
    st.title("Batch Image Processing with vLLM")

    if not VLLM_AVAILABLE:
        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
    else:
        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")

        img_dir = "img"
        out_dir = "out"
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): nothing in this block reads from img_dir or writes to
        # out_dir -- images come from the uploader and results go into an
        # in-memory zip. Confirm whether this message is still accurate.
        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")

        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")

        uploaded_images = st.file_uploader(
            "Upload image files",
            type=['png', 'jpg', 'jpeg'],
            accept_multiple_files=True,
            key='image_uploader',
            help="Drag and drop or click to select image files"
        )

        process_clicked = st.button("Process Images", type="primary", key="process_button")

        if process_clicked and uploaded_images:
            try:
                with st.spinner('Processing images...'):
                    # Keep the engine in session state (as the PDF tab does
                    # with its converter) so it is not rebuilt on every click;
                    # rebuild only when the model path changes.
                    cached_engine = st.session_state.get("vllm_engine")
                    if cached_engine is None or cached_engine[0] != MODEL_PATH:
                        # Drop the previous engine before creating a new one.
                        st.session_state.pop("vllm_engine", None)
                        cached_engine = (
                            MODEL_PATH,
                            LLM(model=MODEL_PATH, limit_mm_per_prompt={"image": 1}),
                        )
                        st.session_state["vllm_engine"] = cached_engine
                    llm = cached_engine[1]

                    sampling_params = SamplingParams(
                        temperature=0.0,
                        max_tokens=8192
                    )

                    # Presumably the chat format expected by the default
                    # model -- confirm when using a different MODEL_PATH.
                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"

                    start_time = time.time()
                    total_images = len(uploaded_images)

                    # Results are collected into an in-memory zip archive.
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                        progress_bar = st.progress(0)
                        status_text = st.empty()
                        used_stems = set()

                        for idx, img_file in enumerate(uploaded_images):
                            img_name = img_file.name
                            status_text.text(f"Processing {img_name} ({idx+1}/{total_images})")

                            image = Image.open(img_file).convert("RGB")

                            llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
                            output = llm.generate([llm_input], sampling_params=sampling_params)[0]

                            doctags = output.outputs[0].text

                            # Uploads that share a stem (e.g. scan.png and
                            # scan.jpg) would otherwise produce duplicate
                            # archive entries; add a numeric suffix instead.
                            base_stem = os.path.splitext(img_name)[0]
                            img_fn = base_stem
                            duplicate_no = 1
                            while img_fn in used_stems:
                                duplicate_no += 1
                                img_fn = f"{base_stem}_{duplicate_no}"
                            used_stems.add(img_fn)

                            # Raw model output.
                            zip_file.writestr(f"{img_fn}.dt", doctags)

                            doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
                            doc = DoclingDocument(name=img_fn)
                            doc.load_from_doctags(doctags_doc)

                            # Markdown export of the same page.
                            md_content = doc.export_to_markdown()
                            zip_file.writestr(f"{img_fn}.md", md_content)

                            progress_bar.progress((idx + 1) / total_images)

                    total_time = time.time() - start_time

                    st.success(f"Processing completed in {total_time:.2f} seconds!")

                    # Rewind so the download starts at the beginning of the
                    # finished archive.
                    zip_buffer.seek(0)
                    st.download_button(
                        label="Download All Results",
                        data=zip_buffer,
                        file_name="processed_images.zip",
                        mime="application/zip"
                    )

            except Exception as e:
                # logger.exception records the traceback along with the message.
                logger.exception("Error in batch processing")
                st.error(f"Error in batch processing: {str(e)}")

        elif process_clicked:
            st.warning("Please upload at least one image file")