# NOTE: page-status text captured together with this file ("Spaces: Runtime error"); it is not part of the program.
## Import Library
import contextlib
import os
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Optional, List
from zipfile import ZipFile

import streamlit as st
import streamlit_pydantic as sp
from pdf2image import convert_from_path
from PIL import Image
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from streamlit_pydantic.types import FileContent
# Page-level settings: browser-tab title and icon, layout width, sidebar state
_page_settings = {
    "page_title": "PDF Manipulation App",
    "page_icon": ":page_with_curl:",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_page_settings)

# Page header: title, author credit and a one-line description of the tool
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Working folder for uploaded and generated files; created if it does not exist yet
destination_folder = Path("downloads")
destination_folder.mkdir(parents=True, exist_ok=True)
# Defines what options are in the form | |
# Form model for the merge view: the set of PDFs to combine into one document.
# The label and help text previously said "split" (copied from the split form);
# they now describe the merge operation this model is actually used for.
class PDFMergeRequest(BaseModel):
    # Uploaded PDFs; defaults to None, i.e. nothing uploaded yet.
    # NOTE(review): the alias is presumably what streamlit_pydantic shows as the
    # field label and uses as the input key - confirm nothing else populates this
    # model by the old alias before deploying.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )
# Form model for the split view: one PDF plus the chunk size to split it into.
class PDFSplitRequest(BaseModel):
    # Number of pages in each output PDF. Constrained to >= 1: zero would make the
    # "evenly divides" modulo check divide by zero, and a negative value would
    # produce no output at all.
    pages_per_pdf: int = Field(
        1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
        ge=1,
    )
    # The PDF to split; defaults to None, i.e. nothing uploaded yet.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )
def stack_images(images):
    """Stack images top-to-bottom into a single new RGB image.

    adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: non-empty sequence of image objects exposing ``width`` and
            ``height`` and accepted by ``Image.paste``.

    Returns:
        A new RGB image whose height is the sum of all input heights and whose
        width is the widest input, with every image pasted at x=0 in order.

    Raises:
        ValueError: if ``images`` is empty.
    """
    if not images:
        raise ValueError("stack_images() requires at least one image")

    # Size the canvas from the widest image so that no page is cropped when the
    # inputs have different widths (sizing from the first image alone would crop).
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height))

    # Paste each image directly below the previous one.
    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image
# File extensions the tool can emit
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'

# The output-format radio (key 'output_format') is currently disabled,
# so the output type is fixed to PDF.
output_suffix = pdf_output

# Section heading shown above the function picker
st.markdown("### PDF Manipulation Options")

# Let the user choose which of the two tools to display
_pdf_functions = ('Merge Multiple PDFs into One', 'Split One PDF into Multiple')
view_choice = st.radio('Select a PDF Function', _pdf_functions)
# Display the selected tool: show its form, run the merge/split, offer the result
# for download, and always delete the temporary files written along the way.
#
# NOTE(review): PdfFileReader / PdfFileWriter and their camelCase methods
# (getNumPages, getPage, addPage, numPages) are the legacy PyPDF2 API, which was
# deprecated and later removed in PyPDF2 3.0 - confirm the pinned PyPDF2 version,
# or migrate to PdfReader / PdfWriter.


def _timestamped_path(prefix, suffix):
    """Return ``destination_folder / "<prefix>_<timestamp><suffix>"``."""
    stamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')
    return destination_folder / f"{prefix}_{stamp}{suffix}"


def _remove_files(paths):
    """Delete every file in ``paths``; files that are already gone are skipped."""
    for stale_path in paths:
        # Deliberately best-effort: a path may be registered before it is written.
        with contextlib.suppress(FileNotFoundError):
            os.remove(stale_path)


if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is registered here so the ``finally`` clause can
    # clean up even when st.stop() or an exception interrupts the run.
    temp_paths = []
    try:
        # Save uploaded PDFs
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = _timestamped_path("input", ".pdf")
            temp_paths.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload, in upload order
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = _timestamped_path("output", ".pdf")
        temp_paths.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = _timestamped_path("output", output_suffix)
            temp_paths.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # MIME type matching the chosen output suffix
        output_mime = {
            pdf_output: 'application/pdf',
            jpg_output: 'image/jpeg',
            png_output: 'image/png',
        }[output_suffix]

        # The button receives the file's bytes, so the file itself can be
        # deleted as soon as the button has been created.
        st.download_button(
            'Download Merged Document',
            output_path.read_bytes(),
            _timestamped_path("output", output_suffix).name,
            mime=output_mime,
        )
    finally:
        # Delete temporary files
        _remove_files(temp_paths)

elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # Zero would divide by zero in the modulo check below; a negative value
    # would make the split loop produce nothing.
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Every file written below is registered here so the ``finally`` clause can
    # clean up even when st.stop() or an exception interrupts the run.
    temp_paths = []
    try:
        # Save uploaded PDF
        input_pdf_path = _timestamped_path("input", ".pdf")
        temp_paths.append(input_pdf_path)
        input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

        # Get PDF reader
        pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
        if pdf.numPages % data.pages_per_pdf != 0:
            st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
            st.stop()

        # Split pdf every pages per pdf. Save each split pdf to file
        downloads = []
        for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
            output = PdfFileWriter()
            output_path = _timestamped_path("output", ".pdf")
            temp_paths.append(output_path)
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image; the intermediate PDF stays
            # registered in temp_paths so it is cleaned up as well.
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                output_path = _timestamped_path("output", output_suffix)
                temp_paths.append(output_path)
                stacked_image.save(output_path)  # format inferred

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1} / {pdf.numPages})")

        # Make zip file of all split pdfs
        zip_path = _timestamped_path("output", ".zip")
        temp_paths.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # Provide download button of the zip of split pdfs. The suggested file
        # name is the bare name, not the path including the working folder.
        st.download_button(
            f"Download {str(zip_path)}",
            zip_path.read_bytes(),
            zip_path.name,
            mime='application/zip',
            key=str(zip_path),
        )
    finally:
        # Delete temporary files (each exactly once)
        _remove_files(temp_paths)