Bayhaqy's picture
Update app.py
1c827ff
## Import Library
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os
# Configure the browser tab (title, icon) and the overall page layout.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto",
    page_icon=":page_with_curl:",
    page_title="PDF Manipulation App",
)

# Page header: title, author credit and a one-line description of the tool.
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Working folder (relative to the current directory) where uploads and
# generated files are written; created on first run if it is missing.
destination_folder = Path('downloads')
destination_folder.mkdir(parents=True, exist_ok=True)
# Defines what options are in the form
class PDFMergeRequest(BaseModel):
    """Form model for the merge function: the PDFs that will be joined."""

    # Uploaded PDF files; stays None until files have been submitted.
    # The alias/description previously read "PDF File to Split", copied from
    # the split form, which mislabelled the merge upload field.
    # NOTE(review): the alias is presumably what streamlit_pydantic shows as
    # the field label and uses as the input key - confirm nothing else
    # constructs this model by the old alias.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )
class PDFSplitRequest(BaseModel):
    """Form model for the split function: chunk size and the PDF to split."""

    # Number of pages in every output PDF. Constrained to >= 1 because the
    # split code computes `numPages % pages_per_pdf` and steps a range by this
    # value: 0 would raise ZeroDivisionError and negatives would yield nothing.
    pages_per_pdf: int = Field(
        1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
        ge=1,
    )
    # The uploaded PDF; stays None until a file has been submitted.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )
def stack_images(images):
    """Stack images vertically into one RGB image.

    Adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: Iterable of PIL images, pasted top to bottom in iteration
            order. Each image is left-aligned at x = 0.

    Returns:
        A new RGB image whose width is the widest input's width and whose
        height is the sum of all input heights. Areas not covered by a
        narrower image keep the default fill of ``Image.new``.

    Raises:
        ValueError: If ``images`` is empty.
    """
    # Materialize once so generators work and the input can be scanned twice.
    images = list(images)
    if not images:
        raise ValueError("stack_images() requires at least one image")

    # Size the canvas for the widest image; using only the first image's
    # width would crop any later image that is wider.
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height))

    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image
# File suffixes the conversion code knows how to produce.
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'

# The output-type radio button is currently disabled, so the output format
# is fixed to PDF.
output_suffix = pdf_output

# Section heading for the function selector.
st.markdown("### PDF Manipulation Options")

# Let the user choose between merging several PDFs and splitting one PDF.
view_choice = st.radio(
    'Select a PDF Function',
    (
        'Merge Multiple PDFs into One',
        'Split One PDF into Multiple',
    ),
)
def _timestamp():
    """Return the current local time formatted for use in temporary file names."""
    return datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')


def _remove_files(paths):
    """Delete every path in *paths*, skipping files that no longer exist."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Never written or already removed: nothing left to clean up.
            pass


# Display relevant instructions
if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is registered here and removed in `finally`,
    # so nothing is left in the folder if reading or writing a PDF fails.
    temp_files = []
    try:
        # Save Uploaded PDFs (the index keeps names unique even when two
        # timestamps are identical on a coarse clock)
        uploaded_paths = []
        for index, pdf_data in enumerate(data.pdf_uploads):
            input_pdf_path = destination_folder / f"input_{_timestamp()}_{index}.pdf"
            temp_files.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload, in upload-list order
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = destination_folder / f"output_{_timestamp()}.pdf"
        temp_files.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{_timestamp()}{output_suffix}"
            temp_files.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # MIME type matching the selected output suffix
        output_mime = {
            pdf_output: 'application/pdf',
            jpg_output: 'image/jpeg',
            png_output: 'image/png',
        }[output_suffix]

        # The bytes are read into memory here, so the files on disk can be
        # deleted as soon as the button has been created.
        st.download_button('Download Merged Document', output_path.read_bytes(), f"output_{_timestamp()}{output_suffix}", mime=output_mime)
    finally:
        # Delete temporary files
        _remove_files(temp_files)

elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # A chunk size below 1 would divide by zero (0) or produce no files
    # at all (negative), so reject it before touching the PDF.
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Every file written below is registered here and removed in `finally`.
    # This also runs when st.stop() ends the script early, so the saved
    # upload is no longer left behind when the page count does not divide.
    temp_files = []
    try:
        # Save Uploaded PDF
        input_pdf_path = destination_folder / f"input_{_timestamp()}.pdf"
        temp_files.append(input_pdf_path)
        input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

        # Get PDF Reader
        pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
        total_pages = pdf.numPages
        if total_pages == 0:
            st.warning("The uploaded PDF has no pages to split")
            st.stop()
        if total_pages % data.pages_per_pdf != 0:
            st.warning(f"Cannot divide pdf with {total_pages} pages into pdfs with {data.pages_per_pdf} pages per")
            st.stop()

        # Split pdf every pages per pdf. Save each split pdf to file
        downloads = []
        for part_number, letter_start in enumerate(range(0, total_pages, data.pages_per_pdf), start=1):
            output = PdfFileWriter()
            # The part number keeps names unique even if two timestamps collide
            output_path = input_pdf_path.with_name(f"output_{_timestamp()}_{part_number}.pdf")
            temp_files.append(output_path)
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                output_path = destination_folder / f"output_{_timestamp()}_{part_number}{output_suffix}"
                temp_files.append(output_path)
                stacked_image.save(output_path)  # format inferred

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1} / {total_pages})")

        # Make zip file of all split pdfs
        zip_path = destination_folder / f"output_{_timestamp()}.zip"
        temp_files.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # Provide download button of the zip of split pdfs. The archive bytes
        # are read into memory here, so the files can be deleted right after.
        st.download_button(f"Download {str(zip_path)}", zip_path.read_bytes(), str(zip_path), mime='application/zip', key=str(zip_path))
    finally:
        # Delete temporary files. Each file is removed exactly once; the old
        # code removed the last split file twice and always ended a
        # successful split with FileNotFoundError.
        _remove_files(temp_files)