## Import Library
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os

# Set page configuration
st.set_page_config(
    page_title="PDF Manipulation App",
    page_icon=":page_with_curl:",
    layout="wide",
    initial_sidebar_state="auto",
)

# Add a title
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Make folder for storing user uploads and generated outputs
destination_folder = Path('downloads')
destination_folder.mkdir(exist_ok=True, parents=True)


# Defines what options are in the merge form
class PDFMergeRequest(BaseModel):
    # The uploaded PDFs, merged in the order they are listed.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )


# Defines what options are in the split form
class PDFSplitRequest(BaseModel):
    # Number of pages written to each output PDF.
    pages_per_pdf: int = Field(
        1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. "
                    "Should evenly divide the total number of pages.",
    )
    # The single uploaded PDF that will be split.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )


def stack_images(images):
    """Stack images vertically into one RGB image.

    The output is as wide as the first image and as tall as the sum of all
    image heights; each image is pasted at x=0 directly below the previous one.

    adapted from: https://note.nkmk.me/en/python-pillow-concat-images/
    """
    first_image = images[0]
    total_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (first_image.width, total_height))
    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image


def _timestamped_path(prefix, suffix):
    """Return an unused path ``<prefix>_<timestamp><suffix>`` in the downloads folder."""
    while True:
        stamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')
        candidate = destination_folder / f"{prefix}_{stamp}{suffix}"
        # Two calls on the same clock tick would otherwise yield the same
        # name and silently overwrite an earlier file.
        if not candidate.exists():
            return candidate


def _remove_files(paths):
    """Delete every file in *paths*, skipping files that no longer exist."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # A path may be registered for cleanup before it is written.
            pass


# Supported output file types
pdf_output = '.pdf'
jpg_output = '.jpg'
png_output = '.png'
output_mime_types = {
    pdf_output: 'application/pdf',
    jpg_output: 'image/jpeg',
    png_output: 'image/png',
}

# The output type is currently fixed to PDF. To let the user choose, replace
# the assignment below with:
#   st.radio('Output File Type', [pdf_output, jpg_output, png_output], key='output_format')
output_suffix = pdf_output

# Add a heading
st.markdown("### PDF Manipulation Options")

# Radio buttons for selecting the function
view_choice = st.radio(
    'Select a PDF Function',
    ('Merge Multiple PDFs into One', 'Split One PDF into Multiple'),
)

if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is registered here and removed in ``finally``,
    # so nothing is left on disk whichever way this branch exits.
    temp_files = []
    try:
        # Save uploaded PDFs
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = _timestamped_path("input", ".pdf")
            temp_files.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload, in upload order
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = _timestamped_path("output", ".pdf")
        temp_files.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = _timestamped_path("output", output_suffix)
            temp_files.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # Allow download; the bytes are read before the files are deleted
        st.download_button(
            'Download Merged Document',
            output_path.read_bytes(),
            _timestamped_path("output", output_suffix).name,
            mime=output_mime_types[output_suffix],
        )
    finally:
        # Delete temporary files
        _remove_files(temp_files)

elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # Reject zero/negative sizes up front: zero would raise ZeroDivisionError
    # in the divisibility check below.
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Every file written below is registered here and removed in ``finally``,
    # so nothing is left on disk whichever way this branch exits.
    temp_files = []
    try:
        # Save uploaded PDF
        input_pdf_path = _timestamped_path("input", ".pdf")
        temp_files.append(input_pdf_path)
        input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

        # Get PDF Reader
        pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
        if pdf.numPages % data.pages_per_pdf != 0:
            st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
            st.stop()

        # Split pdf every pages per pdf. Save each split pdf to file
        downloads = []
        for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
            output = PdfFileWriter()
            output_path = _timestamped_path("output", ".pdf")
            temp_files.append(output_path)
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                # The intermediate PDF stays registered in temp_files, so it
                # is still cleaned up after output_path is rebound here.
                output_path = _timestamped_path("output", output_suffix)
                temp_files.append(output_path)
                stacked_image.save(output_path)  # format inferred

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1 } / {pdf.numPages})")

        # Make zip file of all split pdfs
        zip_path = _timestamped_path("output", ".zip")
        temp_files.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # Provide download button of the zip of split pdfs; the bytes are
        # read before the files are deleted
        st.download_button(
            f"Download {str(zip_path)}",
            zip_path.read_bytes(),
            str(zip_path),
            mime='application/zip',
            key=str(zip_path),
        )
    finally:
        # Delete temporary files (each path exactly once)
        _remove_files(temp_files)