Spaces:

Bayhaqy
/

PDF-Manipulation-App

Runtime error

File size: 7,394 Bytes

## Import Library
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os

# Browser-tab title/icon and overall page layout, gathered in one mapping
# and handed to Streamlit in a single call.
page_settings = {
    "page_title": "PDF Manipulation App",
    "page_icon": ":page_with_curl:",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**page_settings)

# Page header: title, author credit and a one-line description of the tool
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Working directory (relative to the current working directory) where
# uploaded inputs and generated outputs are written; created on first run.
destination_folder = Path('downloads')
destination_folder.mkdir(parents=True, exist_ok=True)

# Defines what options are in the form
# Form model for the "merge" workflow; passed as `model=` to
# `sp.pydantic_form`. Documented with comments rather than a class docstring
# so the schema pydantic generates from this model is not altered.
class PDFMergeRequest(BaseModel):
    # PDFs to concatenate, in the order they appear in the list. Stays None
    # until something is uploaded. The alias and description previously said
    # "Split" (copy-pasted from PDFSplitRequest) although this field feeds
    # the merge form.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )

# Form model for the "split" workflow; passed as `model=` to
# `sp.pydantic_form`. Documented with comments rather than a class docstring
# so the schema pydantic generates from this model is not altered.
class PDFSplitRequest(BaseModel):
    # Number of pages written to each output file. `ge=1` rejects 0 and
    # negative values at validation time: 0 would raise ZeroDivisionError in
    # the `numPages % pages_per_pdf` check, and a negative value would pass
    # that check yet yield an empty range of chunks.
    pages_per_pdf: int = Field(
        1,
        ge=1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
    )
    # The single PDF to split. Stays None until something is uploaded.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )

def stack_images(images):
    """Stack images vertically, top to bottom, into one new RGB image.

    adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: Iterable of PIL images. Each is pasted left-aligned directly
            below the previous one, in iteration order.

    Returns:
        A new RGB ``Image`` as wide as the widest input and as tall as all
        inputs combined. Areas to the right of narrower inputs are white.

    Raises:
        ValueError: If ``images`` is empty.
    """
    # Materialise once so generators work and the input can be walked twice.
    images = list(images)
    if not images:
        raise ValueError("stack_images() requires at least one image")

    # Size the canvas from the widest image; using only the first image's
    # width would crop any later image that is wider.
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height), 'white')

    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image

# Supported output file suffixes
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'

# The output-type selector is currently disabled (kept below for reference),
# so the output type is fixed to PDF.
#output_suffix = st.radio('Output File Type', [pdf_output, jpg_output, png_output], key='output_format')
output_suffix = pdf_output

# Section heading shown above the function selector
st.markdown("### PDF Manipulation Options")

# Let the user pick which workflow to run; the chosen label is compared
# against these same strings further down the script.
view_choice = st.radio(
    'Select a PDF Function',
    ('Merge Multiple PDFs into One', 'Split One PDF into Multiple'),
)

# Merge workflow: collect several uploaded PDFs, concatenate their pages in
# upload order, and offer the result as a download.
if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written during this run is registered here *before* it is
    # created, so the `finally` clause removes it even when reading a corrupt
    # or encrypted upload raises part-way through.
    temp_paths = []
    try:
        # Save uploaded PDFs to disk so they can be opened by path
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = destination_folder / f"input_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
            temp_paths.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload to a single writer
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
        temp_paths.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}"
            temp_paths.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # MIME type for the download; a lookup table cannot leave the name
        # unbound the way an if/elif chain without an else can.
        output_mime = {
            pdf_output: 'application/pdf',
            jpg_output: 'image/jpeg',
            png_output: 'image/png',
        }[output_suffix]

        # The bytes are read into memory here, so the file on disk can be
        # deleted as soon as the button has been created.
        st.download_button('Download Merged Document', output_path.read_bytes(), f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}", mime=output_mime)
    finally:
        # Delete temporary files (inputs, merged PDF, optional image)
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                # Registered but never created because an earlier step failed
                pass
    
elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # Save Uploaded PDF
    input_pdf_path = destination_folder / f"input_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
    input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

    # Get PDF Reader
    pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))

    if pdf.numPages % data.pages_per_pdf != 0:
        st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
        st.stop()

    # Split pdf every pages per pdf. Save each split pdf to file
    downloads = []
    for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
        output = PdfFileWriter()
        output_path = input_pdf_path.with_name(f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf")
        for letter_page in range(data.pages_per_pdf):
            output.addPage(pdf.getPage(letter_start + letter_page))

        with open(output_path, "wb") as f:
            output.write(f)

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}"
            stacked_image.save(output_path)  # format inferred

        downloads.append(output_path)
        st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1 } / {pdf.numPages})")

    # Make zip file of all split pdfs
    zip_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.zip"
    output_zip = ZipFile(str(zip_path), "w")
    for download_path in downloads:
        output_zip.write(str(download_path), arcname=download_path.name)
    output_zip.close()

    # Provide download button of the zip of split pdfs
    st.download_button(f"Download {str(zip_path)}", zip_path.read_bytes(), str(zip_path), mime='application/zip', key=str(zip_path))

    # Delete temporary files
    for download_path in downloads:
        os.remove(download_path)
    os.remove(zip_path)
    os.remove(input_pdf_path)

    # Delete the output file after download
    os.remove(output_path)