File size: 7,394 Bytes
de86199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c827ff
de86199
1c827ff
de86199
 
 
 
 
 
 
 
1c827ff
 
 
 
de86199
 
 
 
 
 
1c827ff
de86199
 
1c827ff
de86199
 
1c827ff
de86199
 
 
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
 
 
 
 
 
1c827ff
 
de86199
 
 
 
 
 
 
1c827ff
de86199
 
 
 
1c827ff
de86199
1c827ff
de86199
1c827ff
de86199
 
1c827ff
 
de86199
 
 
 
 
 
 
 
 
 
1c827ff
de86199
 
1c827ff
de86199
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
 
 
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
1c827ff
de86199
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
## Import Library
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os

# Browser-tab metadata and overall page layout
page_settings = {
    "page_title": "PDF Manipulation App",
    "page_icon": ":page_with_curl:",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**page_settings)

# Page header: title, author credit and a one-line description
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Working directory that holds uploads and generated files while a request runs
destination_folder = Path('downloads')
destination_folder.mkdir(parents=True, exist_ok=True)

# Defines what options are in the form
class PDFMergeRequest(BaseModel):
    # Form model for the merge workflow: the PDFs to be combined into one file.
    # Documented with comments rather than a docstring because pydantic copies a
    # model docstring into the generated schema, which feeds the rendered form.
    #
    # pdf_uploads: the uploaded files; None until the user submits the form.
    # The alias and description are the text shown for this field, so they
    # describe merging (they previously repeated the split form's wording).
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )

class PDFSplitRequest(BaseModel):
    # Form model for the split workflow.
    # Documented with comments rather than a docstring because pydantic copies a
    # model docstring into the generated schema, which feeds the rendered form.
    #
    # pages_per_pdf: number of pages in each output file. Constrained to >= 1:
    # zero would raise ZeroDivisionError in the divisibility check and a
    # negative value would produce no output files at all.
    pages_per_pdf: int = Field(
        1,
        ge=1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
    )
    # pdf_upload: the uploaded file; None until the user submits the form.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )

def stack_images(images):
    """Stack images vertically, top to bottom, into a single RGB image.

    Adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: Non-empty iterable of PIL images, pasted in iteration order.

    Returns:
        A new RGB image as wide as the widest input and as tall as the sum of
        all input heights. Each input is left-aligned; any area to the right
        of a narrower input stays white.

    Raises:
        ValueError: If ``images`` is empty.
    """
    images = list(images)
    if not images:
        raise ValueError("stack_images requires at least one image")

    # Size the canvas from the widest page so that pages wider than the first
    # one are not cropped when pasted.
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height), 'white')

    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image

# Supported output file extensions. The output-type selector is currently
# disabled (see the commented-out radio below), so the suffix is fixed to PDF.
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'
# output_suffix = st.radio('Output File Type', [pdf_output, jpg_output, png_output], key='output_format')
output_suffix = pdf_output

# Section heading shown above the function selector
st.markdown("### PDF Manipulation Options")

# Let the user choose which PDF operation to run
pdf_functions = ('Merge Multiple PDFs into One', 'Split One PDF into Multiple')
view_choice = st.radio('Select a PDF Function', pdf_functions)

# Merge workflow: combine several uploaded PDFs into one downloadable document
if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written during this run is recorded here so the ``finally``
    # clause can remove it even when reading or merging an upload raises.
    temp_paths = []
    try:
        # Save uploaded PDFs
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = destination_folder / f"input_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
            # Register before writing so a partially written file is cleaned up too
            temp_paths.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Copy every page of every upload, in upload order, into one writer
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
        temp_paths.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}"
            temp_paths.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # Pick the MIME type for the download; the lookup always yields a value,
        # unlike an if/elif chain that could leave the name unbound.
        mime_by_suffix = {
            pdf_output: 'application/pdf',
            jpg_output: 'image/jpeg',
            png_output: 'image/png',
        }
        output_mime = mime_by_suffix.get(output_suffix, 'application/octet-stream')

        # The file content is read into memory here, so the file on disk can be
        # removed as soon as the button has been created.
        st.download_button('Download Merged Document', output_path.read_bytes(), f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}", mime=output_mime)
    finally:
        # Delete temporary files (uploads, merged PDF and any rendered image)
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                # Registered but never created because an earlier step failed
                pass
    
elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # Save Uploaded PDF
    input_pdf_path = destination_folder / f"input_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf"
    input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

    # Get PDF Reader
    pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))

    if pdf.numPages % data.pages_per_pdf != 0:
        st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
        st.stop()

    # Split pdf every pages per pdf. Save each split pdf to file
    downloads = []
    for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
        output = PdfFileWriter()
        output_path = input_pdf_path.with_name(f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.pdf")
        for letter_page in range(data.pages_per_pdf):
            output.addPage(pdf.getPage(letter_start + letter_page))

        with open(output_path, "wb") as f:
            output.write(f)

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}{output_suffix}"
            stacked_image.save(output_path)  # format inferred

        downloads.append(output_path)
        st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1 } / {pdf.numPages})")

    # Make zip file of all split pdfs
    zip_path = destination_folder / f"output_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')}.zip"
    output_zip = ZipFile(str(zip_path), "w")
    for download_path in downloads:
        output_zip.write(str(download_path), arcname=download_path.name)
    output_zip.close()

    # Provide download button of the zip of split pdfs
    st.download_button(f"Download {str(zip_path)}", zip_path.read_bytes(), str(zip_path), mime='application/zip', key=str(zip_path))

    # Delete temporary files
    for download_path in downloads:
        os.remove(download_path)
    os.remove(zip_path)
    os.remove(input_pdf_path)

    # Delete the output file after download
    os.remove(output_path)