Bayhaqy committed on
Commit
de86199
·
1 Parent(s): a77a76e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -0
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Import Library
2
+ import tempfile
3
+ from datetime import datetime
4
+ from io import BytesIO
5
+ from pathlib import Path
6
+ from zipfile import ZipFile
7
+ import streamlit as st
8
+ import streamlit_pydantic as sp
9
+ from typing import Optional, List
10
+ from streamlit_pydantic.types import FileContent
11
+ from pydantic import BaseModel, Field
12
+ from PyPDF2 import PdfFileWriter, PdfFileReader
13
+ from pdf2image import convert_from_path
14
+ from PIL import Image
15
+ import os
16
+
17
# Page-level settings: browser tab title/icon, wide layout, default sidebar state
APP_TITLE = "PDF Manipulation App"
st.set_page_config(
    page_title=APP_TITLE,
    page_icon=":page_with_curl:",
    layout="wide",
    initial_sidebar_state="auto",
)

# Page header: title, author credit and a one-line description of the tool
st.title(APP_TITLE)
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Folder that holds uploaded and generated files; created if it does not exist yet
destination_folder = Path("downloads")
destination_folder.mkdir(parents=True, exist_ok=True)
33
+
34
+ # Defines what options are in the form
35
class PDFMergeRequest(BaseModel):
    """Form model for the merge view: the PDF files to combine into one document.

    Attributes:
        pdf_uploads: The uploaded PDF files, or ``None`` when nothing has been
            submitted yet. The field is exposed under the alias
            "PDF Files to Merge".
    """

    # The alias/description previously read "PDF File to Split" / "PDF that needs
    # to be split" (copied from the split form), which mislabelled the merge form.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged into one",
    )
48
+
49
class PDFSplitRequest(BaseModel):
    """Form model for the split view: one PDF plus the size of each output chunk.

    Attributes:
        pages_per_pdf: Number of pages in each output PDF (alias
            "Pages per Split"). Defaults to 1 and must be at least 1.
        pdf_upload: The uploaded PDF (alias "PDF File to Split"), or ``None``
            when nothing has been submitted yet.
    """

    pages_per_pdf: int = Field(
        1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
        # Zero would divide by zero and negatives would break the page range
        # when this value is used as a modulus / range step by the split view.
        ge=1,
    )
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )
66
+
67
def stack_images(images):
    """Stack images vertically into a single RGB image.

    Parameters:
    - images (Iterable[Image]): Images to stack, top to bottom, in iteration order.

    Returns:
    - output_image (Image): A new RGB image as wide as the widest input and as
      tall as the sum of the input heights. Each input is pasted left-aligned
      directly below the previous one; any area to the right of a narrower
      input is left white.

    Raises:
    - ValueError: If ``images`` is empty.
    """
    # Materialise once so generators work and the sequence can be scanned twice.
    images = list(images)
    if not images:
        raise ValueError("stack_images() requires at least one image")

    # Use the widest image (not just the first) so wider later pages are not cropped.
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new("RGB", (canvas_width, canvas_height), "white")

    next_y = 0
    for image in images:
        output_image.paste(image, (0, next_y))
        next_y += image.height
    return output_image
85
+
86
# File extensions the app knows how to produce
pdf_output, jpg_output, png_output = ".pdf", ".jpg", ".png"

# The output-format picker is disabled, so the output is always a PDF.
# To offer the choice again, replace the assignment below with:
# output_suffix = st.radio("Output File Type", [pdf_output, jpg_output, png_output], key="output_format")
output_suffix = pdf_output

# Section heading above the function selector
st.markdown("### PDF Manipulation Options")

# Let the user pick which operation to run
view_choice = st.radio(
    "Select a PDF Function",
    ("Merge Multiple PDFs into One", "Split One PDF into Multiple"),
)
98
+
99
# Run the selected PDF function: show its form, build the result, offer it for download.
# NOTE(review): PdfFileReader / PdfFileWriter / addPage / getPage / numPages are the
# legacy PyPDF2 names — confirm the pinned PyPDF2 version still provides them.

# MIME type reported to the browser for each supported output extension
_MIME_TYPES = {
    pdf_output: "application/pdf",
    jpg_output: "image/jpeg",
    png_output: "image/png",
}


def _timestamp():
    """Return the current local time as a filename-safe string with microseconds."""
    # Kept in a helper so callers do not nest double quotes inside an f-string,
    # which is a syntax error before Python 3.12.
    return datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")


def _remove_files(paths):
    """Delete every file in *paths*, skipping any that no longer exist."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone; cleanup is best-effort and must not mask other errors.
            pass


if view_choice == "Merge Multiple PDFs into One":
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if the user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is recorded here and removed in the finally block,
    # so nothing is left behind when an error or st.stop() interrupts the run.
    temporary_paths = []
    try:
        # Save uploaded PDFs; the index keeps names unique even if two timestamps coincide
        uploaded_paths = []
        for upload_index, pdf_data in enumerate(data.pdf_uploads):
            input_pdf_path = destination_folder / f"input_{_timestamp()}_{upload_index:03d}.pdf"
            temporary_paths.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload, in upload order, to one writer
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = destination_folder / f"output_{_timestamp()}.pdf"
        temporary_paths.append(output_pdf_path)
        with open(str(output_pdf_path), "wb") as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{_timestamp()}{output_suffix}"
            temporary_paths.append(output_path)
            stacked_image.save(output_path)  # format inferred from the suffix

        output_mime = _MIME_TYPES[output_suffix]

        # The bytes are read into memory here, so the files can be deleted right after
        st.download_button(
            "Download Merged Document",
            output_path.read_bytes(),
            f"output_{_timestamp()}{output_suffix}",
            mime=output_mime,
        )
    finally:
        _remove_files(temporary_paths)

elif view_choice == "Split One PDF into Multiple":
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if the user hasn't submitted a pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()

    # Guard here as well: zero would divide by zero below, negatives would break the range
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Every file written below is recorded here and removed in the finally block,
    # so nothing is left behind when an error or st.stop() interrupts the run.
    temporary_paths = []
    try:
        # Save uploaded PDF
        input_pdf_path = destination_folder / f"input_{_timestamp()}.pdf"
        temporary_paths.append(input_pdf_path)
        input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

        # Read from an in-memory copy so no handle on the temporary file stays open
        pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
        total_pages = pdf.numPages

        if total_pages % data.pages_per_pdf != 0:
            st.warning(f"Cannot divide pdf with {total_pages} pages into pdfs with {data.pages_per_pdf} pages per")
            st.stop()

        # Split the pdf every pages_per_pdf pages and save each part to its own file
        downloads = []
        for part_number, letter_start in enumerate(range(0, total_pages, data.pages_per_pdf), start=1):
            output = PdfFileWriter()
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))

            # The part number keeps names unique even if two timestamps coincide
            output_path = input_pdf_path.with_name(f"output_{_timestamp()}_{part_number:03d}.pdf")
            temporary_paths.append(output_path)
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image; the intermediate PDF stays tracked for cleanup
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                output_path = destination_folder / f"output_{_timestamp()}_{part_number:03d}{output_suffix}"
                temporary_paths.append(output_path)
                stacked_image.save(output_path)  # format inferred from the suffix

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1} / {total_pages})")

        # Make a zip file of all split parts; the context manager closes it even on error
        zip_path = destination_folder / f"output_{_timestamp()}.zip"
        temporary_paths.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # The bytes are read into memory here, so the files can be deleted right after
        st.download_button(
            f"Download {str(zip_path)}",
            zip_path.read_bytes(),
            str(zip_path),
            mime="application/zip",
            key=str(zip_path),
        )
    finally:
        # Each file is removed exactly once; the previous code removed the last
        # split output a second time and crashed with FileNotFoundError.
        _remove_files(temporary_paths)