Spaces:
Runtime error
Runtime error
File size: 7,394 Bytes
de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
## Import Library
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os
# Configure the browser tab and the overall page layout.
_PAGE_CONFIG = {
    "page_title": "PDF Manipulation App",
    "page_icon": ":page_with_curl:",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_CONFIG)

# Page header: title, author credit, and a one-line description.
st.title("PDF Manipulation App")
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Working folder for uploaded and generated files; created on first run.
destination_folder = Path('downloads')
destination_folder.mkdir(parents=True, exist_ok=True)
# Defines what options are in the form
class PDFMergeRequest(BaseModel):
    """Form model for the merge view: the set of PDFs to combine."""

    # Uploaded PDF files; defaults to None until the user provides some.
    # The alias/description previously said "Split" (copied from the split
    # form), which mislabelled the merge upload field.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged",
    )
class PDFSplitRequest(BaseModel):
    """Form model for the split view: chunk size and the PDF to split."""

    # Number of pages per output PDF. Constrained to >= 1 because the split
    # logic uses it as a modulus and as a range step; 0 would raise
    # ZeroDivisionError and negatives would produce no output.
    pages_per_pdf: int = Field(
        1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
        ge=1,
    )
    # Uploaded PDF file; defaults to None until the user provides one.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )
def stack_images(images):
    """Stack images vertically into a single RGB image.

    Adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: Iterable of PIL images, pasted top to bottom in order.

    Returns:
        A new RGB image whose height is the sum of the input heights and
        whose width is the widest input width. Narrower images are
        left-aligned on a white background.

    Raises:
        ValueError: If ``images`` is empty.
    """
    images = list(images)
    if not images:
        raise ValueError("stack_images() requires at least one image")

    # Size the canvas from the widest image so no page is cropped when
    # the inputs have different widths.
    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height), 'white')

    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image
# File extensions the app knows how to produce.
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'

# The output-type selector is currently disabled, so the output is always PDF.
#output_suffix = st.radio('Output File Type', [pdf_output, jpg_output, png_output], key='output_format')
output_suffix = pdf_output

# Section heading shown above the function selector.
st.markdown("### PDF Manipulation Options")

# Let the user choose between merging and splitting.
view_choice = st.radio(
    'Select a PDF Function',
    (
        'Merge Multiple PDFs into One',
        'Split One PDF into Multiple',
    ),
)
def _timestamp():
    """Return the current local time as a microsecond-resolution file-name stamp."""
    return datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')


def _remove_files(paths):
    """Delete every file in *paths*, skipping any that no longer exist."""
    for stale_path in paths:
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            # Cleanup is best-effort: a path may have been registered
            # before the file was actually written.
            pass


# NOTE(review): PdfFileReader / PdfFileWriter and their camelCase methods are
# the legacy PyPDF2 API and raise a deprecation error on PyPDF2 >= 3.0 —
# confirm the pinned version or migrate to PdfReader / PdfWriter.

# Display relevant instructions
if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")
    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is registered here so the ``finally`` clause
    # removes it even when merging fails part-way through.
    temp_paths = []
    try:
        # Save Uploaded PDFs
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = destination_folder / f"input_{_timestamp()}.pdf"
            temp_paths.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload to one writer
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = destination_folder / f"output_{_timestamp()}.pdf"
        temp_paths.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = destination_folder / f"output_{_timestamp()}{output_suffix}"
            temp_paths.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # Allow download
        output_mime = {
            pdf_output: 'application/pdf',
            jpg_output: 'image/jpeg',
            png_output: 'image/png',
        }[output_suffix]
        # The bytes are read into memory here, so the file on disk can be
        # deleted as soon as the button has been created.
        st.download_button(
            'Download Merged Document',
            output_path.read_bytes(),
            f"output_{_timestamp()}{output_suffix}",
            mime=output_mime,
        )
    finally:
        # Delete temporary files
        _remove_files(temp_paths)

elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")
    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()
    # Guard the modulus / range step below against zero and negative values.
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Every file written below is registered here so the ``finally`` clause
    # removes it exactly once, including when ``st.stop()`` aborts the run.
    temp_paths = []
    try:
        # Save Uploaded PDF
        input_pdf_path = destination_folder / f"input_{_timestamp()}.pdf"
        temp_paths.append(input_pdf_path)
        input_pdf_path.write_bytes(data.pdf_upload.as_bytes())

        # Get PDF Reader
        pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
        if pdf.numPages % data.pages_per_pdf != 0:
            st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
            st.stop()

        # Split pdf every pages per pdf. Save each split pdf to file
        downloads = []
        for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
            output = PdfFileWriter()
            output_path = input_pdf_path.with_name(f"output_{_timestamp()}.pdf")
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))
            temp_paths.append(output_path)
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                output_path = destination_folder / f"output_{_timestamp()}{output_suffix}"
                temp_paths.append(output_path)
                stacked_image.save(output_path)  # format inferred

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1 } / {pdf.numPages})")

        # Make zip file of all split pdfs
        zip_path = destination_folder / f"output_{_timestamp()}.zip"
        temp_paths.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # Provide download button of the zip of split pdfs. The bytes are
        # read into memory, so the zip on disk can be removed right after.
        st.download_button(f"Download {str(zip_path)}", zip_path.read_bytes(), str(zip_path), mime='application/zip', key=str(zip_path))
    finally:
        # Delete temporary files
        _remove_files(temp_paths)