Spaces:
Runtime error
Runtime error
File size: 7,394 Bytes
de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 1c827ff de86199 |
|
# Import libraries
import tempfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import streamlit as st
import streamlit_pydantic as sp
from typing import Optional, List
from streamlit_pydantic.types import FileContent
from pydantic import BaseModel, Field
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from PIL import Image
import os
# Page configuration: browser-tab title, icon, layout and sidebar state.
page_settings = {
    "page_title": "PDF Manipulation App",
    "page_icon": ":page_with_curl:",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**page_settings)

# Page header: title (same text as the tab title), author credit, description.
st.title(page_settings["page_title"])
st.caption("Created by Bayhaqy")
st.markdown("This is tools for join and split file PDF")

# Folder that holds uploaded and generated files; created if it is missing.
destination_folder = Path("downloads")
destination_folder.mkdir(parents=True, exist_ok=True)
# Defines what options are in the form
class PDFMergeRequest(BaseModel):
    """Form model for the merge tool: the set of PDFs to combine.

    The alias and description are the user-facing texts of the upload field.
    They previously read "PDF File to Split" / "needs to be split" (copied
    from the split form), which mislabelled the merge form.
    """

    # Uploaded PDFs; stays None until the user provides files.
    pdf_uploads: Optional[List[FileContent]] = Field(
        None,
        alias="PDF Files to Merge",
        description="PDFs that need to be merged into one",
    )
class PDFSplitRequest(BaseModel):
    """Form model for the split tool: chunk size and the PDF to split."""

    # Number of pages in each output PDF. Constrained to >= 1 because the
    # split logic uses this value as a divisor and as a range() step; 0 would
    # raise ZeroDivisionError and a negative value would produce no chunks.
    pages_per_pdf: int = Field(
        1,
        ge=1,
        alias="Pages per Split",
        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
    )
    # Uploaded PDF; stays None until the user provides a file.
    pdf_upload: Optional[FileContent] = Field(
        None,
        alias="PDF File to Split",
        description="PDF that needs to be split",
    )
def stack_images(images):
    """Stack images vertically, top to bottom, into one new RGB image.

    adapted from: https://note.nkmk.me/en/python-pillow-concat-images/

    Args:
        images: Iterable of PIL images, pasted in iteration order.

    Returns:
        A new RGB image whose height is the sum of the input heights and
        whose width is that of the widest input, so no page is cropped.
        Narrower images are left-aligned; the uncovered area keeps the
        default fill of ``Image.new``.

    Raises:
        ValueError: If ``images`` is empty.
    """
    # Materialise once: the sequence is traversed several times below.
    images = list(images)
    if not images:
        raise ValueError("stack_images() requires at least one image")

    canvas_width = max(image.width for image in images)
    canvas_height = sum(image.height for image in images)
    output_image = Image.new('RGB', (canvas_width, canvas_height))

    y_offset = 0
    for image in images:
        output_image.paste(image, (0, y_offset))
        y_offset += image.height
    return output_image
# File extensions the tool knows about. The format selector is currently
# disabled (commented out), so the output type is fixed to PDF.
pdf_output, jpg_output, png_output = '.pdf', '.jpg', '.png'
#output_suffix = st.radio('Output File Type', [pdf_output, jpg_output, png_output], key='output_format')
output_suffix = pdf_output

# Section heading, then the selector that decides which tool is shown.
st.markdown("### PDF Manipulation Options")
pdf_functions = ('Merge Multiple PDFs into One', 'Split One PDF into Multiple')
view_choice = st.radio('Select a PDF Function', pdf_functions)
# Run the tool selected by the radio button above.
#
# NOTE(review): PdfFileReader / PdfFileWriter, getNumPages, getPage, addPage
# and numPages are the legacy PyPDF2 names; confirm the pinned PyPDF2 version
# still provides them.

# MIME type for each supported output extension.
_OUTPUT_MIME = {
    pdf_output: 'application/pdf',
    jpg_output: 'image/jpeg',
    png_output: 'image/png',
}


def _timestamped_path(prefix, suffix):
    """Return destination_folder / '<prefix>_<timestamp><suffix>'."""
    stamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')
    return destination_folder / f"{prefix}_{stamp}{suffix}"


def _remove_quietly(paths):
    """Delete every path in ``paths``; a file that is already gone is skipped."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate best-effort cleanup: the path may have been
            # registered before the file was actually created.
            pass


if view_choice == 'Merge Multiple PDFs into One':
    st.markdown("**Upload multiple PDFs**")

    # Get the data from the form, stop running if user hasn't submitted pdfs yet
    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
        st.warning("Upload at least 2 PDFs and press Submit")
        st.stop()

    # Every file written below is registered here first, so the finally
    # clause removes it even when a later step raises.
    temp_files = []
    try:
        # Save uploaded PDFs
        uploaded_paths = []
        for pdf_data in data.pdf_uploads:
            input_pdf_path = _timestamped_path("input", ".pdf")
            temp_files.append(input_pdf_path)
            input_pdf_path.write_bytes(pdf_data.as_bytes())
            uploaded_paths.append(input_pdf_path)

        # Append every page of every upload, in upload order
        pdf_writer = PdfFileWriter()
        for path in uploaded_paths:
            pdf_reader = PdfFileReader(str(path))
            for page in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page))

        # Write out the merged PDF
        output_pdf_path = _timestamped_path("output", ".pdf")
        temp_files.append(output_pdf_path)
        with open(str(output_pdf_path), 'wb') as out:
            pdf_writer.write(out)
        output_path = output_pdf_path

        # Convert to stacked / merged image
        if output_suffix in (png_output, jpg_output):
            images = convert_from_path(output_pdf_path)
            stacked_image = stack_images(images)
            output_path = _timestamped_path("output", output_suffix)
            temp_files.append(output_path)
            stacked_image.save(output_path)  # format inferred

        # Allow download; the bytes are read into memory here, so the file
        # on disk is no longer needed once the button exists.
        st.download_button(
            'Download Merged Document',
            output_path.read_bytes(),
            output_path.name,
            mime=_OUTPUT_MIME[output_suffix],
        )
    finally:
        # Delete temporary files (uploads, merged PDF, optional image)
        _remove_quietly(temp_files)

elif view_choice == 'Split One PDF into Multiple':
    st.markdown("**Upload a single PDF to split**")

    # Get the data from the form, stop running if user hasn't submitted pdf yet
    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
    if data is None or data.pdf_upload is None:
        st.warning("Upload a PDF and press Submit")
        st.stop()
    # Guard the divisor / range step used below.
    if data.pages_per_pdf < 1:
        st.warning("Pages per Split must be at least 1")
        st.stop()

    # Read the upload straight from memory. The original wrote it to disk and
    # read it back, which left the file behind whenever the script stopped
    # on the divisibility warning below.
    pdf = PdfFileReader(BytesIO(data.pdf_upload.as_bytes()))
    total_pages = pdf.numPages
    if total_pages == 0:
        st.warning("The uploaded PDF has no pages")
        st.stop()
    if total_pages % data.pages_per_pdf != 0:
        st.warning(f"Cannot divide pdf with {total_pages} pages into pdfs with {data.pages_per_pdf} pages per")
        st.stop()

    # All validation is done; from here on files are written, and each one
    # is registered in temp_files before it is created.
    temp_files = []
    try:
        # Split pdf every pages per pdf. Save each split pdf to file
        downloads = []
        for letter_start in range(0, total_pages, data.pages_per_pdf):
            output = PdfFileWriter()
            for letter_page in range(data.pages_per_pdf):
                output.addPage(pdf.getPage(letter_start + letter_page))

            output_path = _timestamped_path("output", ".pdf")
            temp_files.append(output_path)
            with open(output_path, "wb") as f:
                output.write(f)

            # Convert to stacked / merged image; the intermediate PDF stays
            # in temp_files so it is cleaned up as well.
            if output_suffix in (png_output, jpg_output):
                images = convert_from_path(output_path)
                stacked_image = stack_images(images)
                output_path = _timestamped_path("output", output_suffix)
                temp_files.append(output_path)
                stacked_image.save(output_path)  # format inferred

            downloads.append(output_path)
            st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1} / {total_pages})")

        # Make zip file of all split pdfs
        zip_path = _timestamped_path("output", ".zip")
        temp_files.append(zip_path)
        with ZipFile(str(zip_path), "w") as output_zip:
            for download_path in downloads:
                output_zip.write(str(download_path), arcname=download_path.name)

        # Provide download button of the zip of split pdfs. The suggested
        # file name is the bare name, without the folder component.
        st.download_button(
            f"Download {str(zip_path)}",
            zip_path.read_bytes(),
            zip_path.name,
            mime='application/zip',
            key=str(zip_path),
        )
    finally:
        # Delete temporary files exactly once. The original removed the last
        # output a second time after the cleanup loop, which raised
        # FileNotFoundError.
        _remove_quietly(temp_files)