Skip to content

Instantly share code, notes, and snippets.

@Teinc3
Created November 8, 2024 15:07
Show Gist options
  • Select an option

  • Save Teinc3/a004102454b88df35ad28ff041eb32df to your computer and use it in GitHub Desktop.

Select an option

Save Teinc3/a004102454b88df35ad28ff041eb32df to your computer and use it in GitHub Desktop.
import os
import shutil
import tempfile

import pdfplumber
from reportlab.pdfgen import canvas
# Output page geometry: A4, expressed in PDF points.
A4_WIDTH = 595.28
A4_HEIGHT = 841.89
# Directory that holds the input PDF and receives the generated output.
IO_DIR = "io"
# Resolution (DPI) used when rasterising each slice of the source page.
RENDER_RESOLUTION = 300
# A remainder thinner than this (in points) is treated as floating-point
# residue instead of becoming a page of its own.
_MIN_SLICE_HEIGHT = 0.01


def build_output_path(input_pdf_path):
    """Return the output path: the input path with ``_split`` before ``.pdf``.

    Only the final extension is replaced, so a ``.pdf`` occurring elsewhere
    in the path is left alone and an upper-case ``.PDF`` input can never map
    onto itself (which would overwrite the input file).
    """
    root, _extension = os.path.splitext(input_pdf_path)
    return f"{root}_split.pdf"


def compute_slices(full_height, slice_height):
    """Split ``full_height`` into consecutive ``(top, bottom)`` offsets.

    Offsets are measured downwards from the top edge of the page. Every
    slice is ``slice_height`` tall except possibly the last one, which holds
    the remainder. A height that is an exact multiple of ``slice_height``
    produces no empty trailing slice.

    Raises:
        ValueError: if ``slice_height`` is not positive.
    """
    if slice_height <= 0:
        raise ValueError("slice_height must be positive")

    slices = []
    index = 0
    while True:
        # Multiply instead of accumulating to avoid compounding float error.
        top = index * slice_height
        if full_height - top <= _MIN_SLICE_HEIGHT:
            break
        slices.append((top, min(top + slice_height, full_height)))
        index += 1
    return slices


def choose_input_pdf():
    """Interactively pick the PDF to process from the ``io`` directory.

    Returns the path of the chosen file, or ``None`` when there is nothing
    to process (directory just created, no PDF present, user declined, or
    the typed file name does not exist). A message explaining why is printed
    in each of those cases.
    """
    if not os.path.isdir(IO_DIR):
        os.makedirs(IO_DIR)
        print("The 'io' directory has been created. Please place your input PDF file in the 'io' directory and run the program again.")
        return None

    # Case-insensitive match so that "SCAN.PDF" is found as well.
    pdf_files = sorted(
        name for name in os.listdir(IO_DIR) if name.lower().endswith(".pdf")
    )
    if not pdf_files:
        print("No PDF files found in the 'io' directory. Please add a PDF file and run the program again.")
        return None

    if len(pdf_files) == 1:
        only_file = pdf_files[0]
        use_file = input(f"Found one PDF file: {only_file}. Do you want to use this file? (y/n): ")
        if use_file.strip().lower() != "y":
            print("Please place the desired PDF file in the 'io' directory and run the program again.")
            return None
        return os.path.join(IO_DIR, only_file)

    file_name = input("Enter the PDF file name in the 'io' directory: ").strip()
    if not file_name.lower().endswith(".pdf"):
        file_name += ".pdf"
    input_pdf_path = os.path.join(IO_DIR, file_name)
    if not os.path.isfile(input_pdf_path):
        print(f"File not found: {input_pdf_path}")
        return None
    return input_pdf_path


def split_long_page(input_pdf_path, output_pdf_path):
    """Cut the first page of a PDF into A4-sized pages and save them.

    The source page is scaled so that its width fills the A4 width, then
    sliced from top to bottom. Each slice is rendered to a PNG and drawn on
    its own output page; a shorter final slice is drawn at its natural
    height against the top of the page rather than being stretched.

    Returns the number of pages written.

    Raises:
        ValueError: if the PDF has no pages or its first page has no area.
    """
    with pdfplumber.open(input_pdf_path) as pdf:
        if not pdf.pages:
            raise ValueError(f"{input_pdf_path} contains no pages")
        if len(pdf.pages) > 1:
            print("Note: the PDF has more than one page; only the first page is split.")
        page = pdf.pages[0]

        # pdfplumber bounding boxes are (x0, top, x1, bottom), measured from
        # the top-left corner of the page.
        x0, page_top, x1, page_bottom = (float(value) for value in page.bbox)
        source_width = x1 - x0
        source_height = page_bottom - page_top
        if source_width <= 0 or source_height <= 0:
            raise ValueError(f"{input_pdf_path}: first page has no area")

        # Uniform scale mapping the source width onto the A4 width; one
        # output page therefore covers A4_HEIGHT / scale source points.
        scale = A4_WIDTH / source_width
        slices = compute_slices(source_height, A4_HEIGHT / scale)
        if not slices:
            raise ValueError(f"{input_pdf_path}: first page has no area")

        # A private temporary directory is removed even when rendering
        # fails, and never clashes with an existing "tmp" directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_canvas = canvas.Canvas(
                output_pdf_path, pagesize=(A4_WIDTH, A4_HEIGHT)
            )
            for index, (top, bottom) in enumerate(slices):
                # Clamp so float rounding cannot push the box off the page.
                crop_box = (
                    x0,
                    page_top + top,
                    x1,
                    min(page_top + bottom, page_bottom),
                )
                img_path = os.path.join(tmp_dir, f"temp_page_{index}.png")
                slice_image = page.within_bbox(crop_box).to_image(
                    resolution=RENDER_RESOLUTION
                )
                slice_image.save(img_path)

                # The canvas origin is the bottom-left corner, so shift the
                # image up to keep a short slice aligned with the page top.
                drawn_height = (bottom - top) * scale
                pdf_canvas.drawImage(
                    img_path,
                    0,
                    A4_HEIGHT - drawn_height,
                    width=A4_WIDTH,
                    height=drawn_height,
                )
                pdf_canvas.showPage()
            # Save while the temporary images still exist on disk.
            pdf_canvas.save()
    return len(slices)


def main():
    """Ask for an input PDF in ``io`` and write its A4-split counterpart."""
    input_pdf_path = choose_input_pdf()
    if input_pdf_path is None:
        return
    output_pdf_path = build_output_path(input_pdf_path)
    split_long_page(input_pdf_path, output_pdf_path)
    print("PDF split and saved successfully!")


if __name__ == "__main__":
    main()
requirements.txt (third-party packages required by the script above):
pdfplumber
reportlab
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment