Created
November 8, 2024 15:07
-
-
Save Teinc3/a004102454b88df35ad28ff041eb32df to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Split the first page of a tall, single-page PDF into A4-sized pages.

The input PDF is picked interactively from the ``io`` directory.  Its first
page is cut into horizontal slices from top to bottom, each slice is
rasterised and placed on its own A4 page, and the result is written next to
the input as ``<name>_split.pdf``.
"""
import os
import shutil
import sys
import tempfile

import pdfplumber
from reportlab.pdfgen import canvas

IO_DIR = "io"

# Output page size: A4 in PDF points (1 pt = 1/72 inch).
PAGE_WIDTH = 595.28
SPLIT_HEIGHT = 841.89

# Resolution used when rasterising each slice.
RENDER_DPI = 300

# A slice thinner than one rendered pixel (in source points) carries no
# visible content; skipping it also avoids asking pdfplumber for a zero-area
# crop when the page height is an exact multiple of the slice height.
MIN_SLICE_HEIGHT = 72 / RENDER_DPI


def select_input_pdf():
    """Return the path of the PDF to process, chosen from the ``io`` directory.

    Exits the program (after printing instructions) when the directory had to
    be created, contains no PDF, or the user declines the only candidate.
    Exits with an error message when the file named by the user does not
    exist.
    """
    if not os.path.isdir(IO_DIR):
        os.makedirs(IO_DIR)
        print("The 'io' directory has been created. Please place your input PDF file in the 'io' directory and run the program again.")
        sys.exit()

    # Match the extension case-insensitively so "SCAN.PDF" is found as well.
    pdf_files = sorted(
        name for name in os.listdir(IO_DIR) if name.lower().endswith(".pdf")
    )

    if not pdf_files:
        print("No PDF files found in the 'io' directory. Please add a PDF file and run the program again.")
        sys.exit()

    if len(pdf_files) == 1:
        use_file = input(f"Found one PDF file: {pdf_files[0]}. Do you want to use this file? (y/n): ")
        if use_file.strip().lower() != 'y':
            print("Please place the desired PDF file in the 'io' directory and run the program again.")
            sys.exit()
        return os.path.join(IO_DIR, pdf_files[0])

    file_name = input("Enter the PDF file name in the 'io' directory: ").strip()
    if not file_name.lower().endswith(".pdf"):
        file_name += '.pdf'
    input_pdf_path = os.path.join(IO_DIR, file_name)
    if not os.path.isfile(input_pdf_path):
        # Fail here with a clear message instead of deep inside pdfplumber.
        sys.exit(f"File not found: {input_pdf_path}")
    return input_pdf_path


def split_pdf(input_pdf_path, output_pdf_path):
    """Cut the first page of ``input_pdf_path`` into A4 pages.

    The page is scaled so that its full width fits the A4 width, then sliced
    top to bottom.  The last slice may be shorter than a full page; it is
    drawn at its true height against the top of the output page instead of
    being stretched.

    Args:
        input_pdf_path: Path of the PDF whose first page is split.
        output_pdf_path: Path the multi-page result is written to.

    Returns:
        The number of pages written to ``output_pdf_path``.
    """
    # Use a private temporary directory so cleanup can never delete a
    # pre-existing directory of the user's, and clean it up even on failure.
    tmp_dir = tempfile.mkdtemp(prefix="pdf_split_")
    try:
        with pdfplumber.open(input_pdf_path) as pdf:
            if not pdf.pages:
                sys.exit(f"The PDF has no pages: {input_pdf_path}")
            page = pdf.pages[0]  # Assuming the PDF has only one long page

            # pdfplumber bounding boxes are (x0, top, x1, bottom) with
            # top/bottom measured downwards from the top edge of the page.
            x0, page_top, x1, page_bottom = page.bbox

            # Factor that maps source points onto the A4 page width, and the
            # source height that therefore fills one A4 page.
            scale = PAGE_WIDTH / (x1 - x0)
            slice_height = SPLIT_HEIGHT / scale

            out = canvas.Canvas(output_pdf_path, pagesize=(PAGE_WIDTH, SPLIT_HEIGHT))
            page_count = 0
            top = page_top
            while page_bottom - top > MIN_SLICE_HEIGHT:
                # Clamp so the final slice ends exactly on the page edge.
                bottom = min(top + slice_height, page_bottom)

                img_path = os.path.join(tmp_dir, f"temp_page_{page_count}.png")
                section = page.within_bbox((x0, top, x1, bottom))
                section.to_image(resolution=RENDER_DPI).save(img_path)

                # ReportLab's origin is the bottom-left corner, so a short
                # slice is lifted to sit flush with the top of the page.
                drawn_height = (bottom - top) * scale
                out.drawImage(
                    img_path,
                    0,
                    SPLIT_HEIGHT - drawn_height,
                    width=PAGE_WIDTH,
                    height=drawn_height,
                )
                out.showPage()

                page_count += 1
                top = bottom

            out.save()
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return page_count


def main():
    """Pick the input PDF, split it, and report success."""
    input_pdf_path = select_input_pdf()

    # Replace only the extension; str.replace would also rewrite any other
    # ".pdf" occurring earlier in the path.
    root, ext = os.path.splitext(input_pdf_path)
    output_pdf_path = f"{root}_split{ext}"

    split_pdf(input_pdf_path, output_pdf_path)
    print("PDF split and saved successfully!")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| pdfplumber | |
| reportlab |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment