Last active
December 27, 2015 11:55
-
-
Save jgasteiz/431753041918dda13e6d to your computer and use it in GitHub Desktop.
Python script that accepts a pdf file as input and creates an output with all the pages split in half, by the x axis – the output file will have twice as many pages as the original file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
Python script that accepts a pdf file as input and creates an output with all
the pages split in half along the x axis (each page becomes a left half and a
right half) - the output file will have twice as many pages as the original
file.

e.g. `python split_in_half.py ~/Documents/my-double-paged-document.pdf`

An output file called `output.pdf` will be created in the current directory.

Requirements:
- [pyPdf](https://pypi.python.org/pypi/pyPdf/)
"""
import sys | |
from copy import copy | |
from pyPdf import PdfFileWriter, PdfFileReader | |
from pyPdf.utils import PdfReadError | |
def get_file_input(file_path):
    """
    Open the pdf at `file_path` and return a PdfFileReader built on it.

    The file is opened in binary mode and handed straight to PdfFileReader.
    It is deliberately not closed here: the returned reader was constructed
    from the stream and the caller reads pages from it afterwards.
    NOTE(review): presumably pyPdf still needs the stream at that point -
    confirm before adding a close.

    Any failure terminates the process through `sys.exit` with a message:
    a PdfReadError is reported together with the file path, every other
    exception is reported as `<exception class>: <message>`.
    """
    try:
        # `open` replaces the Python-2-only `file` builtin; same arguments.
        return PdfFileReader(open(file_path, "rb"))
    except PdfReadError as e:
        sys.exit('Error opening the file `{}`: {}'.format(file_path, e))
    except Exception as e:
        # Top-level boundary of a command-line script: turn anything else
        # (missing file, permission error, ...) into a readable exit message.
        sys.exit('{}: {}'.format(e.__class__, e))
def _crop_to(page, lower_left, upper_right):
    """Set both the trim box and the crop box of `page` to the given corners."""
    for box in (page.trimBox, page.cropBox):
        box.lowerLeft = lower_left
        box.upperRight = upper_right


def get_pages(page):
    """
    Given a pdf page, split it in half by the x axis and return both halves
    as new pdf pages.

    The split line is the horizontal midpoint of the page's media box. The
    media box's own lower-left corner is used as the origin, so pages whose
    media box does not start at (0, 0) are still cut in the middle.

    `page` itself is not returned: both results are copies of it, one with
    its trim box and crop box narrowed to the left half, the other to the
    right half.

    Returns a `(page_left, page_right)` tuple.
    """
    # Work on copies so the object handed in by the caller is not the one
    # being cropped.
    # NOTE(review): copy() is shallow, exactly as the right-hand page was
    # copied before; box objects already stored on `page` could be shared
    # between the halves - confirm against pyPdf if pages are ever reused.
    page_left = copy(page)
    page_right = copy(page)

    # float() keeps the half unit when the coordinates are integers and the
    # script runs under Python 2, where `/` on two ints truncates.
    media_box = page_left.mediaBox
    left_x = float(media_box.getLowerLeft_x())
    bottom_y = float(media_box.getLowerLeft_y())
    right_x = float(media_box.getUpperRight_x())
    top_y = float(media_box.getUpperRight_y())
    middle_x = (left_x + right_x) / 2

    # Crop the left page.
    _crop_to(page_left, (left_x, bottom_y), (middle_x, top_y))

    # Crop the right page.
    _crop_to(page_right, (middle_x, bottom_y), (right_x, top_y))

    return page_left, page_right
def main():
    """
    Command-line entry point: split every page of the given pdf in half.

    Reads the input path from `sys.argv[1]`, exits with a message when it is
    missing, then writes the left and right half of each input page, in
    order, to `output.pdf` in the current working directory.
    """
    if len(sys.argv) < 2:
        sys.exit('You must specify a file path for the pdf file.')

    file_input = get_file_input(sys.argv[1])
    output = PdfFileWriter()

    for i in range(file_input.getNumPages()):
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        print('Splitting page {} into {} and {}'.format(i, i * 2, i * 2 + 1))
        page_left, page_right = get_pages(file_input.getPage(i))
        output.addPage(page_left)
        output.addPage(page_right)

    print('Saving new file as output.pdf')
    # The context manager closes the output file even if writing fails.
    with open('output.pdf', 'wb') as output_stream:
        output.write(output_stream)
# Run the splitter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment