Skip to content

Instantly share code, notes, and snippets.

@roliveira
Last active October 8, 2020 20:41
Show Gist options
  • Save roliveira/a18f6a16754edc9caa3424d9fa1e5d6d to your computer and use it in GitHub Desktop.
Save roliveira/a18f6a16754edc9caa3424d9fa1e5d6d to your computer and use it in GitHub Desktop.
Crop, split and collate PDFs using pyPdf
import argparse
from pyPdf import PdfFileWriter, PdfFileReader
def create_parsers():
    """Build the command-line parser for the collate script.

    The parser exposes two required options:

    * ``-i/--input``: one or more PDF paths, collected into a list.
    * ``-o/--output``: a single PDF path.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    p = argparse.ArgumentParser(
        # This script places pages side by side; it does not crop, so the
        # program name and description shown in the usage text say so.
        prog='collate',
        description='"%(prog)s" collate pdfs',
    )
    p.add_argument(
        '-i', '--input',
        type=str,
        # '+' instead of '*': a bare "-i" with no paths is rejected by
        # argparse with a usage message instead of yielding an empty list.
        nargs='+',
        required=True,
        help='Input pdf',
    )
    p.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Output pdf',
    )
    return p
if __name__ == '__main__':
    # Place the first page of every input PDF side by side, left to right,
    # on one new page and write that page to the output PDF.
    p = create_parsers()
    args = p.parse_args()

    input_filenames = args.input
    output_filename = args.output

    if len(input_filenames) < 2:
        # Raising a plain string is a TypeError, so the message never reached
        # the user; report it through argparse (usage text + exit status 2).
        p.error("Input must contain more than one document")

    # The readers keep pulling data from their streams while the writer emits
    # the output, so the inputs are closed only after the write has finished.
    input_streams = []
    try:
        first_pages = []
        for filename in input_filenames:
            stream = open(filename, "rb")
            input_streams.append(stream)
            # get the first page from each pdf
            first_pages.append(PdfFileReader(stream).pages[0])

        output = PdfFileWriter()

        # The canvas is as wide as all pages together and as tall as the
        # tallest one. With exactly two inputs this is the same left/right
        # layout as before; with more inputs every page is now kept instead of
        # each pair overwriting the previous output file.
        page = output.addBlankPage(
            width=sum(pg.mediaBox.getWidth() for pg in first_pages),
            height=max(pg.mediaBox.getHeight() for pg in first_pages),
        )

        # draw the pages on that new page, each shifted right by the total
        # width of the pages already placed
        x_offset = 0
        for pg in first_pages:
            page.mergeTranslatedPage(pg, x_offset, 0)
            x_offset += pg.mediaBox.getWidth()

        # write to file
        with open(output_filename, "wb") as output_stream:
            output.write(output_stream)
    finally:
        for stream in input_streams:
            stream.close()
#!/usr/bin/python
import argparse
from pyPdf import PdfFileWriter, PdfFileReader
def create_parsers():
    """Build the command-line parser for the crop script.

    The parser exposes three required options:

    * ``-i/--input``: path of the PDF to read.
    * ``-o/--output``: path of the PDF to write.
    * ``-b/--bbox``: exactly four floats describing the bounding box.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    p = argparse.ArgumentParser(
        prog='crop',
        description='"%(prog)s" crop pdfs',
    )
    p.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Input pdf',
    )
    p.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Output pdf',
    )
    p.add_argument(
        '-b', '--bbox',
        type=float,
        # The script reads indices 0..3 of this list, so require exactly four
        # values here; '*' let a short list through and crashed later with an
        # IndexError.
        nargs=4,
        required=True,
        help='Bounding box [y0 y1 x0 x1]',
    )
    return p
if __name__ == '__main__':
    # Set the trim box and crop box of every page of the input PDF to the
    # requested bounding box and write the pages to the output PDF.
    p = create_parsers()
    args = p.parse_args()

    input_filename = args.input
    output_filename = args.output
    bounding_box = args.bbox

    # Indices 0..3 are read below; fail with a usage message rather than an
    # IndexError traceback when the wrong number of values was given.
    if len(bounding_box) != 4:
        p.error("Bounding box needs exactly four values")

    # NOTE(review): the option help advertises "[y0 y1 x0 x1]", but values 0
    # and 1 are used as the first coordinate of each corner and values 2 and 3
    # as the second -- confirm which order is intended.
    lower_left = (bounding_box[0], bounding_box[2])
    upper_right = (bounding_box[1], bounding_box[3])

    # The input stream stays open until the output has been written because
    # the pages added to the writer still come from the reader.
    with open(input_filename, "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output = PdfFileWriter()

        numPages = input1.getNumPages()
        # Single-argument print calls emit the same text under Python 2 and 3.
        print("document has %s pages." % numPages)

        for i in range(numPages):
            page = input1.getPage(i)
            print("%s %s" % (
                page.mediaBox.getUpperRight_x(),
                page.mediaBox.getUpperRight_y(),
            ))
            page.trimBox.lowerLeft = lower_left
            page.trimBox.upperRight = upper_right
            page.cropBox.lowerLeft = lower_left
            page.cropBox.upperRight = upper_right
            output.addPage(page)

        with open(output_filename, "wb") as output_stream:
            output.write(output_stream)
#!/usr/bin/python
import os
import argparse
from pyPdf import PdfFileWriter, PdfFileReader
def create_parsers():
    """Build the command-line parser for the split script.

    The parser exposes one required option, ``-i/--input``, holding the path
    of the PDF to split.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    p = argparse.ArgumentParser(
        # Name the tool after what it does so the usage line matches the
        # description (it previously read "crop").
        prog='split',
        description='"%(prog)s" split pdfs',
    )
    p.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Input pdf',
    )
    return p
if __name__ == '__main__':
    # Write every page of the input PDF to its own file, named
    # "<input stem><zero-based page index><input extension>".
    p = create_parsers()
    args = p.parse_args()

    input_filename = args.input
    # Split the path once into the stem used as output prefix and the
    # extension re-appended to every output file.
    output_filename, output_extension = os.path.splitext(input_filename)

    # Keep the input open for the whole loop (each page is read from it when
    # it is written) and close it afterwards.
    with open(input_filename, "rb") as input_stream:
        inputpdf = PdfFileReader(input_stream)

        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            with open(output_filename + str(i) + output_extension, "wb") as outputStream:
                output.write(outputStream)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment