Skip to content

Instantly share code, notes, and snippets.

@dpetrov
Created October 24, 2019 14:27
Show Gist options
  • Save dpetrov/af95ee5e3380a9844fbf22ef0570134a to your computer and use it in GitHub Desktop.
Save dpetrov/af95ee5e3380a9844fbf22ef0570134a to your computer and use it in GitHub Desktop.
Join pages from a collection of PDF files into a single PDF file
#! /usr/bin/python
#
# join
# Join pages from a collection of PDF files into a single PDF file.
#
# join [--output <file>] [--shuffle] [--reverse] [--verbose] <pdf file> ...
#
# Parameters:
#
# --output <file>
# The PDF file to create. No output is produced unless this option is given.
#
# --shuffle
# Take a page from each PDF input file in turn before taking another from each file.
# If this option is not specified then all of the pages from a PDF file are appended
# to the output PDF file before the next input PDF file is processed.
#
# --reverse
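# Interleave pages as with --shuffle, but take the pages of every second
# input file (the 2nd, 4th, ... once the file names are sorted) in reverse order.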
#
# --verbose
# Print progress messages describing what the tool is doing (the messages are
# emitted with print, i.e. on standard output).
#
import sys
import os
import getopt
import tempfile
import shutil
from CoreFoundation import *
from Quartz.CoreGraphics import *
# Module-level flag read by the functions below: when True they print progress
# messages. main() switches it on when -v/--verbose is passed.
verbose = False
def createPDFDocumentWithPath(path):
    """Open the PDF file at `path` and return the document object.

    The path is first converted to a URL with
    CFURLCreateFromFileSystemRepresentation (is-directory argument False);
    the value returned by CGPDFDocumentCreateWithURL for that URL is passed
    back to the caller unchanged.

    When the module-level `verbose` flag is set, the path is printed first.
    """
    if verbose:
        print("Creating PDF document from file %s" % path)
    fileURL = CFURLCreateFromFileSystemRepresentation(
        kCFAllocatorDefault, path, len(path), False)
    return CGPDFDocumentCreateWithURL(fileURL)
def writePageFromDoc(writeContext, doc, pageNum):
    """Draw page `pageNum` of `doc` as a new page of `writeContext`.

    Nothing is drawn (and nothing is printed) when CGPDFDocumentGetPage
    yields a false value for the requested page number.

    When the module-level `verbose` flag is set, a line is printed for each
    page that was copied.
    """
    page = CGPDFDocumentGetPage(doc, pageNum)
    if not page:
        return

    # An empty media box is replaced by None when the page is started.
    # NOTE(review): presumably None makes the context fall back to its own
    # default page rectangle -- confirm against the Core Graphics docs.
    pageBox = CGPDFPageGetBoxRect(page, kCGPDFMediaBox)
    CGContextBeginPage(writeContext,
                       None if CGRectIsEmpty(pageBox) else pageBox)
    CGContextDrawPDFPage(writeContext, page)
    CGContextEndPage(writeContext)

    if verbose:
        print("Copied page %d from %s" % (pageNum, doc))
def shufflePages(writeContext, docs, maxPages):
    """Interleave the documents page by page.

    For every page number from 1 through `maxPages`, writePageFromDoc is
    called once per document in `docs` (in list order) with that page number,
    so the output receives page 1 of each document, then page 2 of each, and
    so on.
    """
    currentPage = 1
    while currentPage <= maxPages:
        for document in docs:
            writePageFromDoc(writeContext, document, currentPage)
        currentPage += 1
def shuffleReversePages(writeContext, docs, maxPages):
    """Interleave the documents, reading every second one back to front.

    For each position from 1 through `maxPages`, one page is requested from
    each document in `docs` in turn:

    * documents at even indexes (0, 2, ...) supply page `position`;
    * documents at odd indexes (1, 3, ...) supply their pages in reverse
      order, counted from their OWN last page, i.e. page
      `pageCount - position + 1`.

    The reversed page number used to be derived from `maxPages`, which
    dropped the last page of a reversed document whenever `maxPages` was odd
    and the document had that many pages, and asked for the non-existent
    page 0. Using the document's own page count keeps the results for the
    cases that already worked (e.g. a reversed document that is one page
    shorter than an odd `maxPages`) and fixes the others.

    Parameters:
        writeContext -- the PDF context the pages are drawn into.
        docs         -- list of PDF documents, in output order.
        maxPages     -- number of interleaving rounds to perform.
    """
    # Query each page count once instead of once per round. main() already
    # calls CGPDFDocumentGetNumberOfPages on every document, so this adds no
    # new requirement on the entries of `docs`.
    pageCounts = [CGPDFDocumentGetNumberOfPages(doc) for doc in docs]

    for pageNum in xrange(1, maxPages + 1):
        for index, doc in enumerate(docs):
            if index % 2 == 0:
                writePageFromDoc(writeContext, doc, pageNum)
                continue

            reversedPageNum = pageCounts[index] - pageNum + 1
            # Page numbers start at 1; a smaller value means this document
            # has already been exhausted.
            if reversedPageNum >= 1:
                writePageFromDoc(writeContext, doc, reversedPageNum)
def append(writeContext, docs, maxPages):
    """Write the documents one after another.

    Each document in `docs` is handled completely before the next one: for
    every page number from 1 through `maxPages`, writePageFromDoc is called
    with that document and page number.
    """
    # The same page-number range is reused for every document.
    pageNumbers = xrange(1, maxPages + 1)
    for document in docs:
        for currentPage in pageNumbers:
            writePageFromDoc(writeContext, document, currentPage)
def main(argv):
    """Parse the command line in `argv` and write the joined PDF file.

    Recognised options: -o/--output <file>, -s/--shuffle, -r/--reverse and
    -v/--verbose. The remaining arguments are the input PDF files; they are
    processed in sorted order. --reverse takes precedence over --shuffle;
    with neither, the inputs are appended one after another.

    Parameters:
        argv -- the command-line arguments without the program name.

    Exits with status 2 (after printing the usage line) when the options
    cannot be parsed, when no output file was given, or when there are no
    input files. Exits with status 1 when the output PDF context cannot be
    created.
    """
    global verbose

    try:
        options, args = getopt.getopt(
            argv, "o:srv", ["output=", "shuffle", "reverse", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Enable verbose mode before looking at the other options so that their
    # messages do not depend on where -v appears on the command line.
    if any(option in ("-v", "--verbose") for option, _ in options):
        print("Verbose mode enabled.")
        verbose = True

    outputPath = None
    shuffle = False
    reverse = False
    for option, arg in options:
        if option in ("-o", "--output"):
            if verbose:
                print("Setting %s as the destination." % arg)
            # Only remember the path here; the context is created once, after
            # parsing, so a repeated -o does not leave a stray open context.
            outputPath = arg
        elif option in ("-s", "--shuffle"):
            if verbose:
                print("Shuffle pages to the output file.")
            shuffle = True
        elif option in ("-r", "--reverse"):
            if verbose:
                print("Reverse shuffle pages to the output file.")
            reverse = True

    # Without a destination or without inputs there is nothing sensible to
    # do; say so instead of silently doing nothing.
    if outputPath is None or not args:
        usage()
        sys.exit(2)

    outputURL = CFURLCreateFromFileSystemRepresentation(
        kCFAllocatorDefault, outputPath, len(outputPath), False)
    # The PDF context we will draw into to create the new PDF.
    writeContext = CGPDFContextCreateWithURL(outputURL, None, None)
    if not writeContext:
        sys.stderr.write("join: cannot create output file %s\n" % outputPath)
        sys.exit(1)

    # Create PDF documents for all of the input files.
    docs = [createPDFDocumentWithPath(path) for path in sorted(args)]

    # The longest input determines how many rounds the writers perform.
    maxPages = max([CGPDFDocumentGetNumberOfPages(doc) for doc in docs])

    if reverse:
        shuffleReversePages(writeContext, docs, maxPages)
    elif shuffle:
        shufflePages(writeContext, docs, maxPages)
    else:
        append(writeContext, docs, maxPages)

    CGPDFContextClose(writeContext)
    # Drop our reference to the context now that it has been closed.
    del writeContext
def usage():
    """Print a one-line summary of the command-line syntax.

    Lists every option accepted by main(), including --reverse, followed by
    the input PDF files.
    """
    print("Usage: join [--output <file>] [--shuffle] [--reverse] [--verbose] <pdf file> ...")
if __name__ == "__main__":
    # sys.argv[0] is the script itself; main() expects only the arguments
    # that follow it.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment