Last active
October 26, 2018 23:54
-
-
Save tremby/301892548b070fe781c9de8a72406fcd to your computer and use it in GitHub Desktop.
Wrapper for pdftk which stamps particular pages with patch PDFs; see --help
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import os | |
import shutil | |
import string | |
import subprocess | |
import tempfile | |
# Fail fast if an external program this script needs cannot be found on PATH.
# The search is lazy, so only the first missing program is reported.
missing = next(
    (name for name in ('pdftk', 'ps2pdf') if shutil.which(name) is None),
    None,
)
if missing is not None:
    raise RuntimeError("The program {prog} is required".format(prog=missing))
def toBase26(num):
    """Return *num* written in base 26 using the letters A-Z as digits.

    'A' stands for 0 and 'Z' for 25, so 0 -> 'A', 25 -> 'Z' and 26 -> 'BA'.

    Args:
        num: a non-negative integer.

    Returns:
        The base-26 representation as a string of uppercase ASCII letters,
        most significant digit first.

    Raises:
        ValueError: if *num* is negative. Floor division never brings a
            negative number to zero, so the conversion loop would not end.
    """
    if num < 0:
        raise ValueError("num must not be negative")
    # Normalise to int so the divmod results can index the digit string
    num = int(num)
    digits = string.ascii_uppercase
    if num == 0:
        return digits[0]
    out = []
    while num:
        # Peel off the least significant digit on each pass
        num, remainder = divmod(num, 26)
        out.append(digits[remainder])
    # Digits were collected least significant first
    out.reverse()
    return ''.join(out)
# Command-line parser; the epilog gives a sample invocation, with %(prog)s
# standing in for the program name
parser = argparse.ArgumentParser(
    description="Patch particular pages of a PDF",
    epilog="Example: %(prog)s contract.pdf 2=sig-p2.pdf 3=sig-p3.pdf signed.pdf",
)
def pagePdfPair(string):
    """Parse a ``page=PDF`` command-line value into a ``(page, pdf)`` tuple.

    Intended for use as an argparse ``type=`` callable. Failures are raised
    as ``argparse.ArgumentTypeError``, which argparse reports as a usage
    error, rather than by calling the parser object directly.

    Args:
        string: the raw argument text, e.g. ``"2=sig-p2.pdf"``.

    Returns:
        A tuple of the page number (an int of at least 1) and the PDF file
        name (a non-empty string).

    Raises:
        argparse.ArgumentTypeError: if there is no ``=`` separator, the file
            name is empty, the page is not an integer, or the page is less
            than 1.
    """
    # Split on the first '=' only, so the file name itself may contain '='
    (page, separator, pdf) = string.partition('=')
    if not separator or not pdf:
        raise argparse.ArgumentTypeError("Expected a page=PDF pair")
    try:
        page = int(page)
    except ValueError:
        raise argparse.ArgumentTypeError("Page number must be an integer") from None
    if page < 1:
        raise argparse.ArgumentTypeError("Page number must be greater than zero")
    return (page, pdf)
# Register the positional arguments in the order they appear on the command
# line: the input file, one or more page=PDF pairs, then the output file
parser.add_argument(
    'inputPdf',
    type=str,
    help="the input file, particular pages of which will be stamped",
)
parser.add_argument(
    'patchPdfs',
    metavar="N=patchPdf",
    type=pagePdfPair,
    nargs='+',
    help="a page number and patch PDF pair",
)
parser.add_argument(
    'outputPdf',
    type=str,
    help="the output file",
)

# Optional switch allowing an existing output file to be replaced
parser.add_argument(
    '-f', '--force',
    action='store_true',
    help="force writing to the given output file even if it already exists",
)

args = parser.parse_args()
# Prevent overwriting output file
if not args.force and os.path.isfile(args.outputPdf):
    parser.error("Output file {filename} already exists and --force not specified".format(filename=args.outputPdf))

# Ensure all input files exist
if not os.path.isfile(args.inputPdf):
    parser.error("Input file {filename} doesn't exist".format(filename=args.inputPdf))
for (page, pdf) in args.patchPdfs:
    if not os.path.isfile(pdf):
        parser.error("Patch file {filename} doesn't exist".format(filename=pdf))

# Each page gets its own pdftk handle derived from the page number, so the
# same page must not be given more than one patch
seenPages = set()
for (page, pdf) in args.patchPdfs:
    if page in seenPages:
        parser.error("Page {page} was given more than one patch".format(page=page))
    seenPages.add(page)

# Get number of pages in input file
data = subprocess.run(['pdftk', args.inputPdf, 'dump_data'], stdout=subprocess.PIPE, encoding='utf8')
if data.returncode != 0:
    parser.error("pdftk could not read input file {filename}".format(filename=args.inputPdf))
totalPages = None
for line in data.stdout.splitlines():
    if line.startswith('NumberOfPages: '):
        # Keep only the digits of the line to obtain the count
        totalPages = int(''.join(filter(str.isdigit, line)))
        break
# Without this check a missing NumberOfPages line would surface later as a
# TypeError when comparing page numbers against None
if totalPages is None:
    parser.error("Could not determine the page count of {filename}".format(filename=args.inputPdf))

# Check no patch page numbers are out of range
for (page, pdf) in args.patchPdfs:
    if page > totalPages:
        parser.error("The input PDF has a page count of {total}, so page {page} cannot be patched".format(total=totalPages, page=page))

# Sort patch pages by page number
# Tuples compare by their first member first, which makes this easy
args.patchPdfs.sort()
def getPatchForPage(p):
    """Return the pdftk handle to use for page *p* of the patch document.

    The result is ``'PATCH'`` followed by the base-26 form of the page
    number when a patch was given for page *p*, and ``'BLANK'`` otherwise.
    """
    matches = [number for (number, _) in args.patchPdfs if number == p]
    if matches:
        return 'PATCH{handle}'.format(handle=toBase26(matches[0]))
    return 'BLANK'
# Work inside a temporary directory, which is removed when the block exits.
# Every external command runs with check=True so that a failing step raises
# subprocess.CalledProcessError instead of being silently ignored.
with tempfile.TemporaryDirectory() as tempdir:
    # Make a blank page PDF by giving ps2pdf empty input on stdin
    # (bytes, because the pipe is opened in binary mode)
    blankPdf = os.path.join(tempdir, 'blank.pdf')
    subprocess.run(['ps2pdf', '-sPAPERSIZE=a4', '-', blankPdf], input=b"", check=True)

    # Make a patch PDF with the same total number of pages as the input PDF:
    # first declare a handle for the blank page and for each patch file...
    patchPdf = os.path.join(tempdir, 'patch.pdf')
    pdftkargs = ['pdftk', 'BLANK={filename}'.format(filename=blankPdf)]
    pdftkargs.extend(
        'PATCH{handle}={filename}'.format(handle=toBase26(page), filename=pdf)
        for (page, pdf) in args.patchPdfs
    )
    # ...then concatenate one handle per page of the input PDF
    pdftkargs.append('cat')
    pdftkargs.extend(getPatchForPage(page) for page in range(1, totalPages + 1))
    pdftkargs.extend(['output', patchPdf])
    subprocess.run(pdftkargs, check=True)

    # Stamp the patch PDF on the input PDF
    subprocess.run(['pdftk', args.inputPdf, 'multistamp', patchPdf, 'output', args.outputPdf], check=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment