Created
October 3, 2019 19:30
-
-
Save markwk/c0157ffce0ed0308c7647ab387986908 to your computer and use it in GitHub Desktop.
Append a Table of Contents (TOC) to a PDF Document on Mac using Only Python and No External Dependencies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
#############################
#
# Append a Table of Contents (TOC) to a PDF Document on Mac using Python
#
# This script involves no external dependencies. Tested and working on the Mac's default version of Python.
#
# Usage: python pdf_toc_processor.py -i <path-to-target.pdf> -b <path-to-bookmarks-file.txt> -o <path-to-output.pdf>
#
# A sample bookmarks file should look like this (one "<page number>, <label>" pair per line):
# 1, Cover
# 2, August 20 2018 @ Singapore
# 4, August 22 @ Los Angeles
#
#
# Code adapted from https://apple.stackexchange.com/a/348219
# Created by markwk | github.com/markwk/
from Foundation import NSURL, NSString | |
import Quartz as Quartz | |
import sys, getopt | |
import csv | |
def getOutline(PDF_File, page, label):
    """Build one outline (bookmark) entry pointing at a page of the PDF.

    Args:
        PDF_File: the document object; only ``pageAtIndex_`` is called on it.
        page: zero-based page index handed to ``pageAtIndex_``.
        label: text shown for the outline entry. Byte strings are decoded
            as UTF-8 before being converted to an ``NSString``.

    Returns:
        A new ``Quartz.PDFOutline`` whose label is ``label`` and whose
        destination is the point (0, max Y of the page's media box).

    Raises:
        IndexError: if the document returns no page for ``page``.
        UnicodeDecodeError: if ``label`` is a byte string that is not
            valid UTF-8.
    """
    myPage = PDF_File.pageAtIndex_(page)
    if myPage is None:
        # Without this guard the next call fails with an opaque
        # "NoneType has no attribute" error.
        raise IndexError('PDF has no page at index %r' % (page,))

    # Under Python 2 the csv module yields byte strings; hand the bridge
    # real text so non-ASCII labels are not mangled or rejected.
    # NOTE(review): assumes the bookmarks file is UTF-8 encoded - confirm.
    if isinstance(label, bytes):
        label = label.decode('utf-8')

    pageSize = myPage.boundsForBox_(Quartz.kCGPDFMediaBox)
    # x = 0 with the largest y of the media box; presumably the top-left
    # corner of the page in PDF coordinates.
    x = 0
    y = Quartz.CGRectGetMaxY(pageSize)
    pagePoint = Quartz.CGPointMake(x, y)
    myDestination = Quartz.PDFDestination.alloc().initWithPage_atPoint_(myPage, pagePoint)

    myLabel = NSString.stringWithString_(label)
    myOutline = Quartz.PDFOutline.alloc().init()
    myOutline.setLabel_(myLabel)
    myOutline.setDestination_(myDestination)
    return myOutline
def _usage():
    # Build the one-line usage string shown by -h and on argument errors.
    return '%s -i <inputfile> -b <bookmarksfile> -o <outputfile>' % sys.argv[0]


def _fail(message, status=1):
    # Report a fatal error on stderr and terminate with the given exit status.
    sys.stderr.write('ERROR: %s\n' % message)
    sys.exit(status)


def _read_bookmarks(path):
    # Parse the bookmarks file into a list of (zero-based page index, label).
    bookmarks = []
    with open(path) as csvfile:
        for line_no, row in enumerate(csv.reader(csvfile, delimiter=','), 1):
            if not any(cell.strip() for cell in row):
                continue  # tolerate blank lines instead of crashing on row[0]
            if len(row) < 2:
                _fail('%s line %d: expected "<page>, <label>"' % (path, line_no))
            try:
                page_number = int(row[0])
            except ValueError:
                _fail('%s line %d: invalid page number %r'
                      % (path, line_no, row[0]))
            if page_number < 1:
                _fail('%s line %d: page numbers start at 1' % (path, line_no))
            # Re-join so a label that itself contains commas is kept whole,
            # and drop the space that follows the separator ("1, Cover").
            label = ','.join(row[1:]).strip()
            bookmarks.append((page_number - 1, label))
    return bookmarks


def main(argv):
    """Add an outline (table of contents) to a PDF from a bookmarks file.

    Args:
        argv: command-line arguments without the program name. Recognised
            options are ``-i/--ifile`` (input PDF), ``-b/--bfile``
            (bookmarks file with one ``<page number>, <label>`` pair per
            line, pages counted from 1), ``-o/--ofile`` (output PDF) and
            ``-h`` (print usage and exit).

    Raises:
        SystemExit: status 2 for unknown options or a missing required
            option; status 1 when the bookmarks file or the PDF cannot be
            read, a bookmark is malformed or points past the last page,
            or the output cannot be written; no status after ``-h``.
    """
    inputfile = ''
    outputfile = ''
    bookmarksfile = ''
    try:
        opts, _args = getopt.getopt(argv, "hi:o:b:", ["ifile=", "ofile=", "bfile="])
    except getopt.GetoptError as err:
        sys.stderr.write('ERROR: %s\n' % err)
        sys.stderr.write('usage: %s\n' % _usage())
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(_usage())
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-b", "--bfile"):
            bookmarksfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    # All three paths are needed to do anything useful; say so up front
    # instead of failing later (or silently writing nothing).
    required = (('-i <inputfile>', inputfile),
                ('-b <bookmarksfile>', bookmarksfile),
                ('-o <outputfile>', outputfile))
    missing = [name for name, value in required if not value]
    if missing:
        sys.stderr.write('ERROR: missing %s\n' % ', '.join(missing))
        sys.stderr.write('usage: %s\n' % _usage())
        sys.exit(2)

    # Parse the bookmarks before touching the PDF so a bad file fails early.
    try:
        bookmarks = _read_bookmarks(bookmarksfile)
    except (IOError, OSError) as err:
        _fail('cannot read bookmarks file %s: %s' % (bookmarksfile, err))

    pdfURL = NSURL.fileURLWithPath_(inputfile)
    myPDF = Quartz.PDFDocument.alloc().initWithURL_(pdfURL)
    if not myPDF:
        _fail('could not open %s as a PDF' % inputfile)

    # Reject bookmarks that point past the end of the document before
    # building anything.
    page_count = myPDF.pageCount()
    for page_index, label in bookmarks:
        if page_index >= page_count:
            _fail('bookmark %r refers to page %d but the PDF has %d page(s)'
                  % (label, page_index + 1, page_count))

    # Children are inserted in file order under a fresh root outline.
    rootOutline = Quartz.PDFOutline.alloc().init()
    for position, (page_index, label) in enumerate(bookmarks):
        outline_item = getOutline(myPDF, page_index, label)
        rootOutline.insertChild_atIndex_(outline_item, position)
    myPDF.setOutlineRoot_(rootOutline)

    if not myPDF.writeToFile_(outputfile):
        _fail('could not write %s' % outputfile)
if __name__ == "__main__":
    # Script entry point: pass everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment