alexanderankin · October 13, 2018 14:10 · alexanderankin · May 10, 2017
diff --git a/bookmark.py b/bookmark.py
 from collections import OrderedDict as o_dict

 from fpdf import FPDF
 from fpdf.util import textstring

 def create_dictionary_string(dict_):
    """Render a mapping of PDF keys to values as a PDF dictionary string.

    Every item becomes ``key value``; the items are separated by newlines
    and wrapped in ``<<`` and `` >>``. Keys and values must already be
    strings, and the iteration order of ``dict_`` decides the entry order.
    """
    body = '\n'.join(' '.join(pair) for pair in dict_.items())
    return '<<%s >>' % body
 def create_list_string(list_):
    """Render a sequence of already-formatted strings as a PDF array.

    The items are joined with single spaces and enclosed in square brackets.
    """
    return '[%s]' % ' '.join(list_)

 def iobj_ref(n):
    """Build the indirect-reference text for PDF object number ``n``.

    The generation number is always written as 0, e.g. ``3`` -> ``'3 0 R'``.
    """
    return ' '.join((str(n), '0', 'R'))

 class Bookmark(FPDF):
    """FPDF subclass that collects bookmarks and writes them as a PDF outline.

    Bookmarks are recorded with ``bookmark()`` while pages are produced. The
    outline objects are written right after the base class's resources, and
    the catalog receives ``/Outlines`` and ``/PageMode`` entries for them.
    """

    def __init__(self, orientation='P', unit='mm', format='A4'):
        """Initialise the base document and the empty bookmark state."""
        super(Bookmark, self).__init__(orientation, unit, format)
        # Flat list of bookmark dicts, in the order bookmark() was called.
        self.outlines = []
        # Object number of the outline root; set by _putbookmarks().
        self.outline_root_obj_reference = None
        # Count of add_page() calls so far, used as the bookmark's page.
        self.current_page_number = 0

    def bookmark(self, txt, level=0, y=0):
        """Record a bookmark that points at the current page.

        txt   -- title of the outline entry
        level -- nesting depth, 0 being the top level
        y     -- vertical position of the target; -1 selects the current
                 position as reported by ``self.get_y()``
        """
        target_y = self.get_y() if y == -1 else y
        entry = {
            't': txt,
            'level': level,
            'y': target_y,
            'p': self.current_page_number,
        }
        self.outlines.append(entry)

    def add_page(self, *a, **k):
        """Count the new page, then let the base class create it.

        All arguments are forwarded unchanged to ``FPDF.add_page``.
        """
        # A falsy counter (initially 0) restarts at 1; otherwise increment.
        self.current_page_number = (self.current_page_number or 0) + 1
        super(Bookmark, self).add_page(*a, **k)

    def _putbookmarks(self):
        """Write one object per bookmark followed by the outline root.

        Nothing is written when no bookmark was recorded. Otherwise the flat
        list in ``self.outlines`` is first linked into a tree: every entry
        gains a ``parent`` index and, where applicable, ``prev``, ``next``,
        ``first`` and ``last`` indices. Each entry is then emitted as its
        own object, and finally the ``/Outlines`` root is emitted; its
        object number is kept in ``self.outline_root_obj_reference``.
        """
        entries = self.outlines
        if not entries:
            return

        # Top-level entries use this index as parent: the root object is
        # written directly after the len(entries) item objects.
        root_index = len(entries)
        newest_at_level = {}  # level -> index of the latest entry seen there
        previous_level = 0
        for position, entry in enumerate(entries):
            depth = entry['level']
            if depth > 0:
                # Nested entry: attach to the latest entry one level up and
                # make it that entry's last child so far.
                owner = newest_at_level[depth - 1]
                entry['parent'] = owner
                entries[owner]['last'] = position
                if depth > previous_level:
                    # The level just increased, so this is the first child.
                    entries[owner]['first'] = position
            else:
                entry['parent'] = root_index
            if depth <= previous_level and position > 0:
                # Not deeper than the previous entry: chain it to the latest
                # entry recorded on its own level.
                sibling = newest_at_level[depth]
                entries[sibling]['next'] = position
                entry['prev'] = sibling
            newest_at_level[depth] = position
            previous_level = depth

        # Object number the first outline item is about to receive; the
        # stored indices are turned into references relative to it.
        first_obj = self.n + 1

        for entry in entries:
            fields = o_dict()
            fields['/Title'] = textstring(entry['t'])
            fields['/Parent'] = iobj_ref(first_obj + entry['parent'])

            for link in ('prev', 'next', 'first', 'last'):
                if link in entry:
                    fields['/' + link.capitalize()] = iobj_ref(
                        first_obj + entry[link])

            # /Dest array (page 582, PDF Reference 1.7): page object, /XYZ
            # (vs /Fit, /FitH, /FitV, /FitR, ...), left, top, zoom.
            # NOTE(review): 1 + 2 * page relies on how the base class
            # numbers its page objects -- confirm against the FPDF version.
            page_ref = iobj_ref(1 + 2 * entry['p'])
            top = '%.2f' % ((self.h - entry['y']) * self.k)
            fields['/Dest'] = create_list_string(
                [page_ref, '/XYZ', '0', top, 'null'])
            fields['/Count'] = '0'

            self._newobj()
            self._out(create_dictionary_string(fields))
            self._out('endobj')

        # Outline root object (page 585, PDF Reference 1.7).
        root_fields = o_dict([
            ('/Type', '/Outlines'),
            ('/First', iobj_ref(first_obj)),
            ('/Last', iobj_ref(first_obj + newest_at_level[0])),
        ])

        self._newobj()

        # Offset 17 is the newline create_dictionary_string() places after
        # '<</Type /Outlines'. It is swapped for a space so the bytes written
        # stay identical to what the recorded test hashes were made from.
        patched = bytearray(create_dictionary_string(root_fields), 'ascii')
        patched[17] = ord(' ')

        self._out(patched.decode('ascii'))
        self._out('endobj')

        # Remembered for the catalog dictionary.
        self.outline_root_obj_reference = self.n

    def _putresources(self):
        """Write the base class's resources, then the bookmark objects."""
        super(Bookmark, self)._putresources()
        self._putbookmarks()

    def _putcatalog(self):
        """Write the base catalog entries plus the outline entries, if any."""
        super(Bookmark, self)._putcatalog()
        if not self.outlines:
            return
        self._out('/Outlines ' + iobj_ref(self.outline_root_obj_reference))
        self._out('/PageMode /UseOutlines')

 # Script entry point: running this module directly does nothing.
 if __name__ == '__main__':
    pass
diff --git a/test.py b/test.py
 import bookmark

 import os
 from test_utils import (
  set_doc_date_0, calculate_hash_of_file
 )

 def main_1():
  """Build a five-page document with flat bookmarks and check its MD5.

  Each page gets one cell and one top-level bookmark. The PDF is written
  under a relative file name, hashed and compared with the expected digest;
  the file is removed once the comparison has succeeded.
  """
  pdf = bookmark.Bookmark()
  set_doc_date_0(pdf)
  pdf.set_font('Arial', size=12)

  # (cell text, bookmark title) for every page, in page order.
  pages = (
    ('Hello World', 'hello world'),
    ('Hello World2', 'hello world2'),
    ('Hello World2', 'hello world3'),
    ('Hello World2', 'hello world4'),
    ('Hello World2', 'hello world5'),
  )
  for cell_text, title in pages:
    pdf.add_page()
    pdf.cell(w=0, txt=cell_text)
    pdf.bookmark(txt=title)

  target = "523061d10e720fc353e1c2899558de4e.pdf.test"
  pdf.output(target)

  expected = "523061d10e720fc353e1c2899558de4e"
  actual = calculate_hash_of_file(target)
  assert expected == actual
  os.unlink(target)

 def main_2():
  """Build a five-page document with one nested bookmark and check its MD5.

  Same layout as the flat case, except that the third bookmark is added
  with ``level=1``. The PDF is written under a relative file name, hashed
  and compared with the expected digest; the file is removed once the
  comparison has succeeded.
  """
  pdf = bookmark.Bookmark()
  set_doc_date_0(pdf)
  pdf.set_font('Arial', size=12)

  # (cell text, keyword arguments for bookmark()) for every page.
  pages = (
    ('Hello World', {'txt': 'hello world'}),
    ('Hello World2', {'txt': 'hello world2'}),
    ('Hello World2', {'txt': 'hello world3', 'level': 1}),
    ('Hello World2', {'txt': 'hello world4'}),
    ('Hello World2', {'txt': 'hello world5'}),
  )
  for cell_text, bookmark_args in pages:
    pdf.add_page()
    pdf.cell(w=0, txt=cell_text)
    pdf.bookmark(**bookmark_args)

  target = "bc3db4ad8dd3c80944f38e5eaae52883.pdf.test"
  pdf.output(target)

  expected = "bc3db4ad8dd3c80944f38e5eaae52883"
  actual = calculate_hash_of_file(target)
  assert expected == actual
  os.unlink(target)

 # Run both hash checks in order when executed as a script.
 if __name__ == '__main__':
  for scenario in (main_1, main_2):
    scenario()
diff --git a/test_utils.py b/test_utils.py
 import inspect
 import sys
 import os
 import hashlib
 import datetime

 def set_doc_date_0(doc):
  """Set the document's creation date to a fixed timestamp.

  The naive datetime 1969-12-31 19:00:00 is passed to
  ``doc.set_creation_date``.
  NOTE(review): presumably the Unix epoch expressed in a UTC-5 local
  time -- confirm before changing, the value feeds hashed test output.
  """
  doc.set_creation_date(datetime.datetime(1969, 12, 31, 19, 0, 0))

 def calculate_hash_of_file(full_path):
  """Return the hexadecimal MD5 digest of the file at ``full_path``.

  The file is opened in binary mode and fed to the hash in fixed-size
  chunks, so memory use stays bounded whatever the file size is. The
  digest is the same as hashing the whole content at once.
  """
  digest = hashlib.md5()
  with open(full_path, 'rb') as stream:
    # iter() with a sentinel stops at the empty bytes read() gives at EOF.
    for chunk in iter(lambda: stream.read(65536), b''):
      digest.update(chunk)
  return digest.hexdigest()

 def relative_path_to(place):
  """Join ``place`` onto the directory of the calling module's file.

  The caller is taken from the frame one level up the stack; the file of
  that frame is made absolute, reduced to its directory, and ``place`` is
  appended to it.
  """
  caller_frame = sys._getframe(1)
  caller_path = os.path.abspath(inspect.getfile(caller_frame))
  caller_dir = os.path.dirname(caller_path)
  return os.path.join(caller_dir, place)
	from collections import OrderedDict as o_dict

	from fpdf import FPDF
	from fpdf.util import textstring

	def create_dictionary_string(dict_):
	    """Render a mapping of PDF keys to values as a PDF dictionary string.

	    Every item becomes ``key value``; the items are separated by newlines
	    and wrapped in ``<<`` and `` >>``. Keys and values must already be
	    strings, and the iteration order of ``dict_`` decides the entry order.
	    """
	    body = '\n'.join(' '.join(pair) for pair in dict_.items())
	    return '<<%s >>' % body
	def create_list_string(list_):
	    """Render a sequence of already-formatted strings as a PDF array.

	    The items are joined with single spaces and enclosed in square brackets.
	    """
	    return '[%s]' % ' '.join(list_)

	def iobj_ref(n):
	    """Build the indirect-reference text for PDF object number ``n``.

	    The generation number is always written as 0, e.g. ``3`` -> ``'3 0 R'``.
	    """
	    return ' '.join((str(n), '0', 'R'))

	class Bookmark(FPDF):
	    """FPDF subclass that collects bookmarks and writes them as a PDF outline.

	    Bookmarks are recorded with ``bookmark()`` while pages are produced. The
	    outline objects are written right after the base class's resources, and
	    the catalog receives ``/Outlines`` and ``/PageMode`` entries for them.
	    """

	    def __init__(self, orientation='P', unit='mm', format='A4'):
	        """Initialise the base document and the empty bookmark state."""
	        super(Bookmark, self).__init__(orientation, unit, format)
	        # Flat list of bookmark dicts, in the order bookmark() was called.
	        self.outlines = []
	        # Object number of the outline root; set by _putbookmarks().
	        self.outline_root_obj_reference = None
	        # Count of add_page() calls so far, used as the bookmark's page.
	        self.current_page_number = 0

	    def bookmark(self, txt, level=0, y=0):
	        """Record a bookmark that points at the current page.

	        txt   -- title of the outline entry
	        level -- nesting depth, 0 being the top level
	        y     -- vertical position of the target; -1 selects the current
	                 position as reported by ``self.get_y()``
	        """
	        target_y = self.get_y() if y == -1 else y
	        entry = {
	            't': txt,
	            'level': level,
	            'y': target_y,
	            'p': self.current_page_number,
	        }
	        self.outlines.append(entry)

	    def add_page(self, *a, **k):
	        """Count the new page, then let the base class create it.

	        All positional and keyword arguments are forwarded unchanged to
	        ``FPDF.add_page``, so the method can also be called without any.
	        """
	        # A falsy counter (initially 0) restarts at 1; otherwise increment.
	        self.current_page_number = (self.current_page_number or 0) + 1
	        super(Bookmark, self).add_page(*a, **k)

	    def _putbookmarks(self):
	        """Write one object per bookmark followed by the outline root.

	        Nothing is written when no bookmark was recorded. Otherwise the flat
	        list in ``self.outlines`` is first linked into a tree: every entry
	        gains a ``parent`` index and, where applicable, ``prev``, ``next``,
	        ``first`` and ``last`` indices. Each entry is then emitted as its
	        own object, and finally the ``/Outlines`` root is emitted; its
	        object number is kept in ``self.outline_root_obj_reference``.
	        """
	        entries = self.outlines
	        if not entries:
	            return

	        # Top-level entries use this index as parent: the root object is
	        # written directly after the len(entries) item objects.
	        root_index = len(entries)
	        newest_at_level = {}  # level -> index of the latest entry seen there
	        previous_level = 0
	        for position, entry in enumerate(entries):
	            depth = entry['level']
	            if depth > 0:
	                # Nested entry: attach to the latest entry one level up and
	                # make it that entry's last child so far.
	                owner = newest_at_level[depth - 1]
	                entry['parent'] = owner
	                entries[owner]['last'] = position
	                if depth > previous_level:
	                    # The level just increased, so this is the first child.
	                    entries[owner]['first'] = position
	            else:
	                entry['parent'] = root_index
	            if depth <= previous_level and position > 0:
	                # Not deeper than the previous entry: chain it to the latest
	                # entry recorded on its own level.
	                sibling = newest_at_level[depth]
	                entries[sibling]['next'] = position
	                entry['prev'] = sibling
	            newest_at_level[depth] = position
	            previous_level = depth

	        # Object number the first outline item is about to receive; the
	        # stored indices are turned into references relative to it.
	        first_obj = self.n + 1

	        for entry in entries:
	            fields = o_dict()
	            fields['/Title'] = textstring(entry['t'])
	            fields['/Parent'] = iobj_ref(first_obj + entry['parent'])

	            for link in ('prev', 'next', 'first', 'last'):
	                if link in entry:
	                    fields['/' + link.capitalize()] = iobj_ref(
	                        first_obj + entry[link])

	            # /Dest array (page 582, PDF Reference 1.7): page object, /XYZ
	            # (vs /Fit, /FitH, /FitV, /FitR, ...), left, top, zoom.
	            # NOTE(review): 1 + 2 * page relies on how the base class
	            # numbers its page objects -- confirm against the FPDF version.
	            page_ref = iobj_ref(1 + 2 * entry['p'])
	            top = '%.2f' % ((self.h - entry['y']) * self.k)
	            fields['/Dest'] = create_list_string(
	                [page_ref, '/XYZ', '0', top, 'null'])
	            fields['/Count'] = '0'

	            self._newobj()
	            self._out(create_dictionary_string(fields))
	            self._out('endobj')

	        # Outline root object (page 585, PDF Reference 1.7).
	        root_fields = o_dict([
	            ('/Type', '/Outlines'),
	            ('/First', iobj_ref(first_obj)),
	            ('/Last', iobj_ref(first_obj + newest_at_level[0])),
	        ])

	        self._newobj()

	        # Offset 17 is the newline create_dictionary_string() places after
	        # '<</Type /Outlines'. It is swapped for a space so the bytes written
	        # stay identical to what the recorded test hashes were made from.
	        patched = bytearray(create_dictionary_string(root_fields), 'ascii')
	        patched[17] = ord(' ')

	        self._out(patched.decode('ascii'))
	        self._out('endobj')

	        # Remembered for the catalog dictionary.
	        self.outline_root_obj_reference = self.n

	    def _putresources(self):
	        """Write the base class's resources, then the bookmark objects."""
	        super(Bookmark, self)._putresources()
	        self._putbookmarks()

	    def _putcatalog(self):
	        """Write the base catalog entries plus the outline entries, if any."""
	        super(Bookmark, self)._putcatalog()
	        if not self.outlines:
	            return
	        self._out('/Outlines ' + iobj_ref(self.outline_root_obj_reference))
	        self._out('/PageMode /UseOutlines')

	# Script entry point: running this module directly does nothing.
	if __name__ == '__main__':
	    pass
	import bookmark

	import os
	from test_utils import (
	set_doc_date_0, calculate_hash_of_file
	)

	def main_1():
	  """Build a five-page document with flat bookmarks and check its MD5.

	  Each page gets one cell and one top-level bookmark. The PDF is written
	  under a relative file name, hashed and compared with the expected digest;
	  the file is removed once the comparison has succeeded.
	  """
	  pdf = bookmark.Bookmark()
	  set_doc_date_0(pdf)
	  pdf.set_font('Arial', size=12)

	  # (cell text, bookmark title) for every page, in page order.
	  pages = (
	    ('Hello World', 'hello world'),
	    ('Hello World2', 'hello world2'),
	    ('Hello World2', 'hello world3'),
	    ('Hello World2', 'hello world4'),
	    ('Hello World2', 'hello world5'),
	  )
	  for cell_text, title in pages:
	    pdf.add_page()
	    pdf.cell(w=0, txt=cell_text)
	    pdf.bookmark(txt=title)

	  target = "523061d10e720fc353e1c2899558de4e.pdf.test"
	  pdf.output(target)

	  expected = "523061d10e720fc353e1c2899558de4e"
	  actual = calculate_hash_of_file(target)
	  assert expected == actual
	  os.unlink(target)

	def main_2():
	  """Build a five-page document with one nested bookmark and check its MD5.

	  Same layout as the flat case, except that the third bookmark is added
	  with ``level=1``. The PDF is written under a relative file name, hashed
	  and compared with the expected digest; the file is removed once the
	  comparison has succeeded.
	  """
	  pdf = bookmark.Bookmark()
	  set_doc_date_0(pdf)
	  pdf.set_font('Arial', size=12)

	  # (cell text, keyword arguments for bookmark()) for every page.
	  pages = (
	    ('Hello World', {'txt': 'hello world'}),
	    ('Hello World2', {'txt': 'hello world2'}),
	    ('Hello World2', {'txt': 'hello world3', 'level': 1}),
	    ('Hello World2', {'txt': 'hello world4'}),
	    ('Hello World2', {'txt': 'hello world5'}),
	  )
	  for cell_text, bookmark_args in pages:
	    pdf.add_page()
	    pdf.cell(w=0, txt=cell_text)
	    pdf.bookmark(**bookmark_args)

	  target = "bc3db4ad8dd3c80944f38e5eaae52883.pdf.test"
	  pdf.output(target)

	  expected = "bc3db4ad8dd3c80944f38e5eaae52883"
	  actual = calculate_hash_of_file(target)
	  assert expected == actual
	  os.unlink(target)

	# Run both hash checks in order when executed as a script.
	if __name__ == '__main__':
	  main_1()
	  main_2()
	import inspect
	import sys
	import os
	import hashlib
	import datetime

	def set_doc_date_0(doc):
	  """Set the document's creation date to a fixed timestamp.

	  The naive datetime 1969-12-31 19:00:00 is passed to
	  ``doc.set_creation_date``.
	  NOTE(review): presumably the Unix epoch expressed in a UTC-5 local
	  time -- confirm before changing, the value feeds hashed test output.
	  """
	  doc.set_creation_date(datetime.datetime(1969, 12, 31, 19, 0, 0))

	def calculate_hash_of_file(full_path):
	  """Return the hexadecimal MD5 digest of the file at ``full_path``.

	  The file is opened in binary mode and fed to the hash in fixed-size
	  chunks, so memory use stays bounded whatever the file size is. The
	  digest is the same as hashing the whole content at once.
	  """
	  digest = hashlib.md5()
	  with open(full_path, 'rb') as stream:
	    # iter() with a sentinel stops at the empty bytes read() gives at EOF.
	    for chunk in iter(lambda: stream.read(65536), b''):
	      digest.update(chunk)
	  return digest.hexdigest()

	def relative_path_to(place):
	  """Join ``place`` onto the directory of the calling module's file.

	  The caller is taken from the frame one level up the stack; the file of
	  that frame is made absolute, reduced to its directory, and ``place`` is
	  appended to it.
	  """
	  caller_frame = sys._getframe(1)
	  caller_path = os.path.abspath(inspect.getfile(caller_frame))
	  caller_dir = os.path.dirname(caller_path)
	  return os.path.join(caller_dir, place)