Last active
October 13, 2018 14:10
-
-
Save alexanderankin/d59c999f1dbde5b1439b54958d79e8ca to your computer and use it in GitHub Desktop.
fpdf2 Bookmarks Feature Draft Specification
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict as o_dict | |
from fpdf import FPDF | |
from fpdf.util import textstring | |
def create_dictionary_string(dict_):
    """Format an ordered mapping as a PDF dictionary string.

    Each entry is rendered as ``key value`` and entries are separated by
    newlines, wrapped as ``<<`` ... `` >>``. Keys are expected to already
    carry their leading ``/``. Values are interpolated with ``%s`` so
    numbers are accepted as well as pre-formatted strings.

    :param dict_: mapping whose iteration order is the output order.
    :return: the PDF dictionary as a single string.
    """
    entries = '\n'.join(
        '%s %s' % (key, value) for key, value in dict_.items()
    )
    return '<<' + entries + ' >>'
def create_list_string(list_):
    """Format a sequence of items as a PDF array string.

    Items are converted with ``str`` and joined by single spaces inside
    square brackets, so numbers may be passed directly.

    :param list_: iterable of array elements.
    :return: the PDF array, e.g. ``[3 0 R /XYZ 0 10.00 null]``.
    """
    return '[' + ' '.join(str(item) for item in list_) + ']'
def iobj_ref(n):
    """Format an indirect PDF object reference from its object number.

    :param n: object number.
    :return: the reference string ``"<n> 0 R"`` (generation is always 0).
    """
    return '{} 0 R'.format(n)
class Bookmark(FPDF):
    """FPDF subclass that records bookmarks and writes a document outline.

    Bookmarks are collected in a flat list while the document is built.
    They are written as outline objects right after the regular resources,
    and the catalog is extended to point at the outline root.
    """

    def __init__(self, orientation='P', unit='mm', format='A4'):
        super(Bookmark, self).__init__(orientation, unit, format)
        # Flat list of bookmark dicts, in the order bookmark() was called.
        self.outlines = []
        # Object number of the outline root; assigned by _putbookmarks().
        self.outline_root_obj_reference = None
        # Count of add_page() calls so far; 0 until the first page exists.
        self.current_page_number = 0

    def bookmark(self, txt, level=0, y=0):
        """Record a bookmark pointing at the current page.

        :param txt: bookmark title.
        :param level: nesting depth, 0 for a top-level entry. When the
            outline is written, a level above 0 looks up the most recent
            bookmark at ``level - 1``; if that level was never used the
            lookup fails with ``KeyError``.
        :param y: vertical position of the destination in user units
            measured from the top of the page; ``-1`` means the current
            position as reported by ``get_y()``.
        :raises RuntimeError: if no page has been added yet.
        """
        if not self.current_page_number:
            # Page numbers start at 1, so a bookmark recorded now would
            # be written with a destination that names no real page.
            raise RuntimeError(
                'bookmark() called before add_page(): no page to point at'
            )
        if y == -1:
            y = self.get_y()
        self.outlines.append({
            't': txt, 'level': level, 'y': y,
            'p': self.current_page_number
        })

    def add_page(self, *a, **k):
        """Add a page, keeping ``current_page_number`` in step."""
        # Counted before delegating, exactly as before, so anything the
        # base add_page() triggers already sees the new page number.
        self.current_page_number += 1
        super(Bookmark, self).add_page(*a, **k)

    def _putbookmarks(self):
        """Print Bookmark tags into the resources section.

        If the accumulated list of bookmarks is empty, this is a no-op.

        First, some assembly required in order to organize a flat list into
        a tree. Then, after all nested indirect references are in place,
        make a dictionary for each bookmark, and make and insert the
        dictionary for the document outline.
        """
        if not self.outlines:
            return

        # Top-level items use the index one past the last item as their
        # parent; that slot is where the outline root object is written.
        root_index = len(self.outlines)
        # Maps level -> index of the most recent bookmark at that level.
        lru = {}
        last_level = 0
        for index, outline_el in enumerate(self.outlines):
            level = outline_el['level']
            if level > 0:
                # Set parent and last pointers
                parent = lru[level - 1]
                outline_el['parent'] = parent
                self.outlines[parent]['last'] = index
                if level > last_level:
                    # Level increasing: set first pointer
                    self.outlines[parent]['first'] = index
            else:
                outline_el['parent'] = root_index
            if level <= last_level and index > 0:
                # Set prev and next pointers
                prev = lru[level]
                self.outlines[prev]['next'] = index
                outline_el['prev'] = prev
            lru[level] = index
            last_level = level

        # Outline items: item i is written as object n + i, so list
        # indices stored above convert directly into object references.
        n = self.n + 1
        for outline_el in self.outlines:
            elem_fields = o_dict()
            elem_fields['/Title'] = textstring(outline_el['t'])
            elem_fields['/Parent'] = iobj_ref(n + outline_el['parent'])
            for opt in ['prev', 'next', 'first', 'last']:
                if opt in outline_el:
                    tag = '/' + opt.capitalize()
                    elem_fields[tag] = iobj_ref(n + outline_el[opt])
            # page 582 PDF Reference 1.7
            elem_fields['/Dest'] = create_list_string([
                # page object reference
                # NOTE(review): assumes page p is object 1 + 2 * p in the
                # base class output - confirm against the FPDF version used.
                iobj_ref(1 + 2 * outline_el['p']),
                '/XYZ',  # vs /Fit, /FitH, /FitV, /FitR, etc...
                # left, top (y flipped: measured from the bottom, in points)
                '0', '%.2f' % ((self.h - outline_el['y']) * self.k),
                'null'  # zoom
            ])
            elem_fields['/Count'] = '0'
            self._newobj()
            self._out(create_dictionary_string(elem_fields))
            self._out('endobj')

        # Outline object (page 585 PDF Reference 1.7)
        outlines_dictionary = o_dict()
        outlines_dictionary['/Type'] = '/Outlines'
        outlines_dictionary['/First'] = iobj_ref(n)
        outlines_dictionary['/Last'] = iobj_ref(n + lru[0])
        self._newobj()
        outlines_string = create_dictionary_string(outlines_dictionary)
        # The first entry separator is emitted as a space rather than a
        # newline so the output stays byte-identical with the files the
        # existing test hashes were generated from.
        outlines_string = outlines_string.replace('\n', ' ', 1)
        self._out(outlines_string)
        self._out('endobj')
        # Saved for Catalog Dictionary
        self.outline_root_obj_reference = self.n

    def _putresources(self):
        """Write the base resources, then the outline objects."""
        super(Bookmark, self)._putresources()
        self._putbookmarks()

    def _putcatalog(self):
        """Write the base catalog entries plus the outline reference."""
        super(Bookmark, self)._putcatalog()
        if self.outlines:
            self._out('/Outlines ' + iobj_ref(self.outline_root_obj_reference))
            self._out('/PageMode /UseOutlines')
if __name__ == '__main__':
    # Nothing to do when this module is executed directly.
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bookmark | |
import os | |
from test_utils import ( | |
set_doc_date_0, calculate_hash_of_file | |
) | |
def main_1():
    """Build a five-page PDF with five top-level bookmarks and check its hash.

    The document is written to a file in the working directory, hashed,
    and compared against the expected digest. The file is removed even
    when the comparison fails.
    """
    doc = bookmark.Bookmark()
    set_doc_date_0(doc)
    doc.set_font('Arial', size=12)

    doc.add_page()
    doc.cell(w=0, txt='Hello World')
    doc.bookmark(txt='hello world')
    # Pages 2-5 all carry the same cell text; only the bookmark title varies.
    for number in range(2, 6):
        doc.add_page()
        doc.cell(w=0, txt='Hello World2')
        doc.bookmark(txt='hello world%d' % number)

    good = "523061d10e720fc353e1c2899558de4e"
    outfile = good + ".pdf.test"
    try:
        doc.output(outfile)
        assert good == calculate_hash_of_file(outfile)
    finally:
        # Clean up regardless of the outcome so a failed run leaves no
        # stray file behind.
        if os.path.exists(outfile):
            os.unlink(outfile)
def main_2():
    """Build a five-page PDF with one nested bookmark and check its hash.

    Same layout as ``main_1`` except that the third bookmark is created at
    level 1, making it a child of the second. The output file is removed
    even when the comparison fails.
    """
    doc = bookmark.Bookmark()
    set_doc_date_0(doc)
    doc.set_font('Arial', size=12)

    doc.add_page()
    doc.cell(w=0, txt='Hello World')
    doc.bookmark(txt='hello world')
    # (page number, bookmark level) for pages 2-5; level 0 is the default.
    for number, level in ((2, 0), (3, 1), (4, 0), (5, 0)):
        doc.add_page()
        doc.cell(w=0, txt='Hello World2')
        doc.bookmark(txt='hello world%d' % number, level=level)

    good = "bc3db4ad8dd3c80944f38e5eaae52883"
    outfile = good + ".pdf.test"
    try:
        doc.output(outfile)
        assert good == calculate_hash_of_file(outfile)
    finally:
        # Clean up regardless of the outcome so a failed run leaves no
        # stray file behind.
        if os.path.exists(outfile):
            os.unlink(outfile)
if __name__ == '__main__':
    # Run both hash checks in order; an AssertionError aborts the script.
    main_1()
    main_2()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect | |
import sys | |
import os | |
import hashlib | |
import datetime | |
def set_doc_date_0(doc):
    """Pin the document creation date to a fixed timestamp.

    The value is the naive datetime 1969-12-31 19:00:00, which is the Unix
    epoch as seen from a UTC-5 clock. It is passed to
    ``doc.set_creation_date`` unchanged; do not alter it, since the
    expected test hashes were generated with this exact date.

    :param doc: object exposing ``set_creation_date(datetime)``.
    """
    zero = datetime.datetime(1969, 12, 31, 19, 0, 0)
    doc.set_creation_date(zero)
def calculate_hash_of_file(full_path):
    """Return the hexadecimal MD5 digest of the file at ``full_path``.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so memory use does not grow with the file size.

    :param full_path: path of the file to hash.
    :return: the digest as a lowercase hex string.
    """
    digest = hashlib.md5()
    with open(full_path, 'rb') as handle:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: handle.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def relative_path_to(place):
    """Return ``place`` joined onto the directory of the calling module.

    The caller's file is taken from the stack frame one level up, made
    absolute, and its directory is joined with ``place``.

    :param place: path fragment relative to the caller's directory.
    :return: the joined path.
    """
    # Frame 1 is the caller; this lookup must stay directly in this
    # function body or the depth would point somewhere else.
    caller_file = inspect.getfile(sys._getframe(1))
    caller_dir = os.path.dirname(os.path.abspath(caller_file))
    return os.path.join(caller_dir, place)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage:
pip install fpdf2
git clone https://gist.github.com/d59c999f1dbde5b1439b54958d79e8ca.git && cd d59c999f1dbde5b1439b54958d79e8ca && python test.py