Skip to content

Instantly share code, notes, and snippets.

@mystix
Last active October 3, 2025 07:53
Show Gist options
  • Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.
Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.
OSCP PEN-200: Add Table of Contents page links + bookmarks to PDF
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "pymupdf",
# ]
# ///
import argparse
import pymupdf
import re
def add_toc(pdfin, toc_start, toc_end, pdfout):
    """Add clickable table-of-contents links and PDF bookmarks, then save.

    Every text block on the pages ``pdf[toc_start:toc_end]`` is tested against
    the TOC entry pattern ``<section number> / <title> .... <page number>``.
    For each entry, the referenced page is searched for the first text block
    that starts with the same section number and whose heading text is
    contained in the TOC title.  When found, a GOTO link is placed over the
    TOC entry and a bookmark for the section is recorded.  TOC entries with
    no matching heading are left untouched.

    Args:
        pdfin: Path of the PDF to read.
        toc_start: Start of the TOC page slice (used as ``pdf[toc_start:...]``);
            the "Table of Contents" bookmark targets page ``toc_start + 1``.
        toc_end: End of the TOC page slice (exclusive, as in Python slicing).
        pdfout: Path the annotated PDF is written to.
    """
    # Compile once: both patterns are applied to every text block visited.
    toc_entry_re = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")
    heading_re = re.compile(r"(\d+(?:\.\d+)*) \n?(.+) \n")

    with pymupdf.open(pdfin) as pdf:
        # Seed with the bookmark for the TOC itself so the outline always
        # starts with a level-1 entry.
        toc = [[1, "Table of Contents", toc_start + 1]]

        for page in pdf[toc_start:toc_end]:
            # A "blocks" item is (x0, y0, x1, y1, text, block_no, block_type).
            for section in page.get_text("blocks"):
                toc_match = toc_entry_re.match(section[4])
                if not toc_match:
                    continue

                section_number = toc_match.group(1)
                section_header = toc_match.group(2).strip()
                pg_number = int(toc_match.group(3))

                # Ignore entries that point outside the document: page 0 would
                # wrap around to the last page through negative indexing and a
                # number past the end would raise.
                if not 1 <= pg_number <= pdf.page_count:
                    continue

                for pg_block in pdf[pg_number - 1].get_text("blocks"):
                    pg_match = heading_re.match(pg_block[4])
                    if not pg_match:
                        continue
                    # The heading must carry the same section number ...
                    if pg_match.group(1) != section_number:
                        continue
                    # ... and its text must be contained in the TOC title
                    # (NOTE: the page heading might be truncated, i.e. spread
                    # over 2 lines, so only containment is required).
                    if pg_match.group(2) not in section_header:
                        continue

                    destination = {
                        "kind": pymupdf.LINK_GOTO,
                        "page": pg_number - 1,
                        "from": pymupdf.Rect(*section[0:4]),
                        # top-left corner of the heading's bounding box
                        "to": pymupdf.Point(*pg_block[0:2]),
                    }
                    # add TOC link
                    page.insert_link(destination)

                    # add bookmark entry; an outline level may exceed the
                    # previous entry's level by at most one, so clamp it in
                    # case a parent section was not matched.
                    level = min(len(section_number.split(".")), toc[-1][0] + 1)
                    toc.append(
                        [
                            level,
                            f"{section_number} - {section_header}",
                            pg_number,
                            destination,
                        ]
                    )
                    # One link and one bookmark per TOC entry: stop at the
                    # first matching heading instead of adding duplicates.
                    break

        pdf.set_toc(toc)
        pdf.save(pdfout)
def main():
    """Parse the command-line arguments and run ``add_toc`` with them."""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Add section links and PDF bookmarks to the OSCP PEN-200 PDF table of contents.",
    )

    # Positional arguments, registered in the order they appear on the
    # command line.
    positional_specs = (
        ("pdf_in", {"help": "Input PDF"}),
        ("toc_start", {"help": "First page of the Table of Contents", "type": int}),
        ("toc_end", {"help": "Last page of the Table of Contents", "type": int}),
        ("pdf_out", {"help": "Output PDF"}),
    )
    for arg_name, arg_options in positional_specs:
        cli.add_argument(arg_name, **arg_options)

    opts = cli.parse_args()
    add_toc(opts.pdf_in, opts.toc_start, opts.toc_end, opts.pdf_out)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment