Last active
October 3, 2025 07:53
-
-
Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.
OSCP PEN-200: Add Table of Contents page links + bookmarks to PDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
#   "pymupdf",
# ]
# ///
import argparse
import re

import pymupdf
# A TOC line as extracted by PyMuPDF:
#   "<section number> \n<title> ..... <page number>"
_TOC_ENTRY_RE = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")
# A section heading on a content page:
#   "<section number> [\n]<first line of heading> \n"
_PAGE_HEADING_RE = re.compile(r"(\d+(?:\.\d+)*) \n?(.+) \n")


def _find_heading_block(blocks, section_header):
    """Return the first text block whose heading text occurs in *section_header*.

    Only the first line of the on-page heading is captured by the regex, so a
    heading wrapped over two lines on the page still matches the (single-line)
    TOC title by substring. Returns ``None`` when no block matches.
    """
    for block in blocks:
        heading = _PAGE_HEADING_RE.match(block[4])
        if heading and heading.group(2) in section_header:
            return block
    return None


def add_toc(pdfin, toc_start, toc_end, pdfout):
    """Add clickable TOC links and PDF bookmarks, then save to *pdfout*.

    Every text block on the TOC pages that looks like
    ``<number> <title> ..... <page>`` is resolved against the referenced page.
    If a matching section heading is found there, the TOC block becomes a
    GOTO link to the heading and a bookmark is recorded for it.

    Args:
        pdfin: Path of the PDF to read.
        toc_start: Zero-based index of the first TOC page (slice start).
            ``toc_start + 1`` is used as the 1-based page of the
            "Table of Contents" bookmark.
        toc_end: Slice end for the TOC pages, i.e. the zero-based index just
            past the last TOC page (exclusive).
        pdfout: Path the modified PDF is written to.
    """
    with pymupdf.open(pdfin) as pdf:
        page_count = pdf.page_count
        # The bookmark for the TOC itself always comes first (1-based page).
        toc = [[1, "Table of Contents", toc_start + 1]]

        for page in pdf[toc_start:toc_end]:
            # Each block is (x0, y0, x1, y1, text, block_no, block_type).
            for section in page.get_text("blocks"):
                toc_match = _TOC_ENTRY_RE.match(section[4])
                if not toc_match:
                    continue

                section_number = toc_match.group(1)
                section_header = toc_match.group(2).strip()
                pg_number = int(toc_match.group(3))

                # Skip entries pointing outside the document: a too-large
                # number would raise IndexError and 0 would wrap around to the
                # last page via negative indexing.
                if not 1 <= pg_number <= page_count:
                    continue

                # Use only the first matching heading so a single TOC entry
                # never yields duplicate links/bookmarks.
                pg_block = _find_heading_block(
                    pdf[pg_number - 1].get_text("blocks"), section_header
                )
                if pg_block is None:
                    continue

                destination = {
                    "kind": pymupdf.LINK_GOTO,
                    "page": pg_number - 1,
                    "from": pymupdf.Rect(*section[0:4]),
                    # top-left corner of the heading's bounding box
                    "to": pymupdf.Point(*pg_block[0:2]),
                }
                # add TOC link
                page.insert_link(destination)

                # set_toc() rejects a level more than one deeper than the
                # previous entry; clamp so a skipped parent section does not
                # abort the whole run.
                level = min(len(section_number.split(".")), toc[-1][0] + 1)
                # add bookmark entry
                toc.append(
                    [level, f"{section_number} - {section_header}", pg_number, destination]
                )

        pdf.set_toc(toc)
        pdf.save(pdfout)
def main(argv=None):
    """Parse the command line and run ``add_toc``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.

    Exits with status 2 (via ``parser.error``) when the TOC page range is
    negative or empty.
    """
    parser = argparse.ArgumentParser(
        description="Add section links and PDF bookmarks to the OSCP PEN-200 PDF table of contents.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("pdf_in", help="Input PDF")
    # The two page arguments are used as a Python slice over the document, so
    # the help text spells out the zero-based / end-exclusive convention.
    parser.add_argument(
        "toc_start",
        type=int,
        help="Zero-based index of the first Table of Contents page",
    )
    parser.add_argument(
        "toc_end",
        type=int,
        help="Zero-based index just past the last Table of Contents page (exclusive)",
    )
    parser.add_argument("pdf_out", help="Output PDF")
    args = parser.parse_args(argv)

    # A negative start would yield a nonsensical "Table of Contents" bookmark.
    if args.toc_start < 0:
        parser.error("toc_start must be a non-negative page index")
    # An empty range would silently process no TOC pages at all. A negative
    # toc_end is left alone: it keeps its from-the-end slice meaning.
    if 0 <= args.toc_end <= args.toc_start:
        parser.error("toc_end must be greater than toc_start")

    add_toc(args.pdf_in, args.toc_start, args.toc_end, args.pdf_out)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment