Skip to content

Instantly share code, notes, and snippets.

@HeimMatthias
Created March 13, 2025 09:57
Show Gist options
  • Select an option

  • Save HeimMatthias/6e224610b2476e92a4326dbe8f645731 to your computer and use it in GitHub Desktop.

Select an option

Save HeimMatthias/6e224610b2476e92a4326dbe8f645731 to your computer and use it in GitHub Desktop.
/*
* Use MuPDF's JavaScript API to copy the "table of contents" / document outline from one pdf file to another
* The script also copies styling information (italics / bold / color), by accessing this information via
* mupdf's PDFObject-API. It is mainly intended as a proof of concept for this type of operation.
* This is useful if you have two versions of the same pdf and want to copy the outline from one version to another
* Warning: the script performs no checks whether the outlined targets exist or make sense in the new document.
* Warning: this script removes existing outlines from the target document and overwrites it in place.
* Always keep a copy of the target document in case something goes horribly wrong.
* v.1 - 13.03.2025
* run as follows: node copytoc.mjs source.pdf target.pdf
*
* Copyright (C) 2025 Matthias Heim
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import * as fs from "fs";
import * as path from "path";
import { fileURLToPath } from 'url';
import * as mupdfjs from "mupdf/mupdfjs"
/**
 * Copy the table of contents (document outline) from one pdf file to another.
 *
 * Any outline already present in the target document is deleted first and the
 * target file is overwritten in place. The copy runs in two passes: the first
 * recreates every outline item in the target, the second copies the "F" and
 * "C" entries (used for styling such as italics / bold / color) between the
 * underlying outline objects.
 *
 * Failures are reported on the console instead of being thrown.
 *
 * @param {string} pdfFilePathSource - Path to the PDF file FROM which the toc is copied
 * @param {string} pdfFilePathTarget - Path to the PDF file TO which the toc is copied
 * @returns {void}
 */
export function copyToC(pdfFilePathSource, pdfFilePathTarget) {
  try {
    // Short names (without extension) are only used in console messages.
    const sourceFileName = path.basename(pdfFilePathSource, '.pdf');
    const targetFileName = path.basename(pdfFilePathTarget, '.pdf');
    const targetFilePath = path.resolve(pdfFilePathTarget);

    // open both files in mupdf
    const sourceDoc = mupdfjs.PDFDocument.openDocument(fs.readFileSync(path.resolve(pdfFilePathSource)), "application/pdf");
    const targetDoc = mupdfjs.PDFDocument.openDocument(fs.readFileSync(targetFilePath), "application/pdf");

    // warn if files have a different page count
    const sourcePageCount = sourceDoc.countPages();
    const targetPageCount = targetDoc.countPages();
    if (sourcePageCount !== targetPageCount) {
      console.error(`${sourceFileName} has ${sourcePageCount} pages, but ${targetFileName} has ${targetPageCount}! This might become a problem.`);
    }

    // Nothing to do when the source has no outline at all.
    const sourceOutline = sourceDoc.outlineIterator();
    if (sourceOutline.item() === null) {
      console.error(`${sourceFileName} does not have a table of contents that could be copied.`);
      return;
    }

    // Remove whatever outline the target already has, after a warning.
    const targetOutline = targetDoc.outlineIterator();
    if (targetOutline.item() !== null) {
      console.error(`${targetFileName} already has a table of contents. These will be removed.`);
      // Keep deleting for as long as delete() reports another item (0) at the position.
      while (targetOutline.delete() === 0) {
        // intentionally empty
      }
    }

    // Pass 1: recreate the outline items in the target document.
    copyOutlineItems(sourceOutline, targetOutline);

    // Pass 2: copy the F and C entries. This has to be a separate pass because
    // the objects in the target document keep changing while items are
    // inserted in pass 1. A fresh source iterator is needed because the first
    // one has already been walked to its end.
    const sourceOutlineRoot = sourceDoc.getTrailer().get("Root").get("Outlines").resolve();
    const targetOutlineRoot = targetDoc.getTrailer().get("Root").get("Outlines").resolve();
    copyOutlineStyles(sourceDoc.outlineIterator(), sourceOutlineRoot, targetOutlineRoot, targetDoc);

    // save modified file; only announce success once the write has gone through
    try {
      fs.writeFileSync(targetFilePath, targetDoc.saveToBuffer("").asUint8Array());
      console.log(`Table of contents copied to ${targetFileName}`);
    } catch (error) {
      console.error('Error saving modified PDF file:', error);
    }
  } catch (error) {
    console.error('Error processing PDF files:', error);
  }
}

/**
 * Recreate every item reachable from the source iterator's current level
 * (including nested children) at the target iterator's current position.
 * Each copied title is printed to the console.
 *
 * @param {object} sourceOutline - outline iterator of the source document
 * @param {object} targetOutline - outline iterator of the target document
 * @returns {void}
 */
function copyOutlineItems(sourceOutline, targetOutline) {
  let nextResult;
  do {
    const item = sourceOutline.item();
    // If there's no item at the current position, there is nothing to copy
    if (!item) return;

    console.log(`${item.title}`);
    // Insert the item, then step back so the target iterator rests on it.
    targetOutline.insert(item);
    targetOutline.prev();

    // down() === 0 means the source item has children: mirror them one level deeper
    if (sourceOutline.down() === 0) {
      targetOutline.down();
      copyOutlineItems(sourceOutline, targetOutline);
      targetOutline.up();
    }
    // The source iterator is moved back up whether or not children were found.
    sourceOutline.up();

    // Advance both iterators to the next sibling
    nextResult = sourceOutline.next();
    targetOutline.next();
  } while (nextResult === 0);
}

/**
 * Copy the "F" value and the "C" array of every outline object below
 * `sourceParentObject` to the object at the same position below
 * `targetParentObject`. The source iterator drives the traversal; the outline
 * objects are followed in parallel through their "First" / "Next" entries.
 *
 * @param {object} sourceOutline - outline iterator of the source document
 * @param {object} sourceParentObject - source object whose "First" entry starts this level
 * @param {object} targetParentObject - target object whose "First" entry starts this level
 * @param {object} targetDoc - target document, used to create the new "C" arrays
 * @returns {void}
 */
function copyOutlineStyles(sourceOutline, sourceParentObject, targetParentObject, targetDoc) {
  // enter the current level of the outline-tree
  let sourceObject = sourceParentObject.get("First").resolve();
  let targetObject = targetParentObject.get("First").resolve();
  let nextResult;
  do {
    if (!sourceOutline.item()) return;

    const flags = sourceObject.get("F");
    if (!flags.isNull()) {
      targetObject.put("F", flags.valueOf());
    }

    const color = sourceObject.get("C");
    if (!color.isNull() && color.isArray()) {
      // Build a new array in the target document and fill it entry by entry.
      const colorCopy = targetDoc.newArray();
      color.forEach((value, index) => {
        colorCopy.put(index, value);
      });
      targetObject.put("C", colorCopy);
    }

    // Process all children recursively, one level down in both object trees
    if (sourceOutline.down() === 0) {
      copyOutlineStyles(sourceOutline, sourceObject, targetObject, targetDoc);
    }
    sourceOutline.up();

    // Move to the next sibling, both in the iterator and the outline objects
    nextResult = sourceOutline.next();
    if (nextResult === 0) {
      sourceObject = sourceObject.get("Next").resolve();
      targetObject = targetObject.get("Next").resolve();
    }
  } while (nextResult === 0);
}
// Command-line entry point: run the copy only when this file is executed
// directly (`node copytoc.mjs source.pdf target.pdf`), not when it is imported.
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
  // Everything after `node <script>` is a user-supplied argument.
  const args = process.argv.slice(2);
  if (args.length !== 2) {
    console.error('Please provide two pdf file paths');
    process.exit(1);
  }
  const [pdfFilePathSource, pdfFilePathTarget] = args;
  copyToC(pdfFilePathSource, pdfFilePathTarget);
}
// There is deliberately no CommonJS fallback: this file is an ES module (it
// uses `import` and `import.meta`), where `module` is undefined, so assigning
// to `module.exports` would throw a ReferenceError as soon as the file is
// imported.
@HeimMatthias
Copy link
Author

Use MuPDF's JavaScript API to copy the "table of contents" / document outline from one pdf file to another.

The script also copies styling information (italics / bold / color), by accessing this information via mupdf's PDFObject-API. It is mainly intended as a proof of concept for this type of operation.

This is useful if you have two versions of the same pdf and want to copy the outline from one version to another.

Warning: the script performs no checks whether the outlined targets exist or make sense in the new document.
Warning: this script removes existing outlines from the target document and overwrites it in place. Always keep a copy of the target document in case something goes horribly wrong.

v.1 - 13.03.2025

run as follows: node copytoc.mjs source.pdf target.pdf

@ccxvii
Copy link

ccxvii commented Mar 13, 2025

I will be extending the OutlineItem to include style and r,g,b properties for setting the style and color. This change will not be available until the next major MuPDF release though, since the core library functionality it depends on is not yet available.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment