Created
March 13, 2025 09:57
-
-
Save HeimMatthias/6e224610b2476e92a4326dbe8f645731 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Use MuPDF's JavaScript API to copy the "table of contents" / document outline from one pdf file to another | |
| * The script also copies styling information (italics / bold / color), by accessing this information via | |
| * mupdf's PDFObject-API. It is mainly intended as a proof of concept for this type of operation. | |
| * This is useful if you have two versions of the same pdf and want to copy the outline from one version to another | |
| * Warning: the script performs no checks whether the outlined targets exist or make sense in the new document. | |
| * Warning: this script removes existing outlines from the target document and overwrites it in place. | |
| * Always keep a copy of the target document in case something goes horribly wrong. | |
| * v.1 - 13.03.2025 | |
| * run as follows: node copytoc.mjs source.pdf target.pdf | |
| * | |
| * Copyright (C) 2025 Matthias Heim | |
| This program is free software: you can redistribute it and/or modify | |
| it under the terms of the GNU Affero General Public License as | |
| published by the Free Software Foundation, either version 3 of the | |
| License, or (at your option) any later version. | |
| This program is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU Affero General Public License for more details. | |
| You should have received a copy of the GNU Affero General Public License | |
| along with this program. If not, see <https://www.gnu.org/licenses/>. | |
| */ | |
| import * as fs from "fs"; | |
| import * as path from "path"; | |
| import { fileURLToPath } from 'url'; | |
| import * as mupdfjs from "mupdf/mupdfjs" | |
/**
 * Copy the table of contents (document outline) from one pdf file to another.
 *
 * Any outline already present in the target document is deleted, and the target
 * file is overwritten in place. The copy runs in two passes:
 *   1. the outline items are re-created in the target through MuPDF's outline iterator;
 *   2. the "F" and "C" entries (the styling information) are copied between the
 *      underlying outline PDF objects.
 * No check is made that the outline destinations make sense in the target document.
 * Errors are caught and reported with console.error; nothing is re-thrown to the caller.
 *
 * @param {string} pdfFilePathSource - Path to the PDF file FROM which the toc is copied
 * @param {string} pdfFilePathTarget - Path to the PDF file TO which the toc is copied
 * @returns {void}
 */
function copyToC(pdfFilePathSource, pdfFilePathTarget) {
  try {
    // Base names (without the ".pdf" extension) are only used in the messages below.
    const sourceFileName = path.basename(pdfFilePathSource, '.pdf');
    const targetFileName = path.basename(pdfFilePathTarget, '.pdf');

    // open both files in mupdf
    const sourceDoc = mupdfjs.PDFDocument.openDocument(fs.readFileSync(path.resolve(pdfFilePathSource)), "application/pdf");
    const targetDoc = mupdfjs.PDFDocument.openDocument(fs.readFileSync(path.resolve(pdfFilePathTarget)), "application/pdf");

    // warn (but carry on) if files have a different page count
    const sourcePageCount = sourceDoc.countPages();
    const targetPageCount = targetDoc.countPages();
    if (sourcePageCount !== targetPageCount) {
      console.error(`${sourceFileName} has ${sourcePageCount} pages, but ${targetFileName} has ${targetPageCount}! This might become a problem.`);
    }

    // access the table of contents of both files, delete it in the target file after a warning
    let sourceOutline = sourceDoc.outlineIterator();
    if (sourceOutline.item() === null) {
      console.error(`${sourceFileName} does not have a table of contents that could be copied.`);
      return;
    }
    let targetOutline = targetDoc.outlineIterator();
    if (targetOutline.item() !== null) {
      console.error(`${targetFileName} already has a table of contents. These will be removed.`);
      // Keep deleting for as long as delete() reports 0.
      // NOTE(review): presumably 0 means "another valid item is now current" - confirm against the MuPDF docs.
      while (targetOutline.delete() === 0);
    }

    // Pass 1: iterate over source outlineIterator and create identical outlineItems in target document.
    // Both iterators are moved in lock-step; the order of the iterator calls matters.
    const copyOutline = (sourceIterator, targetIterator) => {
      let nextResult = 1;
      do {
        const item = sourceIterator.item();
        // If there's no item at the current position, return
        if (!item) return;
        // Copy the current item to the target document
        console.log(`${item.title}`);
        targetIterator.insert(item);
        // NOTE(review): presumably steps back onto the item that was just inserted - confirm.
        targetIterator.prev();
        // Check if this item has children
        if (sourceIterator.down() === 0) {
          targetIterator.down();
          // Process all children recursively
          copyOutline(sourceIterator, targetIterator);
          targetIterator.up();
        }
        // Move back up to the parent level. This runs even when down() did not return 0.
        // NOTE(review): relies on down() having moved the iterator in that case too - confirm.
        sourceIterator.up();
        // Move to the next sibling
        nextResult = sourceIterator.next();
        targetIterator.next();
      } while (nextResult === 0);
    };
    copyOutline(sourceOutline, targetOutline);

    // Pass 2: iterate over source outlineIterator AND objects and copy F and C object to new document.
    // This has to be a separate iteration as the objects in the target document continuously update and
    // change with the insert(item)-step in the iteration above.
    // It would be possible to only iterate over the objects, but by using an outline iterator this
    // iteration can be very similar to the one above.
    sourceOutline = sourceDoc.outlineIterator();
    // Re-created alongside the source iterator as before; the second pass itself does not read it.
    targetOutline = targetDoc.outlineIterator();
    const sourceOutlineObject = sourceDoc.getTrailer().get("Root").get("Outlines").resolve();
    const targetOutlineObject = targetDoc.getTrailer().get("Root").get("Outlines").resolve();

    const copyOutlineParameters = (sourceIterator, sourceParentObject, targetParentObject) => {
      let nextResult = 1;
      // enter the current outline-tree: start at the first child of the given parent objects
      let sourceObject = sourceParentObject.get("First").resolve();
      let targetObject = targetParentObject.get("First").resolve();
      do {
        const item = sourceIterator.item();
        // If there's no item at the current position, return
        if (!item) return;
        // Copy the value of F to the outline object in the target document
        const flags = sourceObject.get("F");
        if (!flags.isNull()) {
          targetObject.put("F", flags.valueOf());
        }
        // Copy the array-values of C into a new array owned by the target document
        const color = sourceObject.get("C");
        if (!color.isNull() && color.isArray()) {
          const arr = targetDoc.newArray();
          color.forEach((value, key) => { arr.put(key, value); });
          targetObject.put("C", arr);
        }
        // Check if this item has children
        if (sourceIterator.down() === 0) {
          // Process all children recursively
          copyOutlineParameters(sourceIterator, sourceObject, targetObject);
        }
        // Move back up to the parent level
        sourceIterator.up();
        // Move to the next sibling, both in the iterator and the outline objects
        nextResult = sourceIterator.next();
        if (nextResult === 0) {
          // move to the next object in the outline-tree
          sourceObject = sourceObject.get("Next").resolve();
          targetObject = targetObject.get("Next").resolve();
        }
        // When there is no next sibling the loop ends; the object cursors are local and simply dropped.
      } while (nextResult === 0);
    };
    copyOutlineParameters(sourceOutline, sourceOutlineObject, targetOutlineObject);

    // save modified file
    try {
      fs.writeFileSync(path.resolve(pdfFilePathTarget), targetDoc.saveToBuffer("").asUint8Array());
      // Report success only after the write has actually succeeded.
      console.log(`Table of contents copied to ${targetFileName}`);
    } catch (error) {
      console.error('Error saving modified PDF file:', error);
    }
  } catch (error) {
    console.error('Error processing PDF files:', error);
  }
}
| // If this script is run directly (not imported) | |
| const __filename = fileURLToPath(import.meta.url); | |
| const __dirname = path.dirname(__filename); | |
| if (process.argv[1] === __filename) { | |
| // Get file path from command line arguments | |
| const args = process.argv.slice(2); | |
| if (args.length !== 2) { | |
| console.error('Please provide two pdf file paths'); | |
| process.exit(1); | |
| } | |
| const pdfFilePath1 = args[0]; | |
| const pdfFilePath2 = args[1]; | |
| copyToC(pdfFilePath1, pdfFilePath2); | |
| } else { | |
| module.exports = { copyToC }; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I will be extending the OutlineItem to include style and r,g,b properties for setting the style and color. This change will not be available until the next major MuPDF release though, since the core library functionality it depends on is not yet available.