Last active
August 21, 2025 08:14
-
-
Save mid-kid/0cc9f37a7074ad40e65564569a2ba926 to your computer and use it in GitHub Desktop.
Merge all attributes of multiple PDF files using MuPDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Merge all attributes of multiple PDF files (pdf-full-merge.js)
// Extended from MuPDF's docs/examples/pdf-merge.js
// Ever had problems with tools that don't copy certain attributes of a PDF?
// This script uses MuPDF to merge/join/concatenate as much as possible, including:
// - bookmarks / outlines / table of contents
// - link attributes of said outlines, such as viewrect and zoom
// - whether outlines appear open or closed by default
// - annotations, including links
// - 3D objects / PDF3D
// - author / title metadata
// - first page / zoom settings metadata
// - JavaScript scripts and actions
// This script can also serve as an example of how to change any of these
// settings manually, or copy them differently.
// Rewrite a link destination URI so that its "page" parameter is shifted by
// startPage.
//
// uri:       link/outline URI; only strings starting with "#page=" are changed.
// startPage: number added to the page number found after "#page=".
//
// Returns the rewritten URI. Anything else (external URIs, other fragment
// kinds, non-string values such as a missing outline URI, or a "#page="
// target whose page number is empty or not numeric) is returned unchanged.
function adjustURI(uri, startPage) {
	var prefix = "#page="
	var amp, digits, page

	// Outline entries without a destination carry no URI at all; calling
	// string methods on them would throw, so pass them straight through.
	if (typeof uri !== "string" || uri.slice(0, prefix.length) !== prefix)
		return uri

	// The page number runs up to the first "&" (or to the end of the string);
	// everything from the "&" onwards is kept verbatim.
	amp = uri.indexOf("&")
	if (amp === -1)
		amp = uri.length
	digits = uri.slice(prefix.length, amp)
	page = Number(digits)

	// Leave malformed targets alone instead of emitting "#page=NaN".
	if (digits === "" || isNaN(page))
		return uri

	return prefix + (startPage + page) + uri.slice(amp)
}
// Append a copy of one source page to the end of dstDoc.
//
// dstDoc:     destination document; supplies newDictionary/newName/addObject
//             and receives the page via insertPage(-1, ...).
// srcDoc:     source document; the page is looked up with findPage().
// pageNumber: page index passed to srcDoc.findPage().
// dstFromSrc: graft map used to copy every page entry into dstDoc.
//
// Only the keys listed below are copied, and only when present on the page
// object itself.
// NOTE(review): values inherited from a parent /Pages node are presumably
// not visible through this direct lookup - confirm against MuPDF.
function copyPage(dstDoc, srcDoc, pageNumber, dstFromSrc) {
	// The first six keys keep their original graft order; the remaining
	// page boundary boxes and UserUnit are copied as well so the merged
	// page keeps its visible geometry.
	var keys = [
		"MediaBox", "Rotate", "Resources", "Contents", "Group", "Annots",
		"CropBox", "BleedBox", "TrimBox", "ArtBox", "UserUnit"
	]
	var srcPage = srcDoc.findPage(pageNumber)
	var dstPage = dstDoc.newDictionary()
	var i, key

	dstPage.Type = dstDoc.newName("Page")
	for (i = 0; i < keys.length; ++i) {
		key = keys[i]
		if (srcPage[key])
			dstPage[key] = dstFromSrc.graftObject(srcPage[key])
	}

	// -1 is the insertion position handed to insertPage (append).
	dstDoc.insertPage(-1, dstDoc.addObject(dstPage))
}
// Re-target the links of one copied page so they point at the right page of
// the merged document.
//
// dstDoc:     destination document; page (startPage + pageNumber) is loaded.
// srcDoc:     source document; page pageNumber is loaded.
// startPage:  offset of the source document's first page inside dstDoc.
// pageNumber: page index within srcDoc.
//
// Each destination link gets the URI of the source link at the same index,
// passed through adjustURI().
// NOTE(review): this assumes both pages list their links in the same order -
// confirm against MuPDF's getLinks().
function adjustLinks(dstDoc, srcDoc, startPage, pageNumber) {
	var dstLinks = dstDoc.loadPage(startPage + pageNumber).getLinks()
	var srcLinks = srcDoc.loadPage(pageNumber).getLinks()
	// Only walk the links both pages have, so a mismatch in link count does
	// not abort the whole merge with a TypeError on dstLinks[i].
	var count = Math.min(srcLinks.length, dstLinks.length)
	var i

	for (i = 0; i < count; ++i)
		dstLinks[i].setURI(adjustURI(srcLinks[i].getURI(), startPage))
}
// Copy every page of srcDoc to the end of dstDoc, then fix up the links on
// the copied pages.
//
// dstDoc:     destination document.
// srcDoc:     source document; countPages() gives the number of pages.
// startPage:  offset of srcDoc's first page inside dstDoc, forwarded to
//             adjustLinks().
// dstFromSrc: graft map forwarded to copyPage().
function copyAllPages(dstDoc, srcDoc, startPage, dstFromSrc) {
	var pageCount = srcDoc.countPages()
	var pageNumber

	// First pass: copy all pages.
	for (pageNumber = 0; pageNumber < pageCount; ++pageNumber) {
		copyPage(dstDoc, srcDoc, pageNumber, dstFromSrc)
	}

	// Second pass: rewrite links only once every page has been copied.
	for (pageNumber = 0; pageNumber < pageCount; ++pageNumber) {
		adjustLinks(dstDoc, srcDoc, startPage, pageNumber)
	}
}
// Recursively insert a list of outline nodes at the cursor position.
//
// cursor:    outline iterator of the destination document, positioned where
//            the first node should be inserted.
// srcDoc:    source document (unused here; passed along in the recursion).
// startPage: page offset applied to every "#page=" URI via adjustURI().
// list:      array of outline nodes with title/uri/open and an optional
//            "down" array of children. A missing list is treated as empty.
function copyOutlineRec(cursor, srcDoc, startPage, list) {
	var i, node, uri

	// A document without an outline may hand us nothing at all.
	if (!list)
		return

	for (i = 0; i < list.length; ++i) {
		node = list[i]
		// Entries without a destination have no URI string to rewrite.
		uri = typeof node.uri === "string" ? adjustURI(node.uri, startPage) : node.uri

		// Insert empty item, to insert its children first
		cursor.insert({})
		cursor.prev()
		if (node.down) {
			cursor.down()
			copyOutlineRec(cursor, srcDoc, startPage, node.down)
			cursor.up()
		}

		// Add information to item afterwards, so we can set its open status
		cursor.update({ title: node.title, uri: uri, open: node.open })
		cursor.next()
	}
}
// Append the outline (bookmarks) of srcDoc to the outline of dstDoc.
//
// dstDoc:    destination document; its outlineIterator() is used for writing.
// srcDoc:    source document; its loadOutline() result is copied.
// startPage: page offset applied to the copied outline destinations.
function copyOutline(dstDoc, srcDoc, startPage) {
	var cursor = dstDoc.outlineIterator()

	// Step past every existing item so new entries are added after them.
	while (cursor.item()) {
		cursor.next()
	}

	copyOutlineRec(cursor, srcDoc, startPage, srcDoc.loadOutline())
}
// Append the document-level JavaScript name entries of srcDoc to dstDoc.
//
// dstDoc:     destination document; missing Names / Names.JavaScript /
//             Names.JavaScript.Names containers are created on demand.
// srcDoc:     source document; only Root.Names.JavaScript.Names is read, and
//             only when it is an array.
// dstFromSrc: graft map used to copy each array element into dstDoc.
//
// NOTE(review): entries are appended as-is; nothing here handles a "Kids"
// based name tree, duplicate names, or re-sorting - confirm whether needed.
function copyNames(dstDoc, srcDoc, dstFromSrc) {
	var dstRoot = dstDoc.getTrailer().Root
	var srcRoot = srcDoc.getTrailer().Root
	var srcScripts, srcNames, dstNames, i

	if (!srcRoot.Names)
		return
	// The destination Names dictionary is created as soon as the source has
	// one, even if no scripts end up being copied.
	if (!dstRoot.Names)
		dstRoot.Names = dstDoc.newDictionary()

	srcScripts = srcRoot.Names.JavaScript
	if (!srcScripts || !srcScripts.Names || !srcScripts.Names.isArray())
		return

	if (!dstRoot.Names.JavaScript)
		dstRoot.Names.JavaScript = dstDoc.newDictionary()
	if (!dstRoot.Names.JavaScript.Names)
		dstRoot.Names.JavaScript.Names = dstDoc.newArray()

	dstNames = dstRoot.Names.JavaScript.Names
	srcNames = srcScripts.Names
	for (i = 0; i < srcNames.length; ++i) {
		dstNames.push(dstFromSrc.graftObject(srcNames[i]))
	}
}
// Copy viewer settings and descriptive metadata from srcDoc to dstDoc.
//
// dstDoc: destination document; Root and Info of its trailer are written.
// srcDoc: source document; Root and Info of its trailer are read.
//
// Only entries present in the source are copied, so calling this once per
// input file leaves each entry at the value from the most recent file that
// had it. A source without an Info dictionary contributes no Info entries.
function copyMeta(dstDoc, srcDoc) {
	var rootKeys = [ "PageLayout", "PageMode" ]
	var infoKeys = [ "Title", "Author", "Subject", "Keywords" ]
	var dstTrailer = dstDoc.getTrailer()
	var srcTrailer = srcDoc.getTrailer()
	var dstRoot = dstTrailer.Root
	var srcRoot = srcTrailer.Root
	var srcInfo = srcTrailer.Info
	var dstInfo, i, key

	// Use the metadata items found in the most recent pdf file
	for (i = 0; i < rootKeys.length; ++i) {
		key = rootKeys[i]
		if (srcRoot[key])
			dstRoot[key] = srcRoot[key]
	}

	// Not every PDF has an Info dictionary; reading its entries without this
	// check would abort the merge.
	if (!srcInfo)
		return

	// Make sure there is a destination Info dictionary to write into.
	if (!dstTrailer.Info)
		dstTrailer.Info = dstDoc.addObject({})
	dstInfo = dstTrailer.Info

	for (i = 0; i < infoKeys.length; ++i) {
		key = infoKeys[i]
		if (srcInfo[key])
			dstInfo[key] = srcInfo[key]
	}
}
// Merge the input PDFs named in scriptArgs[1..] into a new document and save
// it to scriptArgs[0].
//
// For each input, in order: pages (with link fix-up), outline, JavaScript
// names and metadata are copied; the running page count is used as the page
// offset for the next input.
function pdfmerge() {
	var dstDoc, srcDoc, graftMap, info, timestamp, argIndex
	var pageOffset = 0

	// Initialize PDF and set version
	dstDoc = new PDFDocument()
	dstDoc.getTrailer().Root.Version = "1.4"
	// Create the Info dictionary up front; copyMeta and the static metadata
	// below write into it.
	dstDoc.getTrailer().Info = dstDoc.addObject({})

	for (argIndex = 1; argIndex < scriptArgs.length; ++argIndex) {
		srcDoc = new PDFDocument(scriptArgs[argIndex])
		// One graft map per input document, shared by the page and name copies.
		graftMap = dstDoc.newGraftMap()
		copyAllPages(dstDoc, srcDoc, pageOffset, graftMap)
		copyOutline(dstDoc, srcDoc, pageOffset)
		copyNames(dstDoc, srcDoc, graftMap)
		copyMeta(dstDoc, srcDoc)
		pageOffset += srcDoc.countPages()
	}

	// Set static metadata
	info = dstDoc.getTrailer().Info
	info.Producer = dstDoc.newString("MuPDF")
	// toISOString() is UTC; dropping "-", ":" and "T" and cutting at the
	// fractional seconds leaves YYYYMMDDHHmmSS.
	timestamp = new Date().toISOString().replace(/[-:T]/g,"").split(".")[0]
	info.CreationDate = dstDoc.newString("D:" + timestamp)

	dstDoc.save(scriptArgs[0], "compress,garbage")
}
// Entry point: scriptArgs[0] is the output file, so at least one more
// argument (an input file) is needed before merging.
if (scriptArgs.length < 2) {
	print("usage: mutool run pdf-full-merge.js output.pdf input1.pdf input2.pdf ...")
} else {
	pdfmerge()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment