Skip to content

Instantly share code, notes, and snippets.

@mid-kid
Created October 18, 2023 21:39
Show Gist options
  • Save mid-kid/0cc9f37a7074ad40e65564569a2ba926 to your computer and use it in GitHub Desktop.
Save mid-kid/0cc9f37a7074ad40e65564569a2ba926 to your computer and use it in GitHub Desktop.
Merge all attributes of multiple PDF files using MuPDF
// Merge all attributes of multiple PDF files (pdf-full-merge.js)
// Extended from MuPDF's docs/examples/pdf-merge.js
// Ever had problems with tools that don't copy certain attributes of a PDF?
// This script uses MuPDF to merge/join/concatenate as much as possible, including:
// - bookmarks / outlines / table of contents
// - link attributes of said outlines, such as viewrect and zoom
// - whether outlines appear open or closed by default
// - annotations
// - 3d objects / PDF3D
// - author / title metadata
// - first page / zoom settings metadata
// Notable exceptions/omissions:
// - javascript scripts and actions
// This script can also serve as an example on how to change any of these
// settings manually, or copy them differently.
// Look up `key` on a page dictionary. When `inheritable` is true and the page
// itself has no value, walk up the /Parent chain and return the first value
// found on an ancestor. Returns null when nothing is found.
function findPageAttribute(page, key, inheritable) {
    var node = page
    var depth = 0
    // The depth cap only protects against a malformed, cyclic page tree.
    while (node && depth < 256) {
        var value = node[key]
        if (value) return value
        if (!inheritable) return null
        node = node.Parent
        depth += 1
    }
    return null
}

// Append a copy of page `pageNumber` of srcDoc to the end of dstDoc.
//   dstDoc     - destination document (receives a new page dictionary)
//   srcDoc     - source document; the page is obtained with findPage()
//   pageNumber - index passed straight to srcDoc.findPage()
//   dstFromSrc - graft map used to copy every value into dstDoc
// Only the keys listed below are copied; everything else on the source page
// dictionary is dropped.
function copyPage(dstDoc, srcDoc, pageNumber, dstFromSrc) {
    // [key, inheritable]. Inheritable keys may be stored on an ancestor
    // /Pages node instead of on the page itself, so they are resolved through
    // the /Parent chain; otherwise such pages would lose their size,
    // resources or rotation in the merged file.
    var attributes = [
        ["MediaBox", true],
        ["CropBox", true],
        ["Rotate", true],
        ["Resources", true],
        ["BleedBox", false],
        ["TrimBox", false],
        ["ArtBox", false],
        ["UserUnit", false],
        ["Contents", false],
        ["Group", false],
        ["Annots", false]
    ]
    var srcPage = srcDoc.findPage(pageNumber)
    var dstPage = dstDoc.newDictionary()
    dstPage.Type = dstDoc.newName("Page")
    attributes.forEach(function (attribute) {
        var key = attribute[0]
        var value = findPageAttribute(srcPage, key, attribute[1])
        if (value) dstPage[key] = dstFromSrc.graftObject(value)
    })
    // -1 is the insertion index used to add the page at the end.
    dstDoc.insertPage(-1, dstDoc.addObject(dstPage))
}
// Append every page of srcDoc to dstDoc, in order.
// A single graft map is created up front and handed to each copyPage() call
// for this source document.
function copyAllPages(dstDoc, srcDoc) {
    var graftMap = dstDoc.newGraftMap();
    var total = srcDoc.countPages();
    var index = 0;
    while (index < total) {
        copyPage(dstDoc, srcDoc, index, graftMap);
        index += 1;
    }
}
// Recursively copy a list of outline items to the position of `cursor`.
//   cursor    - outline iterator of the destination document; items are
//               inserted at its current position
//   srcDoc    - source document (only passed along to recursive calls)
//   startPage - number added to the page of every "#page=N" link
//   list      - array of outline nodes ({ title, uri, open, down }); may be
//               null/undefined, in which case nothing is copied
function copyOutlineRec(cursor, srcDoc, startPage, list) {
    if (!list) return
    list.forEach(function (node) {
        // Rewrite link destination URI, to increment the "page" parameter.
        // Items without a uri (title-only bookmarks) and links of any other
        // form are passed through untouched.
        var uri = node.uri
        if (typeof uri == "string" && uri.slice(0, 6) == "#page=") {
            var amp = uri.indexOf("&")
            if (amp == -1) amp = uri.length
            var page = Number(uri.slice(6, amp))
            // Leave the link alone rather than writing "#page=NaN"
            if (!isNaN(page))
                uri = "#page=" + (startPage + page) + uri.slice(amp)
        }
        // Insert empty item, to insert its children first
        cursor.insert({})
        cursor.prev()
        if (node.down) {
            cursor.down()
            copyOutlineRec(cursor, srcDoc, startPage, node.down)
            cursor.up()
        }
        // Add information to item afterwards, so we can set its open status
        var item = { title: node.title, open: node.open }
        if (uri !== undefined && uri !== null) item.uri = uri
        cursor.update(item)
        cursor.next()
    })
}
// Append the outline of srcDoc after the existing top-level outline items of
// dstDoc. startPage is forwarded to copyOutlineRec(), which uses it to shift
// the page number of "#page=N" links.
function copyOutline(dstDoc, srcDoc, startPage) {
    var iterator = dstDoc.outlineIterator();
    // Step past every item already present so new items are added after them.
    for (; iterator.item(); iterator.next()) {}
    var entries = srcDoc.loadOutline();
    copyOutlineRec(iterator, srcDoc, startPage, entries);
}
// Copy viewer settings and descriptive metadata from srcDoc to dstDoc.
// Values present in srcDoc overwrite those already in dstDoc, so when this is
// called once per input file the most recent file wins; values missing from
// srcDoc leave dstDoc untouched.
//   - catalog (trailer.Root): PageLayout, PageMode
//   - info dictionary (trailer.Info): Title, Author, Subject, Keywords
// The Info dictionary is optional: a source without one contributes no info
// entries, and a destination without one gets a fresh dictionary.
// NOTE(review): values are assigned as-is, without grafting; presumably fine
// for direct names/strings - confirm behaviour for indirect values.
function copyMeta(dstDoc, srcDoc) {
    var dstTrailer = dstDoc.getTrailer()
    var srcTrailer = srcDoc.getTrailer()

    // Use the metadata items found in the most recent pdf file
    var dstRoot = dstTrailer.Root
    var srcRoot = srcTrailer.Root
    if (srcRoot.PageLayout) dstRoot.PageLayout = srcRoot.PageLayout
    if (srcRoot.PageMode) dstRoot.PageMode = srcRoot.PageMode

    var srcInfo = srcTrailer.Info
    if (!srcInfo) return

    var dstInfo = dstTrailer.Info
    if (!dstInfo) {
        dstTrailer.Info = dstDoc.addObject({})
        // Re-read through the trailer so writes go to the stored dictionary
        dstInfo = dstTrailer.Info
    }
    if (srcInfo.Title) dstInfo.Title = srcInfo.Title
    if (srcInfo.Author) dstInfo.Author = srcInfo.Author
    if (srcInfo.Subject) dstInfo.Subject = srcInfo.Subject
    if (srcInfo.Keywords) dstInfo.Keywords = srcInfo.Keywords
}
// Merge the input files named in scriptArgs[1..] into a new document and save
// it to scriptArgs[0]. For each input, in order: pages, outline and metadata
// are copied; outline links are shifted by the number of pages already merged.
function pdfmerge() {
    var srcDoc, dstDoc, i
    // Initialize PDF and set version.
    // The catalog's Version entry is a PDF name, so create it with newName()
    // instead of assigning a plain JS string.
    dstDoc = new PDFDocument()
    dstDoc.getTrailer().Root.Version = dstDoc.newName("1.4")
    dstDoc.getTrailer().Info = dstDoc.addObject({})
    // Running count of pages merged so far = page offset of the next input
    var pages = 0
    for (i = 1; i < scriptArgs.length; ++i) {
        srcDoc = new PDFDocument(scriptArgs[i])
        copyAllPages(dstDoc, srcDoc)
        copyOutline(dstDoc, srcDoc, pages)
        copyMeta(dstDoc, srcDoc)
        pages += srcDoc.countPages()
    }
    // Set static metadata
    var dstInfo = dstDoc.getTrailer().Info
    dstInfo.Creator = dstDoc.newString("pdf-full-merge.js")
    dstInfo.Producer = dstDoc.newString("MuPDF")
    // toISOString() is always UTC; strip separators and fractional seconds to
    // get YYYYMMDDHHmmSS and mark it as UTC with the "Z" suffix.
    var stamp = new Date().toISOString().replace(/[-:T]/g, "").split(".")[0]
    dstInfo.CreationDate = dstDoc.newString("D:" + stamp + "Z")
    dstDoc.save(scriptArgs[0], "compress,garbage")
}
// Script entry point: run the merge unless fewer than two arguments were
// given, in which case only the usage line is printed.
if (!(scriptArgs.length < 2)) {
    pdfmerge();
} else {
    print("usage: mutool run pdf-full-merge.js output.pdf input1.pdf input2.pdf ...");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment