Created
October 18, 2023 21:39
-
-
Save mid-kid/0cc9f37a7074ad40e65564569a2ba926 to your computer and use it in GitHub Desktop.
Merge all attributes of multiple PDF files using MuPDF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Merge all attributes of multiple PDF files (pdf-full-merge.js)
// Extended from MuPDF's docs/examples/pdf-merge.js
//
// Ever had problems with tools that don't copy certain attributes of a PDF?
// This script uses MuPDF to merge/join/concatenate as much as possible, including:
// - bookmarks / outlines / table of contents
// - link attributes of said outlines, such as view rectangle and zoom
// - whether outlines appear open or closed by default
// - annotations
// - 3D objects / PDF3D
// - author / title metadata
// - first page / zoom settings metadata
// Notable exceptions/omissions:
// - JavaScript scripts and actions
//
// This script can also serve as an example of how to change any of these
// settings manually, or how to copy them differently.
// Recreate one page of srcDoc inside dstDoc and insert it at index -1
// (used here to append the page to the end of the document).
//
// dstDoc      destination PDFDocument that receives the new page
// srcDoc      source PDFDocument the page is read from
// pageNumber  index of the page in srcDoc, as passed to findPage
// dstFromSrc  graft map used to copy objects from srcDoc into dstDoc
function copyPage(dstDoc, srcDoc, pageNumber, dstFromSrc) {
	// Page dictionary entries that are carried over, in copy order.
	var copiedKeys = ["MediaBox", "Rotate", "Resources", "Contents", "Group", "Annots"]
	var source = srcDoc.findPage(pageNumber)
	var target = dstDoc.newDictionary()
	var i, key

	target.Type = dstDoc.newName("Page")
	for (i = 0; i < copiedKeys.length; ++i) {
		key = copiedKeys[i]
		// Entries the source page does not have are simply left out.
		if (source[key])
			target[key] = dstFromSrc.graftObject(source[key])
	}

	dstDoc.insertPage(-1, dstDoc.addObject(target))
}
// Copy every page of srcDoc into dstDoc, in source order.
// One graft map is created per call and shared by all pages of srcDoc.
function copyAllPages(dstDoc, srcDoc) {
	var graftMap = dstDoc.newGraftMap()
	var total = srcDoc.countPages()
	var index = 0
	while (index < total) {
		copyPage(dstDoc, srcDoc, index, graftMap)
		index += 1
	}
}
// Shift the page number of an internal "#page=N..." link by pageOffset.
// Anything after the page number (e.g. "&zoom=...") is kept verbatim.
// Values that are not strings, or do not start with "#page=<digits>",
// are returned unchanged.
function shiftOutlinePageUri(uri, pageOffset) {
	var match
	if (typeof uri !== "string") return uri
	match = /^#page=(\d+)/.exec(uri)
	if (!match) return uri
	return "#page=" + (pageOffset + Number(match[1])) + uri.slice(match[0].length)
}

// Recursively copy a list of outline nodes to the position of `cursor`.
//
// cursor     outline iterator of the destination document
// srcDoc     source document (only passed through to recursive calls)
// startPage  number added to the page of every "#page=N" link
// list       array of outline nodes ({ title, uri, open, down });
//            a missing list is treated as empty
function copyOutlineRec(cursor, srcDoc, startPage, list) {
	// Nothing to copy when there is no outline list at all.
	if (!list) return

	list.forEach(function (node) {
		var item = { title: node.title, open: node.open }
		var uri = shiftOutlinePageUri(node.uri, startPage)
		// Nodes without a link get no "uri" key rather than an undefined one.
		if (uri !== undefined && uri !== null) item.uri = uri

		// Insert empty item, to insert its children first
		cursor.insert({})
		cursor.prev()
		if (node.down) {
			cursor.down()
			copyOutlineRec(cursor, srcDoc, startPage, node.down)
			cursor.up()
		}

		// Add information to item afterwards, so we can set its open status
		cursor.update(item)
		cursor.next()
	})
}
// Copy the outline of srcDoc into dstDoc, placing it after the entries
// dstDoc's outline iterator already yields. startPage is forwarded so that
// page links can be shifted accordingly.
function copyOutline(dstDoc, srcDoc, startPage) {
	var iterator = dstDoc.outlineIterator()
	// Step past every entry the iterator currently reports.
	for (; iterator.item(); iterator.next()) {
		// intentionally empty: only advancing the iterator
	}
	var entries = srcDoc.loadOutline()
	copyOutlineRec(iterator, srcDoc, startPage, entries)
}
// Copy viewer settings (catalog) and descriptive metadata (Info dictionary)
// from srcDoc into dstDoc. Called once per input file, so the values of the
// most recently processed file win.
//
// Values are grafted into dstDoc rather than assigned directly, so that an
// indirect reference in srcDoc never ends up pointing into dstDoc's xref.
// A source file without an Info dictionary is handled by skipping that part.
// dstDoc's trailer is expected to already have Root and Info.
function copyMeta(dstDoc, srcDoc) {
	var dstTrailer = dstDoc.getTrailer()
	var srcTrailer = srcDoc.getTrailer()

	// Graft each listed key that is present in srcDict into dstDict.
	function copyEntries(dstDict, srcDict, keys) {
		if (!srcDict) return
		keys.forEach(function (key) {
			if (srcDict[key]) dstDict[key] = dstDoc.graftObject(srcDict[key])
		})
	}

	// Use the metadata items found in the most recent pdf file
	copyEntries(dstTrailer.Root, srcTrailer.Root, ["PageLayout", "PageMode"])
	copyEntries(dstTrailer.Info, srcTrailer.Info, ["Title", "Author", "Subject", "Keywords"])
}
// Merge the input files named in scriptArgs[1..] into a new PDF and save it
// to scriptArgs[0]. For each input, in order: pages are appended, the
// outline is appended with its page links shifted by the number of pages
// already merged, and metadata is copied (later files override earlier ones).
function pdfmerge() {
	var srcDoc, dstDoc, dstInfo, timestamp, i
	// Number of pages merged so far; used as page offset for outline links.
	var pages = 0

	// Initialize PDF and set version.
	// The catalog's Version entry is a PDF name object, so build it with
	// newName instead of assigning a plain string.
	dstDoc = new PDFDocument()
	dstDoc.getTrailer().Root.Version = dstDoc.newName("1.4")
	dstDoc.getTrailer().Info = dstDoc.addObject({})

	for (i = 1; i < scriptArgs.length; ++i) {
		srcDoc = new PDFDocument(scriptArgs[i])
		copyAllPages(dstDoc, srcDoc)
		copyOutline(dstDoc, srcDoc, pages)
		copyMeta(dstDoc, srcDoc)
		pages += srcDoc.countPages()
	}

	// Set static metadata
	dstInfo = dstDoc.getTrailer().Info
	dstInfo.Creator = dstDoc.newString("pdf-full-merge.js")
	dstInfo.Producer = dstDoc.newString("MuPDF")
	// toISOString() is always UTC: strip the separators and fractional
	// seconds to get YYYYMMDDHHmmSS, then mark it as UTC with "Z".
	timestamp = new Date().toISOString().replace(/[-:T]/g, "").split(".")[0]
	dstInfo.CreationDate = dstDoc.newString("D:" + timestamp + "Z")

	dstDoc.save(scriptArgs[0], "compress,garbage")
}
// Script entry point: at least an output file and one input file are needed;
// otherwise print the usage line.
if (scriptArgs.length >= 2) {
	pdfmerge()
} else {
	print("usage: mutool run pdf-full-merge.js output.pdf input1.pdf input2.pdf ...")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment