Last active
January 3, 2025 15:22
-
-
Save hubgit/600ec0c224481e910d2a0f883a7b98e3 to your computer and use it in GitHub Desktop.
Render the text of a PDF with PDF.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html>
<meta charset="utf-8">
<title>Render the text of a PDF with PDF.js</title>
<style>
  /*
   * One box per PDF page. The script sizes it to the page viewport and
   * positions every text run inside it, hence `position: relative`.
   * The 1px font-size / unit line-height keep inherited text metrics from
   * affecting the spans, which each set their own font-size inline.
   */
  .page-container {
    box-shadow: 0 1px 3px #444;
    position: relative;
    font-size: 1px;
    line-height: 1;
  }

  /*
   * One span per PDF text item. `left`, `top`, `font-size` and a horizontal
   * `scaleX()` are set inline by the script; `white-space: pre` preserves the
   * item's spacing and the transform origin keeps scaling anchored at the
   * left edge. Scoped to page containers so unrelated spans are not affected.
   */
  .page-container span {
    position: absolute;
    cursor: text;
    white-space: pre;
    transform-origin: left bottom;
  }
</style>
<body>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<script>
  // Global PDF.js settings; they are assigned before any document is requested.
  // NOTE(review): these are properties of the legacy `PDFJS` global — confirm
  // the build loaded above still exposes it.

  // Run PDF.js without its web worker, so no worker script has to be hosted.
  PDFJS.disableWorker = true;
  // Alternative setup when the library is served locally:
  // PDFJS.workerSrc = 'bower_components/pdfjs-dist/build/pdf.worker.js';
  // PDFJS.cMapUrl = 'bower_components/pdfjs-dist/cmaps';
  // PDFJS.cMapPacked = true;

  // Do not fetch the PDF with HTTP range requests.
  PDFJS.disableRange = true;
  // PDFJS.disableStream = true;
</script>
<script>
  /*
   * Renders the text of every page of a PDF as absolutely positioned <span>
   * elements, one `.page-container` <div> per page, appended to <body>.
   *
   * NOTE(review): written against the legacy `PDFJS` global API
   * (thenable getDocument(), getViewport(scale), PDFJS.Util.transform);
   * confirm the PDF.js build loaded by this page matches.
   * An SVG rendering via PDFJS.SVGGraphics / page.getOperatorList() would be
   * an alternative to the HTML spans used here.
   */
  (function () {
    'use strict';

    var PDF_URL = 'https://peerj.com/articles/2548.pdf';
    var VIEWPORT_SCALE = 1.5;
    // Flips the y axis: PDF text matrices point up, CSS coordinates point down.
    var FLIP_Y = [1, 0, 0, -1, 0, 0];

    // Off-screen canvas, used only to measure how wide the browser would
    // draw a string in a given font.
    var measureCtx = document.createElement('canvas').getContext('2d', { alpha: false });

    /**
     * Builds the <span> for one text item.
     *
     * @param {Object} textItem - item from textContent.items (str, transform, width, fontName).
     * @param {Object} style - matching entry of textContent.styles (fontFamily, ascent, descent).
     * @param {Object} viewport - page viewport (transform, scale).
     * @returns {HTMLSpanElement} positioned, horizontally scaled span.
     */
    function createTextSpan(textItem, style, viewport) {
      var tx = PDFJS.Util.transform(
        PDFJS.Util.transform(viewport.transform, textItem.transform),
        FLIP_Y
      );
      var fontSize = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));

      // Adjust the vertical position for font ascent/descent; without either
      // metric, fall back to half the font size.
      var top = tx[5];
      if (style.ascent) {
        top -= fontSize * style.ascent;
      } else if (style.descent) {
        top -= fontSize * (1 + style.descent);
      } else {
        top -= fontSize / 2;
      }

      // Stretch/squeeze the span so its rendered width matches the width the
      // PDF reports. Measure at the same font size the span is rendered with,
      // and leave the span unscaled when either width is unavailable.
      var scaleX = 1;
      if (textItem.width > 0) {
        measureCtx.font = fontSize + 'px ' + style.fontFamily;
        var measuredWidth = measureCtx.measureText(textItem.str).width;
        if (measuredWidth > 0) {
          scaleX = (textItem.width * viewport.scale) / measuredWidth;
        }
      }

      var span = document.createElement('span');
      span.textContent = textItem.str;
      span.style.fontFamily = style.fontFamily;
      span.style.fontSize = fontSize + 'px';
      span.style.transform = 'scaleX(' + scaleX + ')';
      span.style.left = tx[4] + 'px';
      span.style.top = top + 'px';
      return span;
    }

    /**
     * Renders the text of one page into its own container.
     *
     * The container is appended to <body> synchronously, before the page is
     * loaded, so containers appear in page order regardless of the order in
     * which the page promises settle.
     *
     * @param {Object} pdf - loaded PDF document.
     * @param {number} pageNumber - 1-based page number.
     * @returns {Promise} settles once the page's text items have been added.
     */
    function renderPage(pdf, pageNumber) {
      var pageContainer = document.createElement('div');
      pageContainer.classList.add('page-container');
      document.body.appendChild(pageContainer);

      return pdf.getPage(pageNumber).then(function (page) {
        var viewport = page.getViewport(VIEWPORT_SCALE);
        pageContainer.style.width = viewport.width + 'px';
        pageContainer.style.height = viewport.height + 'px';

        return page.getTextContent({ normalizeWhitespace: true }).then(function (textContent) {
          textContent.items.forEach(function (textItem) {
            var style = textContent.styles[textItem.fontName];
            pageContainer.appendChild(createTextSpan(textItem, style, viewport));
          });
        });
      });
    }

    PDFJS.getDocument(PDF_URL).then(function (pdf) {
      var pending = [];
      for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        pending.push(renderPage(pdf, pageNumber));
      }
      return Promise.all(pending);
    }).then(null, function (error) {
      // Surface load/render failures instead of dropping them silently.
      console.error('Failed to render the PDF text:', error);
    });
  })();
</script>
I was looking to make a PDF viewer for React Native by extracting this information. Thank you for your example; it will really help me get started!
I struggled to calculate transform > translateY. It has been very helpful. Thank you.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I was looking for an example with textItem styles for a couple of days, thank you!