Created
August 27, 2023 16:04
-
-
Save dmorosinotto/b4373d21b783750f49c87c8e30550514 to your computer and use it in GitHub Desktop.
A nodeless/serverless redistribution of Mozilla's PDF.js for serverless environments, like Deno Deploy and Cloudflare Workers, with zero dependencies.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ORIGINAL CODE REPO: https://github.com/johannschopplich/pdfjs-serverless
//
// Install:
//   # pnpm
//   pnpm add pdfjs-serverless
//   # npm
//   npm install pdfjs-serverless
//
// Example: print the metadata of a local PDF, then the text of each page.
import { getDocument } from 'https://esm.sh/pdfjs-serverless'

// Read the whole PDF file into memory and hand it to PDF.js.
const data = Deno.readFileSync('./dummy.pdf')
const doc = await getDocument(data).promise

console.log(await doc.getMetadata())

// Page numbers run from 1 through doc.numPages inclusive.
for (let i = 1; i <= doc.numPages; i++) {
  const page = await doc.getPage(i)
  const textContent = await page.getTextContent()
  // Join the `str` field of every text item with single spaces.
  const contents = textContent.items.map(item => item.str).join(' ')
  console.log(contents)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example: configure unpdf to use the serverless PDF.js build, then extract
// the text of a PDF fetched from the web.
//
// `unpdf` is an npm package, so it is imported by its bare name; the `node:`
// prefix is reserved for Node.js built-in modules and would fail to resolve.
import { defineUnPDFConfig, extractPDFText } from 'unpdf'

// Use the serverless version of PDF.js
defineUnPDFConfig({
  pdfjs: () => import('pdfjs-serverless')
})

// Now, you can use the other methods provided by unpdf

// Fetch a PDF file from the web
const response = await fetch('https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf')
// Fail early with a clear message instead of handing an error page to the PDF parser.
if (!response.ok) {
  throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`)
}
const pdf = await response.arrayBuffer()

// Pass the PDF buffer to the relevant method
const { totalPages, text } = await extractPDFText(
  new Uint8Array(pdf), { mergePages: true }
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment