Skip to content

Instantly share code, notes, and snippets.

@jvmccarthy
Forked from rufuspollock/pdf2json-tryout.js
Last active August 29, 2015 13:56
Show Gist options
  • Save jvmccarthy/9241221 to your computer and use it in GitHub Desktop.
Save jvmccarthy/9241221 to your computer and use it in GitHub Desktop.
"use strict";
var PFParser = require("pdf2json");
var pdfParser = new PFParser();

/**
 * Dumps a summary of the parsed PDF to stdout once pdf2json reports success.
 *
 * Prints, in order: the source file path, the page count, the first text run
 * of the first page, the whole payload, and the first page object.
 *
 * @param {Object} data - payload emitted with "pdfParser_dataReady"; this
 *   handler reads `data.pdfFilePath` and `data.data.Pages`.
 */
pdfParser.on("pdfParser_dataReady", function(data) {
  /**
   * Decodes the text of a single run. pdf2json documents the `T` field as
   * URI-encoded, so the raw value shows escapes such as "%20" instead of
   * the characters that appear in the PDF.
   */
  function decodeRunText(encoded) {
    if (typeof encoded !== 'string') {
      return encoded;
    }
    try {
      return decodeURIComponent(encoded);
    } catch (e) {
      // Malformed escape sequence: fall back to the raw value rather than
      // aborting the whole dump.
      return encoded;
    }
  }

  var pages = data.data.Pages;

  console.log('pdf file: ' + data.pdfFilePath);
  console.log('page count: ' + pages.length);
  console.log('\n\n');

  // A document without pages yields `undefined` here; keep going so the
  // DATA section below is still printed instead of throwing a TypeError.
  var firstPage = pages[0];
  var text = firstPage ? firstPage.Texts.map(function(t) {
    // Only the first run of each text element is inspected.
    return decodeRunText(t.R[0].T);
  }) : [];

  console.log('FIRST PAGE TEXT');
  console.log('---------------');
  console.log(text[0]);
  console.log('\n\n');

  console.log('DATA');
  console.log('----');
  console.log(data);
  console.log('\n\n');

  console.log('FIRST PAGE');
  console.log('----------');
  console.log(firstPage);
  console.log('\n\n');
});
/**
 * Reports a parse failure on stderr and marks the process as failed so that
 * shell callers can detect the error from the exit status.
 *
 * @param {*} error - value emitted with "pdfParser_dataError".
 */
pdfParser.on("pdfParser_dataError", function(error) {
  console.error(error);
  // Set the exit code instead of calling process.exit() so pending output
  // is flushed before the process ends.
  process.exitCode = 1;
});
// Command-line entry point: the first argument after the script name is the
// path of the PDF to parse.
var args = process.argv.slice(2);
var pdfFilePath = args[0];
if (!pdfFilePath) {
  // Without a path there is nothing to load; explain the expected invocation
  // and signal failure instead of handing `undefined` to the parser.
  console.error('usage: node ' + process.argv[1] + ' <path-to-pdf>');
  process.exitCode = 1;
} else {
  console.log('loading pdf...');
  pdfParser.loadPDF(pdfFilePath);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment