stekhn · January 3, 2017 10:49
diff --git a/extract.js b/extract.js
 // Extract text from PDF files (with images)
 // Installation guide: https://github.com/nisaacson/pdf-extract

 var extract = (function() {

  'use strict';

  var fs = require('fs');
  var path = require('path');
  var pdfExtract = require('pdf-extract');

  var defaultOptions = {
    type: 'ocr',
    ocr_flags: [
      '-l eng',
    ]
  };

  // Execute script if not used as a module
  if (!module.parent) {

    init(process.argv[2]);
  }

  function init(filePath, options, callback) {

    callback = callback || function (error, response) {

      if (error) { return console.error(error); }

      return console.log(response);
    };

    options = options || defaultOptions;

    if (!filePath) {

      return callback(new Error('No input file (PDF) specified.'));
    }

    processFile(filePath, ocrLanguage, callback);
  }

  function processFile(filePath, ocrLanguage, callback) {

    var processor = pdfExtract(filePath, options, function (error) {

      if (error) {

        callback(error);
      }
    });

    processor.on('complete', function (data) {

      saveFile(filePath + '.txt', data.text_pages, callback);
    });

    processor.on('error', function (error) {

      callback(error);
    });
  }

  function saveFile(filePath, string, callback) {

    // Normalize file path
    filePath = path.normalize(filePath);

    try {

      callback('Saved file ' + filePath);

      // Save file
      return fs.writeFileSync(filePath, string, 'utf8');
    } catch (error) {

      callback(error);
    }
  }

  module.exports = {

    init: init
  };
 }());
	// Extract text from PDF files (with images)
	// Installation guide: https://github.com/nisaacson/pdf-extract

	var extract = (function() {

	'use strict';

	var fs = require('fs');
	var path = require('path');
	var pdfExtract = require('pdf-extract');

	var defaultOptions = {
	type: 'ocr',
	ocr_flags: [
	'-l eng',
	]
	};

	// Execute script if not used as a module
	if (!module.parent) {

	init(process.argv[2]);
	}

	function init(filePath, options, callback) {

	callback = callback \|\| function (error, response) {

	if (error) { return console.error(error); }

	return console.log(response);
	};

	options = options \|\| defaultOptions;

	if (!filePath) {

	return callback(new Error('No input file (PDF) specified.'));
	}

	processFile(filePath, ocrLanguage, callback);
	}

	function processFile(filePath, ocrLanguage, callback) {

	var processor = pdfExtract(filePath, options, function (error) {

	if (error) {

	callback(error);
	}
	});

	processor.on('complete', function (data) {

	saveFile(filePath + '.txt', data.text_pages, callback);
	});

	processor.on('error', function (error) {

	callback(error);
	});
	}

	function saveFile(filePath, string, callback) {

	// Normalize file path
	filePath = path.normalize(filePath);

	try {

	callback('Saved file ' + filePath);

	// Save file
	return fs.writeFileSync(filePath, string, 'utf8');
	} catch (error) {

	callback(error);
	}
	}

	module.exports = {

	init: init
	};
	}());