mallendeo · August 10, 2016 02:38
diff --git a/pdf-extract.js b/pdf-extract.js
 /*
  * pdf-extract: rebuilds pdfs/extracted as a copy of pdfs/original, moves
  * every `<name>.pdf` into its own `<name>/` folder, runs `pdftohtml` on it
  * and then deletes the `.html` files left in that folder.
  */
 'use strict'

 const fs = require('fs-extra')
 const path = require('path')
 const spawn = require('child_process').spawn

 const originalPath = path.join(__dirname, 'pdfs', 'original')
 const extractedPath = path.join(__dirname, 'pdfs', 'extracted')

 /**
  * Delete every `.html` entry located directly inside `dir`.
  * fs-extra's remove() takes a literal path and does not expand wildcards,
  * so the directory is listed and filtered explicitly.
  * @param {string} dir - folder to clean up
  */
 const removeHtmlFiles = dir => {
   fs.readdirSync(dir)
     .filter(entry => path.extname(entry) === '.html')
     .forEach(entry => fs.removeSync(path.join(dir, entry)))
 }

 // Start from a fresh copy so the originals are never modified.
 fs.removeSync(extractedPath)
 fs.copySync(originalPath, extractedPath)

 fs.walk(extractedPath)
   .on('data', item => {
     // Only paths that END in `.pdf` qualify; a `.pdf` appearing earlier in
     // the path (e.g. in a parent folder name) must not be picked up.
     if (path.extname(item.path) !== '.pdf') return
     // NOTE(review): assumes walk items expose `stats` (fs.Stats) - confirm
     // against the installed fs-extra version. Skips folders named `*.pdf`.
     if (item.stats && !item.stats.isFile()) return

     const filename = path.basename(item.path)
     // Target folder: same location and name as the PDF, minus the extension.
     const pathname = path.join(path.dirname(item.path), path.basename(item.path, '.pdf'))
     const filepath = path.join(pathname, filename)

     fs.move(item.path, filepath, (err) => {
       if (err) return console.log(err)

       // Output is not consumed, so it is discarded instead of being left in
       // an unread pipe; completion is taken from the child process itself.
       const converter = spawn('pdftohtml', [filepath], { stdio: 'ignore' })

       // Without this handler a missing `pdftohtml` binary would surface as
       // an unhandled 'error' event and abort the whole run.
       converter.on('error', spawnErr => console.log(spawnErr))

       converter.on('close', code => {
         try {
           removeHtmlFiles(pathname)
         } catch (cleanupErr) {
           return console.log(cleanupErr)
         }
         if (code !== 0) {
           return console.log(`${filename} failed (pdftohtml exit code ${code})`)
         }
         console.log(filename + ' ok!')
       })
     })
   })
   .on('error', err => console.log(err))
	/*
	 * pdf-extract: rebuilds pdfs/extracted as a copy of pdfs/original, moves
	 * every `<name>.pdf` into its own `<name>/` folder, runs `pdftohtml` on it
	 * and then deletes the `.html` files left in that folder.
	 */
	'use strict'

	const fs = require('fs-extra')
	const path = require('path')
	const spawn = require('child_process').spawn

	const originalPath = path.join(__dirname, 'pdfs', 'original')
	const extractedPath = path.join(__dirname, 'pdfs', 'extracted')

	/**
	 * Delete every `.html` entry located directly inside `dir`.
	 * fs-extra's remove() takes a literal path and does not expand wildcards,
	 * so the directory is listed and filtered explicitly.
	 * @param {string} dir - folder to clean up
	 */
	const removeHtmlFiles = dir => {
	  fs.readdirSync(dir)
	    .filter(entry => path.extname(entry) === '.html')
	    .forEach(entry => fs.removeSync(path.join(dir, entry)))
	}

	// Start from a fresh copy so the originals are never modified.
	fs.removeSync(extractedPath)
	fs.copySync(originalPath, extractedPath)

	fs.walk(extractedPath)
	  .on('data', item => {
	    // Only paths that END in `.pdf` qualify; a `.pdf` appearing earlier in
	    // the path (e.g. in a parent folder name) must not be picked up.
	    if (path.extname(item.path) !== '.pdf') return
	    // NOTE(review): assumes walk items expose `stats` (fs.Stats) - confirm
	    // against the installed fs-extra version. Skips folders named `*.pdf`.
	    if (item.stats && !item.stats.isFile()) return

	    const filename = path.basename(item.path)
	    // Target folder: same location and name as the PDF, minus the extension.
	    const pathname = path.join(path.dirname(item.path), path.basename(item.path, '.pdf'))
	    const filepath = path.join(pathname, filename)

	    fs.move(item.path, filepath, (err) => {
	      if (err) return console.log(err)

	      // Output is not consumed, so it is discarded instead of being left in
	      // an unread pipe; completion is taken from the child process itself.
	      const converter = spawn('pdftohtml', [filepath], { stdio: 'ignore' })

	      // Without this handler a missing `pdftohtml` binary would surface as
	      // an unhandled 'error' event and abort the whole run.
	      converter.on('error', spawnErr => console.log(spawnErr))

	      converter.on('close', code => {
	        try {
	          removeHtmlFiles(pathname)
	        } catch (cleanupErr) {
	          return console.log(cleanupErr)
	        }
	        if (code !== 0) {
	          return console.log(`${filename} failed (pdftohtml exit code ${code})`)
	        }
	        console.log(filename + ' ok!')
	      })
	    })
	  })
	  .on('error', err => console.log(err))