-
-
Save chopfitzroy/2f1a3210ff4ac4395fe1b24f18381e92 to your computer and use it in GitHub Desktop.
Node.js script that runs on AWS Lambda and converts the pages of a PDF to JPG images.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var async = require("async"); | |
var AWS = require("aws-sdk"); | |
var gm = require("gm").subClass({imageMagick: true}); | |
var fs = require("fs"); | |
var mktemp = require("mktemp"); | |
// Bounding box, in pixels, that every rendered page is scaled to fit inside.
// The handler derives a single scaling factor from these two values, so the
// page's aspect ratio is preserved.
var PAGE_WIDTH = 1300;
var PAGE_HEIGHT = 1300;
var utils = {
    /**
     * Decodes an object key that uses form-style URL encoding, where a space
     * is written as "+" and a literal plus sign as "%2B".
     *
     * The "+" -> " " substitution must run BEFORE percent-decoding; doing it
     * afterwards would also turn every decoded "%2B" (a real plus sign in the
     * key) into a space.
     *
     * @param {string} key - Encoded object key.
     * @returns {string} The decoded key.
     * @throws {URIError} If the key contains a malformed percent sequence.
     */
    decodeKey: function(key) {
        return decodeURIComponent(String(key).replace(/\+/g, ' '));
    }
};
// S3 client shared by the handler for both the download and the uploads.
// Created once at module load rather than inside the handler.
var s3 = new AWS.S3();
exports.handler = function(event, context, callback) { | |
//console.log('Received event:', JSON.stringify(event, null, 2)); | |
var bucket = event.Records[0].s3.bucket.name, | |
srcKey = utils.decodeKey(event.Records[0].s3.object.key), | |
dstPrefix = srcKey.replace(/\.\w+$/, "") + '/', | |
fileType = srcKey.slice(-3, srcKey.length); | |
if (!fileType || fileType != 'pdf') { | |
var msg = "Invalid filetype found for key: " + srcKey; | |
callback(msg); | |
return; | |
} | |
console.log('starting the convertion process...'); | |
function upload(data, filename) { | |
console.time("upload"); | |
s3.putObject({ | |
Bucket: bucket, | |
Key: dstPrefix + filename, | |
Body: data, | |
ContentType: "image/jpeg", | |
ACL: 'public-read', | |
Metadata: { | |
thumbnail: 'TRUE' | |
} | |
}, function(err, data) { | |
console.timeEnd("upload"); | |
if (err) { | |
console.error(err); | |
return; | |
} | |
console.log('file ' + filename + ' was uploaded.'); | |
}); | |
} | |
function uploadAllPages() { | |
console.time("readdir"); | |
fs.readdir('/tmp', function(err, files) { | |
console.timeEnd("readdir"); | |
console.log(files.length + ' was generated: ' + files); | |
async.forEachOf(files, function(value, key, callback) { | |
console.time("readFile"); | |
fs.readFile('/tmp/' + value, function(err, data) { | |
console.timeEnd("readFile"); | |
if (err) { | |
console.error(err); | |
return; | |
} | |
upload(data, value); | |
}); | |
}, function(err) { | |
if (err) { | |
callback(err) | |
} else { | |
console.log('process completed'); | |
context.done(); | |
} | |
}); | |
}); | |
} | |
async.waterfall([ | |
function download(next) { | |
console.time("download"); | |
//Download the image from S3 | |
s3.getObject({ | |
Bucket: bucket, | |
Key: srcKey | |
}, next); | |
}, | |
function convertFile(response, next) { | |
console.timeEnd("download"); | |
if (response.ContentType != 'application/pdf') { | |
var msg = "This file isn't a PDF." | |
console.error(msg); | |
callback(msg); | |
throw msg; | |
} | |
//var temp_file = mktemp.createFileSync("/tmp/XXXXXXXXXX.pdf") | |
//fs.writeFileSync(temp_file, response.Body); | |
//var gmFile = gm(temp_file); // all pages | |
//var gmFile = gm(temp_file + '[0-200]'); // first 200 pages | |
console.time("size"); | |
gm(response.Body).size(function(err, size) { | |
console.timeEnd("size"); | |
var scalingFactor = Math.min(PAGE_WIDTH / size.width, PAGE_HEIGHT / size.height), | |
width = scalingFactor * size.width, | |
height = scalingFactor * size.height; | |
console.log('Generating the images...'); | |
console.time("resize"); | |
this.density(144).borderColor('white').border(0, 0).setFormat("jpeg") | |
.resize(width, height).quality(100).adjoin().write("/tmp/page%06d", function(err) { | |
console.timeEnd("resize"); | |
// if(temp_file) { | |
// fs.unlinkSync(temp_file); | |
// } | |
if (err) { | |
console.error(err); | |
} else { | |
console.log('the images has been extrated.') | |
} | |
next(err); | |
}); | |
}); | |
} | |
], function(err) { | |
if (err) { | |
console.error( | |
"Unable to generate the pages for '" + bucket + "/" + srcKey + "'" + | |
" due to error: " + err | |
); | |
} else { | |
console.log("Created pages for '" + bucket + "/" + srcKey + "'"); | |
uploadAllPages(); | |
} | |
}); | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment