Created
March 13, 2016 12:09
-
-
Save maxrabin/e3e51abc365cd3f54d78 to your computer and use it in GitHub Desktop.
Example Lambda Function to process lines of text files when uploaded to S3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
var AWS = require('aws-sdk'); | |
var S3 = new AWS.S3(); | |
var readline = require('readline'); | |
exports.handler = function (event, context) { | |
//Get S3 file bucket and name | |
//Make sure to loop through event.Records, don't assume there is only 1 in production!!! | |
var bucket = event.Records[0].s3.bucket.name; | |
var key = event.Records[0].s3.object.key; | |
//Create read stream from S3 | |
var s3ReadStream = S3.getObject({Bucket: bucket, Key: key}).createReadStream(); | |
//handle stream errors. | |
//Pass the S3 read stream into the readline interface to break into lines | |
var readlineStream = readline.createInterface({input: s3ReadStream, terminal: false}); | |
//handle stream errors | |
var totalLineCount = 0; | |
var totalCharCount = 0; | |
readlineStream.on('line', function (line) { | |
//Do whatever you need with the line | |
//In this example we are just counting the number of lines and characters | |
totalLineCount += 1; | |
totalCharCount += line.length; | |
}); | |
readlineStream.on('close', function () { | |
//Do cleanup here such as persist resultant calculations. | |
//In this example I'll just print to the log the total number of lines: | |
console.log("In s3://" + bucket + "/" + key + " there are " + totalLineCount + " lines, " + totalCharCount + " chars."); | |
context.succeed(); | |
}); | |
}; |
Awesome indeed! Apparently you can also write S3 files by line...
https://gist.github.com/hboylan/68ad1bea3e603b33e338c39bbd8c72d3
Hi
I'm having issues when inserting into DynamoDB or encrypting using KMS; it seems that Lambda is not waiting for these functions to finish. Any ideas?
@Jesusbone, lambda has a timeout from 3 seconds up to 5 mins, even if the code has not completed running.
The function is not waiting for the end of the stream; you need to wrap the streaming process inside a promise and resolve it on the close event.
exports.mys3UpdaloadEventhandler = async (event, context) => {
console.log('event', event);
console.log('context', context);
const bucket = event.Records[0].s3.bucket.name;
console.log('bucket', bucket);
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
try {
const params = {
Bucket: bucket,
Key: key,
};
const s3ReadStream = s3.getObject(params).createReadStream();
console.log('s3Stream', s3ReadStream);
const readlineStream = readline.createInterface({ input: s3ReadStream, terminal: false });
const readlineStreamTask = new Promise((resolve, reject) => {
readlineStream.on('error', (err) => {
console.error(err);
reject();
});
readlineStream.on('line', (input) => console.log('line', input));
readlineStream.on('close', (data) => resolve());
});
await readlineStreamTask;
console.log('end of request');
} catch (error) {
console.log(error);
}
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code is awesome