Skip to content

Instantly share code, notes, and snippets.

@thetrevorharmon
Last active December 13, 2023 23:43
Show Gist options
  • Save thetrevorharmon/6b831875cde98e5dc7f7458f6112b061 to your computer and use it in GitHub Desktop.
Save thetrevorharmon/6b831875cde98e5dc7f7458f6112b061 to your computer and use it in GitHub Desktop.
Query JSON with S3 Select in Node.js
// See tth.im/s3json for a full explanation of this code
const AWS = require('aws-sdk');
const S3 = new AWS.S3();
exports.handler = async (_, context) => {
try {
const query = 'SELECT * FROM s3object[*].results[*] r;';
// test query is against data from https://swapi.co/api/planets/?page=2
const bucket = 'test-bucket-for-s3-select-tutorial';
const key = 'planets.json';
const params = {
Bucket: bucket,
Key: key,
ExpressionType: 'SQL',
Expression: query,
InputSerialization: {
JSON: {
Type: 'DOCUMENT',
}
},
OutputSerialization: {
JSON: {
RecordDelimiter: ','
}
}
}
const data = await getDataUsingS3Select(params);
context.succeed(data);
} catch (error) {
context.fail(error);
}
};
const getDataUsingS3Select = async (params) => {
return new Promise((resolve, reject) => {
S3.selectObjectContent(params, (err, data) => {
if (err) { reject(err); }
if (!data) {
reject('Empty data object');
}
// This will be an array of bytes of data, to be converted
// to a buffer
const records = []
// This is a stream of events
data.Payload.on('data', (event) => {
// There are multiple events in the eventStream, but all we
// care about are Records events. If the event is a Records
// event, there is data inside it
if (event.Records) {
records.push(event.Records.Payload);
}
})
.on('error', (err) => {
reject(err);
})
.on('end', () => {
// Convert the array of bytes into a buffer, and then
// convert that to a string
let planetString = Buffer.concat(records).toString('utf8');
// remove any trailing commas
planetString = planetString.replace(/\,$/, '');
// Add into JSON 'array'
planetString = `[${planetString}]`;
try {
const planetData = JSON.parse(planetString);
resolve(planetData);
} catch (e) {
reject(new Error(`Unable to convert S3 data to JSON object. S3 Select Query: ${params.Expression}`));
}
});
});
})
}
@hpl002
Copy link

hpl002 commented Aug 30, 2020

Thanks for this!

Great starting point for what i needed, but i had a slight issue..

While you check for data at line 44, the remaining code will run, even after reject.

You could of course handle this elsewhere, but if a error handling mechanism is not implemented then this will cause a runtime error.

I fixed this by wrapping it in a else block.

For reference (with my alterations):

const getDataUsingS3Select = async (params) => {
  // 1
  return new Promise((resolve, reject) => {
    s3.selectObjectContent(params, (err, data) => {
      if (err) {
        reject(err);
      }

      if (!data) {
        reject("Empty data object");
      } 
//Wrapping in else. Data.Payload will throw a runtime error if data is null
else {
        // This will be an array of bytes of data, to be converted
        // to a buffer
        const records = [];

        // This is a stream of events
        data.Payload.on("data", (event) => {
          // There are multiple events in the eventStream, but all we
          // care about are Records events. If the event is a Records
          // event, there is data inside it
          if (event.Records) {
            records.push(event.Records.Payload);
          }
        })
          .on("error", (err) => {
            reject(err);
          })
          .on("end", () => {
            // Convert the array of bytes into a buffer, and then
            // convert that to a string
            let planetString = Buffer.concat(records).toString("utf8");

            // 2
            // remove any trailing commas
            planetString = planetString.replace(/\,$/, "");

            // 3
            // Add into JSON 'array'
            planetString = `[${planetString}]`;

            try {
              resolve(planetString);
            } catch (e) {
              reject(
                new Error(
                  `Unable to convert S3 data to JSON object. S3 Select Query: ${params.Expression}`
                )
              );
            }
          });
      }
    });
  });
};

@thetrevorharmon
Copy link
Author

@hpl002 this is great! Thanks for your comment!

@lucianosmori
Copy link

lucianosmori commented Jan 21, 2023

  "errorType": "ReferenceError",
  "errorMessage": "require is not defined in ES module scope, you can use import instead",
  "trace": [
    "ReferenceError: require is not defined in ES module scope, you can use import instead",
    "    at file:///var/task/index.mjs:3:13",
    "    at ModuleJob.run (node:internal/modules/esm/module_job:193:25)",
    "    at async Promise.all (index 0)",
    "    at async ESMLoader.import (node:internal/modules/esm/loader:530:24)",
    "    at async _tryAwaitImport (file:///var/runtime/index.mjs:921:16)",
    "    at async _tryRequire (file:///var/runtime/index.mjs:970:86)",
    "    at async _loadUserApp (file:///var/runtime/index.mjs:994:16)",
    "    at async UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:1035:21)",
    "    at async start (file:///var/runtime/index.mjs:1200:23)",
    "    at async file:///var/runtime/index.mjs:1206:1"
  ]
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment