@raffi-minassian
Last active December 23, 2020 02:57
Quick and dirty script to copy files in AWS S3 to an S3 bucket in another AWS account with Node.js.
/*
* This is a quick and dirty script for copying files in AWS S3 to other buckets.
* This is written so it can work across AWS accounts.
* You can hack in your own code to apply extra work or logic to each file.
*
* For each file, it downloads the object to a local tmp file, starts uploading it as soon as possible,
* and deletes the tmp file as soon as the upload is done.
*
* Best way to use: spin up a micro instance in EC2, install Node.js,
* create a directory and install the dependencies with
* npm install aws-sdk
* npm install async
* npm install node-uuid
* then run with node ./s3copy-across-aws-accounts.js.
*
* Don't forget: 1. Make a folder named 'downloads' in the directory you will run from
*                  (or use the optional snippet just after the require calls below).
* 2. Replace the keys and bucket names in this file.
* 3. Tweak any other code you want (max simultaneous uploads, downloads, etc.).
*
* See also: AWS SDK: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/frames.html
* Async for Node.js: https://github.com/caolan/async
* Blog post about this: http://www.everyhaironyourhead.com/copy-files-in-aws-s3-to-an-s3-bucket-in-another-aws-account-with-node-js/
*/
var AWS = require('aws-sdk');
var async = require('async');
var fs = require('fs');
var uuid = require('node-uuid');
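// Optional convenience (a sketch, not one of the original setup steps): create the
// './downloads' tmp folder at startup if it does not already exist, so step 1 in the
// header comment cannot be forgotten.
if (!fs.existsSync('./downloads')) {
    fs.mkdirSync('./downloads');
}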
var sourceS3Conf = new AWS.Config({
    accessKeyId: 'SOURCEACCESSKEYGOESHERE', // replace this with the S3 Access Key for the source bucket
    secretAccessKey: 'SOURCESECRETACCESSKEYGOESHERE' // replace this with the S3 Secret Access Key for the source bucket
});
var sourceS3 = new AWS.S3(sourceS3Conf);
var destS3Conf = new AWS.Config({
    accessKeyId: 'DESTINATIONACCESSKEYGOESHERE', // replace this with the S3 Access Key for the destination bucket
    secretAccessKey: 'DESTINATIONSECRETACCESSKEYGOESHERE' // replace this with the S3 Secret Access Key for the destination bucket
});
var destS3 = new AWS.S3(destS3Conf);
var q = async.queue(upload, 5); //create an upload task queue that allows 5 simultaneous uploads
download(); //kick off the transfer
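// Optional sketch: async's queue can report when the last queued upload has finished,
// which pairs with the "Downloads Done! Please wait for uploads to finish." message below.
// This assumes async 1.x/2.x, where drain is assigned as a property (in async 3.x it is
// registered with q.drain(fn) instead).
q.drain = function(){
    console.log('All uploads finished.');
};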
function download(marker){
    // listparams are the options for listing the source bucket contents. See AWS SDK.
    var listparams = {
        Bucket: 'SOURCE-BUCKET-NAME', // replace this with the name of the source bucket
        EncodingType: 'url',
        MaxKeys: 100, // request 100 keys at a time
        Delimiter: '::'
    };
    // We request 100 bucket entry keys at a time, until we get through all of them.
    // AWS returns the starting key for the next batch as NextMarker.
    if(marker){
        listparams.Marker = marker;
    }
    sourceS3.listObjects(listparams, function(lerr, info) {
        if (lerr){
            console.log(lerr, lerr.stack);
            return; // if there is a problem getting the list, it's all over.
        }
        // process the list of (at most) 100 keys from the bucket
        async.eachLimit(info.Contents, 5, function iterator(item, cb){ // allow 5 simultaneous downloads
            // Add code here for processing the bucket entries. Tailor as needed.
            // For example, here we are skipping any entries that have a zero size.
            if(item.Size == 0){
                cb();
                return;
            }
            var getparams = {
                Bucket: 'SOURCE-BUCKET-NAME', // replace this with the name of the source bucket
                Key: item.Key
            };
            sourceS3.getObject(getparams, function(gerr, s3obj) { // download the object from the bucket
                if (gerr){
                    // if there is a download problem, just print to console and move on.
                    console.log('Download issue with '+item.Key);
                    console.log(gerr);
                    cb();
                    return;
                }
                var uname = uuid.v4(); // use a uuid as a unique name for the tmp file we save
                // Save the file to disk temporarily in a folder called downloads
                // until we upload it to the target bucket.
                // You can apply any rules or filters on the files here.
                fs.writeFile('./downloads/'+uname, s3obj.Body, {encoding: null}, function(fserr){
                    if(fserr){
                        // if there is a write problem, just print to console and move on.
                        console.log('File write issue with '+item.Key);
                        console.log(fserr);
                        cb();
                        return;
                    }
                    // if everything went well downloading, add it to the upload queue.
                    q.push({"filename": uname, "s3key": item.Key});
                    console.log('Added '+item.Key+' to queue');
                    cb();
                });
            });
        },
        function done(aerr){
            if(aerr){
                return console.log(aerr);
            }
            if(info.NextMarker){ // if there are more keys in the bucket, get the next 100
                process.nextTick(function(){
                    download(info.NextMarker);
                });
            } else {
                console.log('Downloads Done! Please wait for uploads to finish.');
            }
        });
    });
}
function upload(uitem, qcb){ // worker that uploads each item in the queue to the destination bucket
    var stream = fs.createReadStream('./downloads/'+uitem.filename);
    destS3.putObject({
        Bucket: 'DESTINATION-BUCKET-NAME', // replace this with the name of the destination bucket
        Key: uitem.s3key,
        Body: stream
    },
    function(uerr, data){
        if(uerr){
            // if there is an upload problem, just print to console and move on.
            console.log('Upload issue with '+uitem.s3key);
            console.log(uerr);
            qcb(uerr);
            return;
        }
        // if the upload was successful, delete the local tmp file
        fs.unlink('./downloads/'+uitem.filename, function(delerr){
            if(delerr){
                // if there was a problem deleting, just print to console and move on.
                console.log('Delete issue with '+uitem.filename);
                console.log(delerr);
                qcb(delerr);
                return;
            }
            console.log('Moved '+uitem.s3key);
            qcb();
        });
    });
}
@nagarajasn

Not able to download large files. Any suggestions?

@michaelyao

Thank you very much! The script is solid. I added headObject to get the ContentType copied over as well.
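For reference, a minimal sketch of that approach (an assumption about the commenter's change, not the author's code): inside the upload worker, look up the source object's metadata with headObject and pass its ContentType along to putObject, reusing sourceS3, destS3, uitem, and stream from the script above.

sourceS3.headObject({ Bucket: 'SOURCE-BUCKET-NAME', Key: uitem.s3key }, function(herr, head){
    if(herr){ console.log(herr); qcb(herr); return; }
    destS3.putObject({
        Bucket: 'DESTINATION-BUCKET-NAME',
        Key: uitem.s3key,
        Body: stream,
        ContentType: head.ContentType // carry the original Content-Type across
    }, function(uerr, data){
        // ...same error handling and tmp-file cleanup as in the upload function above
    });
});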
