/*
  by Martin Güther @magegu
  just call it:
  uploadFile(absoluteFilePath, callback);
*/

var path = require('path');
var async = require('async');
var fs = require('fs');

var AWS = require('aws-sdk');
AWS.config.loadFromPath('./aws.json');

var s3 = new AWS.S3();
var bucketName = "YOUR BUCKET NAME";

function uploadMultipart(absoluteFilePath, fileName, uploadCb) {
  s3.createMultipartUpload({ Bucket: bucketName, Key: fileName }, (mpErr, multipart) => {
    if (!mpErr) {
      //console.log("multipart created", multipart.UploadId);

      fs.readFile(absoluteFilePath, (err, fileData) => {

        var partSize = 1024 * 1024 * 5; // S3 multipart parts must be at least 5 MB (except the last one)
        var parts = Math.ceil(fileData.length / partSize);

        async.timesSeries(parts, (partNum, next) => {

          var rangeStart = partNum * partSize;
          var end = Math.min(rangeStart + partSize, fileData.length);

          console.log("uploading ", fileName, " % ", (partNum / parts).toFixed(2));

          partNum++; // S3 part numbers are 1-indexed

          async.retry((retryCb) => { // retries each part up to 5 times (async.retry default)
            s3.uploadPart({
              Body: fileData.slice(rangeStart, end),
              Bucket: bucketName,
              Key: fileName,
              PartNumber: partNum,
              UploadId: multipart.UploadId
            }, (err, mData) => {
              retryCb(err, mData);
            });
          }, (err, data) => {
            //console.log(data);
            next(err, { ETag: data.ETag, PartNumber: partNum });
          });

        }, (err, dataPacks) => {
          s3.completeMultipartUpload({
            Bucket: bucketName,
            Key: fileName,
            MultipartUpload: {
              Parts: dataPacks
            },
            UploadId: multipart.UploadId
          }, uploadCb);
        });
      });
    } else {
      uploadCb(mpErr);
    }
  });
}

function uploadFile(absoluteFilePath, uploadCb) {
  var fileName = path.basename(absoluteFilePath);
  var stats = fs.statSync(absoluteFilePath);
  var fileSizeInBytes = stats["size"];

  // files under 5 MB go up in a single putObject; anything larger goes through multipart
  if (fileSizeInBytes < (1024 * 1024 * 5)) {
    async.retry((retryCb) => {
      fs.readFile(absoluteFilePath, (err, fileData) => {
        s3.putObject({
          Bucket: bucketName,
          Key: fileName,
          Body: fileData
        }, retryCb);
      });
    }, uploadCb);
  } else {
    uploadMultipart(absoluteFilePath, fileName, uploadCb);
  }
}
It worked fine for me. Thanks a lot
There is really no need to use this approach anymore unless finer control is required. Try ManagedUpload instead.
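For anyone landing here later, a minimal sketch of that route, assuming the same aws.json config and bucket name as in the gist (uploadWithManagedUpload is just an illustrative name; the ManagedUpload class splits, parallelizes, and retries the parts for you):

var fs = require('fs');
var path = require('path');
var AWS = require('aws-sdk');
AWS.config.loadFromPath('./aws.json');

function uploadWithManagedUpload(absoluteFilePath, uploadCb) {
  var upload = new AWS.S3.ManagedUpload({
    partSize: 1024 * 1024 * 5, // 5 MB parts, the S3 minimum for all but the last part
    queueSize: 4,              // how many parts to upload in parallel
    params: {
      Bucket: "YOUR BUCKET NAME",
      Key: path.basename(absoluteFilePath),
      Body: fs.createReadStream(absoluteFilePath) // streamed, so the whole file is never buffered
    }
  });
  upload.send(uploadCb); // callback receives (err, data); data.Location holds the object URL
}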
How do I get the URL / Location? It just returns an ETag.
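For what it's worth, only the multipart path returns a Location: completeMultipartUpload passes data with Location/Bucket/Key to uploadCb, while the small-file putObject path only returns an ETag. Rough illustration (the file path here is made up):

uploadFile('/tmp/big-file.bin', function (err, data) {
  if (err) return console.error(err);
  // multipart completions include Location; putObject responses (files < 5 MB) do not
  console.log(data.Location || data.ETag);
});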
Hi,
First of all, thanks for the code. It is useful.
After taking the multipart.js, I added the lines below to call the upload function. The upload works fine, but if the network goes down partway through, it throws an error:
//Error starts here
Cannot read property 'ETag' of null
at Response.s3.uploadPart (D:\vijay\multipart.js:54:33)
at Request.<anonymous> (D:\vijay\node_modules\aws-sdk\lib\request.js:364:18)
//Ends here
Calling function:
uploadFile(absoluteFilePath, function(err, data){
  if(err){
    console.log('erroring out...');
    next(err, {ETag: data.ETag, PartNumber: partNum});
  }
});
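That TypeError comes from the gist's retry completion callback: when a part ultimately fails (e.g. the network drops and all retries are exhausted), err is set and data is null, but data.ETag is still read. A small guard over that callback, sketched against the gist's code, surfaces the real network error instead of crashing:

async.retry((retryCb) => {
  s3.uploadPart({
    Body: fileData.slice(rangeStart, end),
    Bucket: bucketName,
    Key: fileName,
    PartNumber: partNum,
    UploadId: multipart.UploadId
  }, retryCb);
}, (err, data) => {
  if (err) return next(err); // data is null here, so don't touch it
  next(null, { ETag: data.ETag, PartNumber: partNum });
});

You would still want to call s3.abortMultipartUpload on failure so the already-uploaded parts don't linger and accrue storage costs.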
Hey... I loved this gist. I wrote some tests that I am going to share here (they are in TypeScript, but with a little nip and tuck... you get my point).
import * as AWS from "aws-sdk";
import { expect } from "chai";
import { describe } from "mocha";
import * as sinon from "sinon";
import { SinonSandbox } from "sinon";
// import uploadMultipart somehow
type S3CallbackStub = (error: Error | null, data?: any) => void;
const file = __dirname + '/data.txt';
const bucket = 'test-bucket';
const key = 'test-file-key';
const S3Stub = {
failures: 1,
createMultipartUpload(_params: any, callback: S3CallbackStub) {
callback(null, { UploadId: '0' });
},
uploadPart(_params: any, callback: S3CallbackStub) {
if (this.failures > 0) { // add some failure to make sure the async code functions
this.failures -= 1;
return callback(new Error("Failed to upload part"));
}
callback(null, { ETag: '1' });
},
completeMultipartUpload(_params: any, callback: S3CallbackStub) {
callback(null, { Bucket: bucket });
}
};
describe(`AWS Multipart Upload Task`, () => {
let sandbox: SinonSandbox;
beforeEach(() => {
sandbox = sinon.createSandbox();
sandbox.stub(AWS, 'S3').returns(S3Stub);
});
afterEach(() => {
sandbox.restore();
});
it("should return from the main upload method", (done) => {
const s3 = new AWS.S3(); // this should not be the stub
uploadMultipart(file, key, (err, data) => {
expect(data.Bucket).to.equal(bucket);
done();
});
});
});
Not suitable for large files. I am getting an error when using a 10 GB file:
RangeError [ERR_FS_FILE_TOO_LARGE]: File size (10485760000) is greater than possible Buffer: 2147483647 bytes
Can we use a read stream instead of readFile?
Thanks for the code.
RangeError [ERR_FS_FILE_TOO_LARGE]: File size (10485760000) is greater than possible Buffer: 2147483647 bytes
I think that's an fs error and has nothing to do with S3.
Check this answer on Stack Overflow, it might fix your issue:
https://stackoverflow.com/a/63553430/11084093
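On the read-stream question: yes, the part loop can read each 5 MB slice on demand instead of buffering the whole file, which keeps memory flat and sidesteps the 2 GB Buffer limit entirely. A rough sketch (not the gist's code; it reuses the gist's s3, bucketName, and part size, and leaves out the retry/completion wiring, which stays the same):

var fs = require('fs');

// Read a single part-sized slice from disk and upload it, so memory use
// stays around 5 MB no matter how large the file is.
function uploadPartFromDisk(fd, partNum, partSize, fileSize, uploadId, fileName, cb) {
  var start = (partNum - 1) * partSize; // partNum is 1-indexed, as S3 expects
  var length = Math.min(partSize, fileSize - start);
  var buffer = Buffer.alloc(length);

  fs.read(fd, buffer, 0, length, start, (readErr) => {
    if (readErr) return cb(readErr);
    s3.uploadPart({
      Body: buffer,
      Bucket: bucketName,
      Key: fileName,
      PartNumber: partNum,
      UploadId: uploadId
    }, cb);
  });
}

A range-limited fs.createReadStream(file, { start, end }) can also be passed as the Body, though you may need to set ContentLength explicitly for each part request.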
Thanks a lot @magegu , that was really helpful.
But you need to update the code to change (1024*1024*5) = 5 MB to (1024*1024*1024*5) = 5 GB.
Why is that?
Sorry, I am pretty new to Node.js. Can anyone post a short demo of how to use this code for a multipart upload of a huge file (like 20 GB) to an S3 bucket (assume there are no permission issues)? Thank you.
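A minimal calling example, assuming the gist is saved as multipart.js with module.exports = { uploadFile }; added at the bottom, and aws.json holding your credentials (the file path below is made up). Note the caveat above still applies: the gist reads the whole file into memory, so a 20 GB file will hit the Buffer limit unless you switch to a streaming variant or ManagedUpload:

// demo.js
var uploadFile = require('./multipart').uploadFile;

uploadFile('/data/videos/huge-file.mp4', function (err, data) {
  if (err) {
    console.error('upload failed:', err);
    return;
  }
  console.log('upload complete:', data);
});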
Just one question. Does this occupy the nodejs server's disk space during the upload process?
Extremely helpful, thanks. Would it make sense as an npm package?
@soubhikchatterjee I think it would, but you could swap memfs in for fs to avoid that, I think.
https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#upload-property
Does the SDK handle this for us now or no?
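It does, as far as I can tell: s3.upload() is ManagedUpload under the hood, so it splits the body into parts, uploads them in parallel, retries, and completes the upload for you, and it accepts a stream Body so nothing needs fs.readFile. A sketch reusing the gist's s3 client and bucket (key and path are made up):

var fs = require('fs');

var managed = s3.upload({
  Bucket: bucketName,
  Key: 'huge-file.mp4',
  Body: fs.createReadStream('/data/videos/huge-file.mp4')
}, function (err, data) {
  if (err) return console.error('upload failed:', err);
  console.log('done:', data.Location);
});

// progress events replace the gist's manual percentage logging;
// with a stream Body, progress.total can be undefined until the stream ends
managed.on('httpUploadProgress', function (progress) {
  console.log('uploaded', progress.loaded, 'of', progress.total || '?', 'bytes');
});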
This works fine on my end. Thanks a lot!
When working with huge files, readFile() might exceed the maximum Buffer size, as the RangeError above shows.