Created January 22, 2013 04:18
This is a performance test of several methods of updating every item in a 100,000-document MongoDB collection. See the comment below for results of the test on a small EC2 instance.
#! /usr/bin/node

var MongoClient = require('mongodb').MongoClient;
var Server = require('mongodb').Server;
var async = require('async');
var util = require('util');

// A simple linear congruence random number generator.
// This is anticipating doing a comparable test with the aggregation pipeline.
var randMod = 2 << 24;
var randMult = 1140671485;
var randAdd = 12820163;
function myRand (seed) {
  return (randMult*seed + randAdd) % randMod;
}

// Initial Documents to insert.
var ndoc = 100000;
var i = ndoc;
var docList = [];
while (i) {
  i -= 1;
  docList[i] = {randomVal: myRand(i)};
}

var mongoClient = new MongoClient(new Server('localhost', 27017));
var db = null;
var coll = null;
var anotherColl = null;
var finalReport = '';
// Running all the tests with explicit async.series tasks.
async.series([
  function (dbOpenDone) {
    mongoClient.open(function (err, mongoClient) {
      if (err) {
        dbOpenDone(err);
        return;
      }
      db = mongoClient.db('test');
      coll = db.collection('update_benchmark');
      anotherColl = db.collection('update_benchmark_tmp');
      // db.admin().setProfilingLevel('all', function (plErr) {
      //   dbOpenDone(plErr);
      // });
      dbOpenDone(err);
    });
  },
  function (dropDone) {
    // dropDone(null); return; // uncomment to skip drop
    if (coll) {
      coll.drop(function (err) {
        // Ignore errors from drop.
        dropDone(null);
      });
    } else {
      dropDone(null);
    }
  },
  function (dropDone) {
    // dropDone(null); return; // uncomment to skip drop
    if (anotherColl) {
      anotherColl.drop(function (err) {
        // Ignore errors from drop.
        dropDone(null);
      });
    } else {
      dropDone(null);
    }
  },
  function (insertDone) {
    // insertDone(null); return; // uncomment to skip insert
    console.log('Inserting docList');
    var iChunk = 0;
    var chunkSize = 10000;
    async.until(function () {return (iChunk >= docList.length);},
      function (insertChunkDone) {
        coll.insert(docList.slice(iChunk, iChunk + chunkSize), function (insErr) {
          iChunk += chunkSize;
          process.nextTick(function () {
            console.log('Finished Insert ' + iChunk);
            insertChunkDone(insErr);
          });
        });
      },
      insertDone
    );
  },
  // Test Scenario 1: Pull all values onto the client and push them back with individual saves.
  function (testScenario1Done) {
    // testScenario1Done(null); return; // uncomment to skip testScenario 1
    var description = 'Big find().toArray, and individual saves.';
    console.log('Starting Test Scenario 1');
    var testStart = new Date().getTime();
    coll.find().toArray(function (err, itemList) {
      if (err) {
        testScenario1Done(err);
        return;
      }
      console.log('Retrieved all items in: ' + ((new Date().getTime() - testStart)/1000) + ' seconds');
      itemList.forEach(function (item) {
        item.randomVal = myRand(item.randomVal);
      });
      var i = 0;
      async.forEach(itemList, function (item, itemSaveDone) {
        var checkpointNote = null;
        i += 1;
        if (i % 10000 === 0) {
          console.log('(1) Saving ' + i);
          checkpointNote = '(1) Finished saving ' + i;
        }
        coll.save(item, function (err) {
          if (checkpointNote) {
            console.log(checkpointNote);
          }
          itemSaveDone(err);
        });
      },
      function (err) {
        var elapsed = (new Date().getTime() - testStart)/1000;
        var rpt = 'testScenario1: ' + elapsed + ' seconds';
        console.log(rpt);
        finalReport += rpt + ' (' + description + ')\n';
        testScenario1Done(err);
      });
    });
  },
  // Test Scenario 2: Pull all values onto the client and push them back with individual updates.
  function (testScenario2Done) {
    // testScenario2Done(null); return; // uncomment to skip testScenario 2
    var description = 'Big find().toArray, and individual updates.';
    console.log('Starting Test Scenario 2');
    var testStart = new Date().getTime();
    coll.find().toArray(function (err, itemList) {
      if (err) {
        testScenario2Done(err);
        return;
      }
      console.log('Retrieved all items in: ' + ((new Date().getTime() - testStart)/1000) + ' seconds');
      var i = 0;
      async.forEach(itemList, function (item, itemUpdateDone) {
        var checkpointNote = null;
        i += 1;
        if (i % 10000 === 0) {
          console.log('(2) Updating ' + i);
          checkpointNote = '(2) Finished updating ' + i;
        }
        coll.update(
          {_id: item._id},
          {$set: {randomVal: myRand(item.randomVal)}},
          function (err) {
            if (checkpointNote) {
              console.log(checkpointNote);
            }
            itemUpdateDone(err);
          }
        );
      },
      function (err) {
        var testScenario2Elapsed = (new Date().getTime() - testStart)/1000;
        var rpt = 'testScenario2: ' + testScenario2Elapsed + ' seconds';
        console.log(rpt);
        finalReport += rpt + ' (' + description + ')\n';
        testScenario2Done(err);
      });
    });
  },
  // Test Scenario 3: Iterate on cursor (small batch) and use update.
  function (testScenario3Done) {
    // testScenario3Done(null); return; // uncomment to skip testScenario 3
    var description = 'Iterate on cursor (size = 10), then update.';
    console.log('Starting Test Scenario 3');
    var testStart = new Date().getTime();
    var curs = coll.find().batchSize(10);
    curs.count(function (err, count) {
      var i = 0;
      curs.each(function (err, item) {
        var checkpointNote = null;
        if (err) {
          testScenario3Done(err);
          return;
        }
        if (!item) {
          return;
        }
        i += 1;
        if (i % 10000 === 0) {
          checkpointNote = '(3) Finished updating ' + i;
          console.log('(3) Updating ' + i);
        }
        coll.update(
          {_id: item._id},
          {$set: {randomVal: myRand(item.randomVal)}},
          function (err) {
            if (checkpointNote) {
              console.log(checkpointNote);
            }
            if (err) {
              testScenario3Done(err);
              return;
            }
            count -= 1;
            if (count === 0) {
              var elapsed = (new Date().getTime() - testStart)/1000;
              var rpt = 'testScenario3: ' + elapsed + ' seconds';
              console.log(rpt);
              finalReport += rpt + ' (' + description + ')\n';
              testScenario3Done(null);
            }
          }
        );
      });
    });
  },
  // Test Scenario 4: Iterate on cursor (large batch) and use update.
  function (testScenario4Done) {
    // testScenario4Done(null); return; // uncomment to skip testScenario 4
    var description = 'Iterate on cursor (size = 10000), then update.';
    console.log('Starting Test Scenario 4');
    var testStart = new Date().getTime();
    var curs = coll.find().batchSize(10000);
    curs.count(function (err, count) {
      var i = 0;
      curs.each(function (err, item) {
        var checkpointNote = null;
        if (err) {
          testScenario4Done(err);
          return;
        }
        if (!item) {
          return;
        }
        i += 1;
        if (i % 10000 === 0) {
          checkpointNote = '(4) Finished updating ' + i;
          console.log('(4) Updating ' + i);
        }
        coll.update(
          {_id: item._id},
          {$set: {randomVal: myRand(item.randomVal)}},
          function (err) {
            if (checkpointNote) {
              console.log(checkpointNote);
            }
            if (err) {
              testScenario4Done(err);
              return;
            }
            count -= 1;
            if (count === 0) {
              var elapsed = (new Date().getTime() - testStart)/1000;
              var rpt = 'testScenario4: ' + elapsed + ' seconds';
              console.log(rpt);
              finalReport += rpt + ' (' + description + ')\n';
              testScenario4Done(null);
            }
          }
        );
      });
    });
  },
  // Test Scenario 5: Iterate on cursor (large batch) and fill a brand new collection.
  function (testScenario5Done) {
    // testScenario5Done(null); return; // uncomment to skip testScenario 5
    var description = 'Iterate on cursor (size = 10000), then insert into a new collection.';
    console.log('Starting Test Scenario 5');
    var testStart = new Date().getTime();
    var curs = coll.find().batchSize(10000);
    curs.count(function (err, count) {
      var i = 0;
      var newDocList = [];
      curs.each(function (err, item) {
        var checkpointNote = null;
        if (err) {
          testScenario5Done(err);
          return;
        }
        if (!item) {
          return;
        }
        item.randomVal = myRand(item.randomVal);
        newDocList[i % 10000] = item;
        i += 1;
        if (i % 10000 === 0 || i === count) {
          checkpointNote = '(5) Finished inserting ' + i;
          console.log('(5) Inserting ' + i);
          anotherColl.insert(newDocList, function (err) {
            if (checkpointNote) {
              console.log(checkpointNote);
            }
            if (err) {
              testScenario5Done(err);
              return;
            }
            if (i === count) {
              var elapsed = (new Date().getTime() - testStart)/1000;
              var rpt = 'testScenario5: ' + elapsed + ' seconds';
              console.log(rpt);
              finalReport += rpt + ' (' + description + ')\n';
              testScenario5Done(err);
            }
          });
        }
      });
    });
  }
],
function (err) {
  console.log('\n\nFinal Report:\n' + finalReport);
  mongoClient.close();
  if (err) {
    console.log(err);
    process.exit(1);
  }
  process.exit(0);
});
Here are the results of running on a small EC2 instance with MongoDB on localhost.
Though not included here, I also ran a test using MapReduce as a server-side filter, which finished in about 19 seconds. I would have liked to use "aggregate" as a server-side filter in the same way, but it doesn't yet have an option to output to a collection.
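For reference, a MapReduce pass along those lines might look roughly like the sketch below. This is an illustrative assumption, not the code actually benchmarked: the output collection name 'update_benchmark_mr' is made up, and the RNG constants are inlined because the map function executes on the server and cannot see the client-side myRand().

// Hypothetical sketch only -- not part of the benchmark script above.
// Recomputes every document's randomVal on the server and writes the results
// into a new collection. Note that mapReduce wraps each result in an
// {_id, value} envelope, so the output documents are shaped differently
// from the originals.
var map = function () {
  var randMod = 2 << 24;
  var randMult = 1140671485;
  var randAdd = 12820163;
  emit(this._id, {randomVal: (randMult * this.randomVal + randAdd) % randMod});
};
var reduce = function (key, values) {
  return values[0]; // _id keys are unique, so reduce is a pass-through
};
coll.mapReduce(map, reduce, {out: {replace: 'update_benchmark_mr'}}, function (err, outColl) {
  if (err) {
    console.log(err);
    return;
  }
  console.log('MapReduce pass complete');
});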