Last active
August 8, 2022 07:49
-
-
Save michelem09/170a07d877e2787f7a53 to your computer and use it in GitHub Desktop.
Gist showing how to update items in series, page by page, in a huge MongoDB collection (millions of records) with Node.js and the Async module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
/** | |
* Module dependencies. | |
*/ | |
var async = require('async'), | |
mongoose = require('mongoose'), | |
ObjectId = mongoose.Types.ObjectId; | |
var db = mongoose.connect('mongodb://localhost:27017/test', {}, function (err) { | |
if (err) { | |
console.error('\x1b[31m', 'Could not connect to MongoDB!'); | |
console.log(err); | |
} else { | |
console.log('Connected to MongoDB: ' + config.db); | |
// This is a huge collection you have to update some fields | |
var Hugecollection = mongoose.model('Hugecollection'); | |
// First of all count the total items | |
Hugecollection.count({}, function (err, tot) { | |
if (err) return console.log(err); | |
var skip = 0, | |
limit = 10000 | |
pages = (tot / limit).toFixed(0), | |
a = []; | |
console.log('Total items: ' + tot + ' - Total pages: ' + pages); | |
// Create an array of the page numbers to be queried | |
for (skip = 0; skip <= pages; skip++) { | |
a.push(skip); | |
} | |
// Give the page numbers array to an eachSeries method from Async, | |
// It will loop through them asking a number of limited (10000) items per time | |
// that's better then query for all items when you have a lot | |
async.eachSeries(a, function (key, done) { | |
// Find each batch of items | |
Hugecollection.find({}).select({ | |
_id: 1, | |
fieldtobechanged1: 1, | |
fieldtobechanged2: 1 | |
}).limit(limit).skip(key * limit).sort({ | |
_id: 1 | |
}).exec(function (err, items) { | |
if (err) return console.log(err); | |
console.log("Doing: " + key + "/" + pages + " - TEST THIS: " + items[0]._id); | |
// Asynchrously loop each item in the batch and do your changes | |
async.each(items, function (item, callback) { | |
if (item.fieldtobechanged1 && item.fieldtobechanged2) { | |
var doYourChange = function (value) { | |
return value; | |
}, | |
fieldtobechanged1 = doYourChange(item.fieldtobechanged1); | |
fieldtobechanged2 = doYourChange(item.fieldtobechanged2); | |
// Now update the item with your changes and you are done | |
Hugecollection.findOneAndUpdate({ | |
_id: new ObjectId(item._id) | |
}, { | |
$set: { | |
fieldtobechanged1: fieldtobechanged1, | |
fieldtobechanged2: fieldtobechanged2 | |
} | |
}, function (err, updated) { | |
callback(); | |
}); | |
} else { | |
callback(); | |
} | |
}, done()); // done is call when all items are updated! | |
}); | |
}, function (err) { | |
console.log('Done!'); | |
mongoose.disconnect(); | |
}); | |
}); | |
} | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment