Last active
September 3, 2019 15:40
-
-
Save H-Max/8af45114d01a7569bd8ab810586aa586 to your computer and use it in GitHub Desktop.
Shard and enable pre-splitting (on hashed index) for collection(s) in a MongoDB Database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
    Shard collection(s) before restoring or mass importing data into them.
    The script has to run against a mongos instance (not mongod), with a user having:
    - the "clusterAdmin" privilege
    - the "readWrite" privilege on the database to shard
    For each collection to shard, you will have to estimate the "numInitialChunks" value depending on:
    - The volume of data to import/insert
    - The configured chunk size for your instance
    Author : Henri-Maxime Ducoulombier ( [email protected] )
*/
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Name of the database whose collections are going to be sharded.
var dbName = "myDB";

// Handle on the "admin" database, used to run admin commands.
var myAdmin = db.getSiblingDB("admin");

// When true, each collection is dropped before being sharded.
// Off by default for safety reasons.
var dropFirst = false;

// When true, sharding is enabled on the database before its collections are
// sharded (required for a database that is not sharded yet).
var enableSharding = false;

// When true, the balancer is disabled before sharding (this can take some
// time if balancing is in progress).
var disableBalancer = true;

// When true, the balancer is re-enabled once the operation is over.
var enableBalancer = false;

// Collections to shard, one settings object per collection:
//   collection       - name of the collection inside `dbName`
//   numInitialChunks - value passed as "numInitialChunks" to shardCollection
//   key              - shard key document, also used as the index specification
//   name             - name given to the index created on `key`
var shardCollections = [
    {"collection": "collection1", "numInitialChunks": 12, "key": {"myShardingKey": "hashed"}, "name": "shard_idx"},
    {"collection": "collection2", "numInitialChunks": 18, "key": {"myShardingKey": "hashed"}, "name": "shard_idx"},
    {"collection": "collection3", "numInitialChunks": 24, "key": {"myShardingKey": "hashed"}, "name": "shard_idx"}
];
// Preparation step: optionally stop the balancer, then optionally enable
// sharding on the target database.
if (disableBalancer){
    // Disable the balancer first (you should always do that when backing up or restoring data)
    sh.setBalancerState(false);
}
if (enableSharding){
    // Enable sharding on the DB (only needed if sharding is not yet enabled on the database).
    // Uses the `myAdmin` handle declared in the configuration instead of resolving "admin" again.
    var enableShardingResult = myAdmin.runCommand({"enableSharding": dbName});
    // A rejected command can come back as a reply with a falsy `ok` rather than as an
    // exception, so inspect it instead of discarding it. This is only a warning: if the
    // database really is not sharded, the later shardCollection command fails on its own.
    if (!enableShardingResult || !enableShardingResult.ok){
        print("WARNING: enableSharding on '" + dbName + "' was not acknowledged: " +
            ((enableShardingResult && enableShardingResult.errmsg) || "unknown error"));
    }
}
// Browse collection(s) to shard.
// The loop runs inside try/finally so that the balancer is switched back on (when the
// configuration asks for it) even if one of the shell calls throws part-way through.
var shardingFailures = []; // One message per collection whose shardCollection command was rejected
try {
    shardCollections.forEach(function(d){
        var collName = dbName + '.' + d.collection; // Complete namespace of the current collection (DB + collection name)
        var targetCollection = db.getSiblingDB(dbName).getCollection(d.collection);

        if (dropFirst){
            // Drop collection first (including indices)
            // Warning : This command obtains a write lock on the affected database and will block other operations until it has completed.
            targetCollection.drop();
        }

        // Create sharding index (required to give the index a name)
        var indexResult = targetCollection.createIndex(d.key, {"name": d.name});
        // Only a reply document can carry a falsy `ok`; any other return value is left alone.
        // A rejected index is reported but does not stop the sharding attempt below.
        if (indexResult !== null && typeof indexResult === 'object' && !indexResult.ok){
            print("WARNING: createIndex on '" + collName + "' was not acknowledged: " +
                (indexResult.errmsg || "unknown error"));
        }

        // Shard collection
        var shardResult = myAdmin.runCommand({"shardCollection": collName, "key": d.key, "numInitialChunks": d.numInitialChunks});
        if (!shardResult || !shardResult.ok){
            // Remember the failure and keep going with the remaining collections
            shardingFailures.push(collName + " (" + ((shardResult && shardResult.errmsg) || "unknown error") + ")");
        }
    });
} finally {
    // Re-enable balancer only if it was disabled and config says it has to be re-enabled
    if (disableBalancer && enableBalancer){
        sh.setBalancerState(true);
    }
}
// Fail loudly once everything has been attempted, instead of letting rejected
// shardCollection commands go unnoticed.
if (shardingFailures.length > 0){
    throw new Error("shardCollection failed for: " + shardingFailures.join("; "));
}
// Note: after the operation is done, you can check the sharding distribution with:
// db.getSiblingDB(dbName).getCollection(PUT_COLLECTION_NAME_HERE).getShardDistribution();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment