Created
February 27, 2014 13:27
-
-
Save comerford/9249951 to your computer and use it in GitHub Desktop.
Creating an odd chunk distribution in MongoDB - mistaken pre-split
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 1 (OS command line): start a mongo shell that does not connect to any database.
./mongo --nodb | |
// Step 2 (inside that shell): use the ShardingTest helper to start a new test cluster
// with 2 shards and a 1MB chunk size; the handle is kept in `cluster`.
cluster = new ShardingTest({shards: 2, chunksize: 1}); | |
// Step 3 (OS command line): open another shell, because the previous one will be full of
// logging and is not actually connected to anything.
// NOTE(review): port 30999 is presumably the router started by ShardingTest - confirm.
./mongo --port 30999 | |
// Step 4: stop the balancer, then query its state to check that the stop took effect.
sh.stopBalancer() | |
sh.getBalancerState() | |
// Step 5: switch to the test database and enable sharding on it.
use chunktest; | |
sh.enableSharding("chunktest"); | |
// Need a GUID function, bit hacky, but it works: | |
/**
 * Build a random identifier made of eight 16-bit groups rendered in lowercase hex.
 *
 * Every group is zero-padded to four digits, so the result is always exactly
 * 32 characters long. Without the padding, any group below 0x1000 would come out
 * shorter and the ids would have varying lengths, which changes how they sort as
 * strings relative to fixed-width split points.
 *
 * Uses Math.random(), so the value is only suitable as test data, not as a
 * security-sensitive identifier.
 *
 * @returns {string} 32 lowercase hexadecimal characters.
 */
function GUID () {
    // One random 16-bit group as exactly four hex digits.
    var S4 = function () {
        var hex = Math.floor(
            Math.random() * 0x10000 /* 65536 */
        ).toString(16);
        // Left-pad with zeros: toString(16) drops leading zeros (e.g. 0x00ab -> "ab").
        return ("000" + hex).slice(-4);
    };
    return (
        "" + S4() + S4() + S4() + S4() + S4() + S4() + S4() + S4()
    );
}
// insert 10,000,000 docs *before* trying the pre-split
// NOTE: this is usually a mistake; we are doing it on purpose here
// Bulk-load step: 10,000,000 single-document inserts into chunktest.prefixsplit.
// Each document gets a GUID() string as _id, the current timestamp and a new ObjectId.
for (var i = 0; i < 10000000; i += 1) {
    db.prefixsplit.insert({
        "_id" : GUID(),
        "date" : new Date(),
        "otherID" : new ObjectId()
    });
}
// Shard the already-populated collection on _id (this will cause initial splits, lots of them)
sh.shardCollection(
    "chunktest.prefixsplit",
    {"_id" : 1}
);
// Attempt a "pre"-split even though the initial split has already happened.
// A split is requested at every prefix xyz, where x and y run over all 16 hex digits
// and z takes the values 0, 4, 8 and c (16 * 16 * 4 = 1024 requests); each prefix is
// followed by a fixed run of zeros to form the full _id split point.
for (var x = 0; x < 16; x += 1) {
    for (var y = 0; y < 16; y += 1) {
        for (var z = 0; z < 16; z += 4) {
            var prefix = [x, y, z].map(function (digit) {
                return digit.toString(16);
            }).join("") + "00000000000000000000000000000";
            db.adminCommand({
                split : "chunktest.prefixsplit",
                middle : { _id : prefix }
            });
        }
    }
}
// now check out the chunk info, will be pretty weird | |
/**
 * Print size statistics for every chunk of a sharded namespace, then a summary.
 *
 * Output (via print) is a CSV header "ChunkID,ChunkSize,ObjectsInChunk", one CSV
 * line per chunk in ascending order of the chunk's min bound, and a summary with
 * the chunk count, the empty-chunk count and the average sizes. Sizes come from
 * the datasize command run with estimate:true.
 *
 * The shard key is read from config.collections once, before the chunk loop. If
 * no entry exists for the namespace, keyPattern is passed as undefined.
 * Averages are reported as 0 when there is nothing to average (no chunks, or no
 * non-empty chunks), instead of NaN or Infinity.
 *
 * @param {string} ns Full namespace, "<database>.<collection>".
 */
function AllChunkInfo(ns) {
    var configDB = db.getSiblingDB("config");
    // all chunks for the ns, ordered by min
    var chunks = configDB.chunks.find({"ns" : ns}).sort({min:1});
    // Every chunk returned has this ns, so the data database and the shard key are
    // the same for all of them: look them up once rather than once per chunk.
    var dataDB = db.getSiblingDB(String(ns).split(".")[0]);
    var collInfo = configDB.collections.findOne({_id : ns});
    var key = collInfo ? collInfo.key : undefined; // needed for the datasize call

    // counters for the overall stats at the end
    var totalChunks = 0;
    var totalSize = 0;
    var totalEmpty = 0;

    // Average that yields 0 instead of NaN/Infinity when the count is zero.
    var average = function (total, count) {
        return count > 0 ? total / count : 0;
    };

    print("ChunkID,ChunkSize,ObjectsInChunk");
    // iterate over all the chunks, print out info for each
    chunks.forEach(
        function printChunkInfo(chunk) {
            var dataSizeResult = dataDB.runCommand({datasize:ns, keyPattern:key, min:chunk.min, max:chunk.max, estimate:true});
            print(chunk._id+","+dataSizeResult.size+","+dataSizeResult.numObjects);
            totalSize += dataSizeResult.size;
            totalChunks++;
            if (dataSizeResult.size === 0) { totalEmpty++; } // count empty chunks for summary
        }
    );
    print("***********Summary Chunk Information***********");
    print("Total Chunks: "+totalChunks);
    print("Average Chunk Size (bytes): "+average(totalSize, totalChunks));
    print("Empty Chunks: "+totalEmpty);
    print("Average Chunk Size (non-empty): "+average(totalSize, totalChunks-totalEmpty));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment