comerford · February 27, 2014 13:27
diff --git a/chunktest.js b/chunktest.js
 // Step 1: start a shell from the command line without connecting to a database
 // (this line is typed at the OS prompt, not inside the mongo shell)
 ./mongo --nodb
 // Step 2: in that shell, start a new 2-shard test cluster with a 1MB chunk size
 cluster = new ShardingTest({shards: 2, chunksize: 1});
 // Step 3: open another shell on port 30999 (the previous one will be full of
 // logging and not actually connected to anything)
 // NOTE(review): presumably 30999 is the mongos started by ShardingTest - confirm
 ./mongo --port 30999
 // Step 4: stop the balancer, then query its state to check that it is off
 sh.stopBalancer()
 sh.getBalancerState()
 // Step 5: select the test DB and enable sharding on it
 use chunktest;
 sh.enableSharding("chunktest");
 // Need a GUID function, bit hacky, but it works:
 /**
  * Generate a pseudo-random 32-character lowercase hex string for use as a test _id.
  *
  * The value is built from eight 16-bit groups, each zero-padded to exactly
  * four hex digits, so the result is always 32 characters long. It relies on
  * Math.random(), so it is not cryptographically secure and is not an
  * RFC 4122 UUID.
  *
  * @returns {string} 32 lowercase hex characters.
  */
 function GUID () {
     // (1 + random) * 0x10000 is always in [0x10000, 0x20000), i.e. five hex
     // digits with a leading "1"; dropping that digit leaves the random value
     // zero-padded to four digits. Without the padding, values below 0x1000
     // would produce short groups and ids of varying length.
     var S4 = function () {
         return Math.floor(
                 (1 + Math.random()) * 0x10000 /* 65536 */
             ).toString(16).substring(1);
     };

     return (
             "" + S4() + S4() + S4() + S4() + S4() + S4() + S4() + S4()
         );
 }
 // insert 10,000,000 docs *before* trying the pre-split
 // NOTE: this is a mistake usually, doing this on purpose
 // (each doc gets a GUID() string _id, the current date and a fresh ObjectId)
 for(var i = 0; i < 10000000; i++){db.prefixsplit.insert({"_id" : GUID(), "date" : new Date(), "otherID" : new ObjectId()})}
 // Shard the collection on _id (this will cause initial splits, lots of them)
 sh.shardCollection("chunktest.prefixsplit", {"_id" : 1});
 // Attempt now to "pre" split the collection after the initial split.
 // x and y cover every hex digit, z steps by 4 (0, 4, 8, c), so this issues
 // 16 * 16 * 4 = 1024 split commands.
 for ( var x=0; x<16; x++ ){
  for( var y=0; y<16; y++ ) {
 	  for ( var z=0; z<16; z+=4 ) {
    	// split point: three hex digits followed by a fixed run of zeros
    	var prefix = '' + x.toString(16) + y.toString(16) + z.toString(16) + "00000000000000000000000000000";
    	// the command result is not inspected, so a failed split goes unreported here
    	db.adminCommand( { split : "chunktest.prefixsplit" , middle : { _id : prefix } } );
 	  }
  }
 }

 // now check out the chunk info, will be pretty weird

 /**
  * Print per-chunk size statistics for a sharded collection as CSV rows,
  * followed by a summary.
  *
  * Reads the chunk documents for `ns` from config.chunks (sorted by min),
  * runs the datasize command (with estimate:true) over each chunk's
  * [min, max) bounds, and prints one "ChunkID,ChunkSize,ObjectsInChunk" row
  * per chunk. Averages are printed as "n/a" when there is nothing to average.
  *
  * Relies on the shell globals `db` and `print`.
  *
  * @param {string} ns - Full namespace ("database.collection") to report on.
  * @returns {undefined} All output is written with print().
  * @throws {Error} If chunks exist for `ns` but config.collections has no
  *     entry for it, so the shard key pattern cannot be determined.
  */
 function AllChunkInfo(ns) {
 	var configDB = db.getSiblingDB("config");
 	// Every chunk matched below carries this same ns, so the data database and
 	// the shard key are loop-invariant: look them up once, not once per chunk.
 	var dataDB = db.getSiblingDB(String(ns).split(".")[0]);
 	var collInfo = configDB.collections.findOne({_id : ns});
 	var key = collInfo ? collInfo.key : null; // needed for the dataSize call
 	var chunks = configDB.chunks.find({"ns" : ns}).sort({min:1}); // all chunks for the ns ordered by min
 	// some counters for overall stats at the end
 	var totalChunks = 0;
 	var totalSize = 0;
 	var totalEmpty = 0;
 	print("ChunkID,ChunkSize,ObjectsInChunk");
 	// iterate over all the chunks, print out info for each
 	chunks.forEach(
 		function printChunkInfo(chunk) {
 			if (key === null) {
 				throw new Error("no config.collections entry for " + ns + "; cannot determine shard key");
 			}
 			var dataSizeResult = dataDB.runCommand({datasize:chunk.ns, keyPattern:key, min:chunk.min, max:chunk.max, estimate:true});
 			// printjson(dataSizeResult); // uncomment to see how long it takes to run and status
 			print(chunk._id+","+dataSizeResult.size+","+dataSizeResult.numObjects);
 			totalSize += dataSizeResult.size;
 			totalChunks++;
 			// NOTE(review): loose == kept on purpose in case the shell returns a
 			// wrapped numeric type for size - confirm before tightening to ===
 			if (dataSizeResult.size == 0) { totalEmpty++; } // count empty chunks for summary
 		}
 	);
 	var nonEmptyChunks = totalChunks - totalEmpty;
 	print("***********Summary Chunk Information***********");
 	print("Total Chunks: "+totalChunks);
 	// guard the divisions so an empty result set prints "n/a" instead of NaN
 	print("Average Chunk Size (bytes): "+(totalChunks > 0 ? totalSize/totalChunks : "n/a"));
 	print("Empty Chunks: "+totalEmpty);
 	print("Average Chunk Size (non-empty): "+(nonEmptyChunks > 0 ? totalSize/nonEmptyChunks : "n/a"));
 }
	// Step 1: start a shell from the command line without connecting to a database
	// (this line is typed at the OS prompt, not inside the mongo shell)
	./mongo --nodb
	// Step 2: in that shell, start a new 2-shard test cluster with a 1MB chunk size
	cluster = new ShardingTest({shards: 2, chunksize: 1});
	// Step 3: open another shell on port 30999 (the previous one will be full of
	// logging and not actually connected to anything)
	// NOTE(review): presumably 30999 is the mongos started by ShardingTest - confirm
	./mongo --port 30999
	// Step 4: stop the balancer, then query its state to check that it is off
	sh.stopBalancer()
	sh.getBalancerState()
	// Step 5: select the test DB and enable sharding on it
	use chunktest;
	sh.enableSharding("chunktest");
	// Need a GUID function, bit hacky, but it works:
	/**
	 * Generate a pseudo-random 32-character lowercase hex string for use as a test _id.
	 *
	 * The value is built from eight 16-bit groups, each zero-padded to exactly
	 * four hex digits, so the result is always 32 characters long. It relies on
	 * Math.random(), so it is not cryptographically secure and is not an
	 * RFC 4122 UUID.
	 *
	 * @returns {string} 32 lowercase hex characters.
	 */
	function GUID () {
		// (1 + random) * 0x10000 is always in [0x10000, 0x20000), i.e. five hex
		// digits with a leading "1"; dropping that digit leaves the random value
		// zero-padded to four digits. Without the padding, values below 0x1000
		// would produce short groups and ids of varying length.
		var S4 = function () {
			return Math.floor(
				(1 + Math.random()) * 0x10000 /* 65536 */
			).toString(16).substring(1);
		};

		return (
			"" + S4() + S4() + S4() + S4() + S4() + S4() + S4() + S4()
		);
	}
	// insert 10,000,000 docs before trying the pre-split
	// NOTE: this is a mistake usually, doing this on purpose
	// (each doc gets a GUID() string _id, the current date and a fresh ObjectId)
	for(var i = 0; i < 10000000; i++){db.prefixsplit.insert({"_id" : GUID(), "date" : new Date(), "otherID" : new ObjectId()})}
	// Shard the collection on _id (this will cause initial splits, lots of them)
	sh.shardCollection("chunktest.prefixsplit", {"_id" : 1});
	// Attempt now to "pre" split the collection after the initial split.
	// x and y cover every hex digit, z steps by 4 (0, 4, 8, c), so this issues
	// 16 * 16 * 4 = 1024 split commands.
	for ( var x=0; x<16; x++ ){
	for( var y=0; y<16; y++ ) {
	for ( var z=0; z<16; z+=4 ) {
	// split point: three hex digits followed by a fixed run of zeros
	var prefix = '' + x.toString(16) + y.toString(16) + z.toString(16) + "00000000000000000000000000000";
	// the command result is not inspected, so a failed split goes unreported here
	db.adminCommand( { split : "chunktest.prefixsplit" , middle : { _id : prefix } } );
	}
	}
	}

	// now check out the chunk info, will be pretty weird

	/**
	 * Print per-chunk size statistics for a sharded collection as CSV rows,
	 * followed by a summary.
	 *
	 * Reads the chunk documents for `ns` from config.chunks (sorted by min),
	 * runs the datasize command (with estimate:true) over each chunk's
	 * [min, max) bounds, and prints one "ChunkID,ChunkSize,ObjectsInChunk" row
	 * per chunk. Averages are printed as "n/a" when there is nothing to average.
	 *
	 * Relies on the shell globals `db` and `print`.
	 *
	 * @param {string} ns - Full namespace ("database.collection") to report on.
	 * @returns {undefined} All output is written with print().
	 * @throws {Error} If chunks exist for `ns` but config.collections has no
	 *     entry for it, so the shard key pattern cannot be determined.
	 */
	function AllChunkInfo(ns) {
		var configDB = db.getSiblingDB("config");
		// Every chunk matched below carries this same ns, so the data database and
		// the shard key are loop-invariant: look them up once, not once per chunk.
		var dataDB = db.getSiblingDB(String(ns).split(".")[0]);
		var collInfo = configDB.collections.findOne({_id : ns});
		var key = collInfo ? collInfo.key : null; // needed for the dataSize call
		var chunks = configDB.chunks.find({"ns" : ns}).sort({min:1}); // all chunks for the ns ordered by min
		// some counters for overall stats at the end
		var totalChunks = 0;
		var totalSize = 0;
		var totalEmpty = 0;
		print("ChunkID,ChunkSize,ObjectsInChunk");
		// iterate over all the chunks, print out info for each
		chunks.forEach(
			function printChunkInfo(chunk) {
				if (key === null) {
					throw new Error("no config.collections entry for " + ns + "; cannot determine shard key");
				}
				var dataSizeResult = dataDB.runCommand({datasize:chunk.ns, keyPattern:key, min:chunk.min, max:chunk.max, estimate:true});
				// printjson(dataSizeResult); // uncomment to see how long it takes to run and status
				print(chunk._id+","+dataSizeResult.size+","+dataSizeResult.numObjects);
				totalSize += dataSizeResult.size;
				totalChunks++;
				// NOTE(review): loose == kept on purpose in case the shell returns a
				// wrapped numeric type for size - confirm before tightening to ===
				if (dataSizeResult.size == 0) { totalEmpty++; } // count empty chunks for summary
			}
		);
		var nonEmptyChunks = totalChunks - totalEmpty;
		print("*********Summary Chunk Information*********");
		print("Total Chunks: "+totalChunks);
		// guard the divisions so an empty result set prints "n/a" instead of NaN
		print("Average Chunk Size (bytes): "+(totalChunks > 0 ? totalSize/totalChunks : "n/a"));
		print("Empty Chunks: "+totalEmpty);
		print("Average Chunk Size (non-empty): "+(nonEmptyChunks > 0 ? totalSize/nonEmptyChunks : "n/a"));
	}