Last active
January 22, 2023 02:49
-
-
Save YouriT/e40075a03bf37e7e1d43e9c6cdf7293b to your computer and use it in GitHub Desktop.
Programatically pre-split evenly a base64 shard key across mongo shards
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SHA1 base64 sharding... | |
// Following: http://stackoverflow.com/questions/19672302/how-to-programatically-pre-split-a-guid-based-shard-key-with-mongodb | |
// Available as a Gist at: https://gist.github.com/YouriT/e40075a03bf37e7e1d43e9c6cdf7293b | |
// INSTRUCTIONS: | |
// - $ npm i fractional | |
// - $ node pre-sharding.js | |
// - It will print all the commands that need to be executed within mongos, in order to create the sharding | |
// and then chunk the dataset | |
// First, we split based on the number of shards | |
var Fraction = require('fractional').Fraction; | |
var CHARS_COUNT = 64; | |
var SHARDS = [ | |
'rs0', | |
'rs1' | |
]; | |
var COLLECTION = 'test-db.testcollections'; | |
var SHARD_KEY = 'value'; | |
var KEY_LENGTH = 28; | |
var ESTIMATED_DATA_SIZE = 150 * 1e3; // in MB | |
var MAX_CHUNCK_SIZE = 64; // in MB | |
if (SHARDS.length > CHARS_COUNT) { | |
throw Error('Must have less shards that chars or modify this code...'); | |
} | |
var MIN_CHUNKS = ESTIMATED_DATA_SIZE / MAX_CHUNCK_SIZE, | |
IDEAL_CHUNCKS = 0, // Power of CHARS_COUNT | |
NUMBER_OF_CHARS_NEEDED = 0, // This determines the number of loops needed... | |
LAST_ITERATOR_INC = 1; | |
for (var i = 0; IDEAL_CHUNCKS < MIN_CHUNKS; i++) { | |
IDEAL_CHUNCKS = Math.pow(CHARS_COUNT, i); | |
NUMBER_OF_CHARS_NEEDED = i; | |
} | |
LAST_ITERATOR_INC = Math.floor(CHARS_COUNT / new Fraction(Math.ceil(IDEAL_CHUNCKS / MIN_CHUNKS * 100) / 100).denominator); | |
// console.log(MIN_CHUNKS, IDEAL_CHUNCKS, IDEAL_CHUNCKS / MIN_CHUNKS, LAST_ITERATOR_INC); | |
console.log(`sh.enableSharding("${COLLECTION.split('.')[0]}");`); | |
console.log(`sh.shardCollection("${COLLECTION}", {${SHARD_KEY}: 1}, true);`); | |
var Base64 = { // Credits to: http://stackoverflow.com/questions/6213227/fastest-way-to-convert-a-number-to-radix-64-in-javascript | |
_Rixits: | |
// 0 8 16 24 32 40 48 56 63 | |
// v v v v v v v v v | |
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/", | |
// You have the freedom, here, to choose the glyphs you want for | |
// representing your base-64 numbers. The ASCII encoding guys usually | |
// choose a set of glyphs beginning with ABCD..., but, looking at | |
// your update #2, I deduce that you want glyphs beginning with | |
// 0123..., which is a fine choice and aligns the first ten numbers | |
// in base 64 with the first ten numbers in decimal. | |
// This cannot handle negative numbers and only works on the | |
// integer part, discarding the fractional part. | |
// Doing better means deciding on whether you're just representing | |
// the subset of javascript numbers of twos-complement 32-bit integers | |
// or going with base-64 representations for the bit pattern of the | |
// underlying IEEE floating-point number, or representing the mantissae | |
// and exponents separately, or some other possibility. For now, bail | |
fromNumber: function(number) { | |
if (isNaN(Number(number)) || number === null || | |
number === Number.POSITIVE_INFINITY) | |
throw "The input is not valid"; | |
if (number < 0) | |
throw "Can't represent negative numbers now"; | |
var rixit; // like 'digit', only in some non-decimal radix | |
var residual = Math.floor(number); | |
var result = ''; | |
while (true) { | |
rixit = residual % 64 | |
// console.log("rixit : " + rixit); | |
// console.log("result before : " + result); | |
result = this._Rixits.charAt(rixit) + result; | |
// console.log("result after : " + result); | |
// console.log("residual before : " + residual); | |
residual = Math.floor(residual / 64); | |
// console.log("residual after : " + residual); | |
if (residual == 0) | |
break; | |
} | |
return result; | |
}, | |
toNumber: function(rixits) { | |
var result = 0; | |
// console.log("rixits : " + rixits); | |
// console.log("rixits.split('') : " + rixits.split('')); | |
rixits = rixits.split(''); | |
for (var e = 0; e < rixits.length; e++) { | |
// console.log("_Rixits.indexOf(" + rixits[e] + ") : " + | |
// this._Rixits.indexOf(rixits[e])); | |
// console.log("result before : " + result); | |
result = (result * 64) + this._Rixits.indexOf(rixits[e]); | |
// console.log("result after : " + result); | |
} | |
return result; | |
} | |
} | |
var tempShardKeyObj = {}; | |
var commands = []; | |
for (var x = CHARS_COUNT / SHARDS.length; x < CHARS_COUNT; x += (CHARS_COUNT / SHARDS.length)) { | |
var prefix = "" + Base64.fromNumber(x) + (new Array(KEY_LENGTH - 1).fill('0').join('')); | |
tempShardKeyObj[SHARD_KEY] = prefix; | |
var command = { | |
split: COLLECTION, | |
middle: tempShardKeyObj | |
}; | |
commands.push(command); | |
console.log('db.adminCommand(' + JSON.stringify(command) + ');'); | |
// db.adminCommand(command); | |
} | |
commands.forEach(function(el, i) { | |
console.log('sh.moveChunk("' + COLLECTION + '", ' + JSON.stringify(command.middle) + ', "' + SHARDS[i + 1] + '");'); | |
}); | |
var currentLoopLetter = 'i'; | |
var LOOPS = 'var prefixes = [];\n'; | |
var PREFIX_STRING = `var prefix = "" + `; | |
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED; i++) { | |
PREFIX_STRING += `Base64.fromNumber(${String.fromCharCode(currentLoopLetter.charCodeAt(0) + i)}) + `; | |
if (i == NUMBER_OF_CHARS_NEEDED - 1) { | |
PREFIX_STRING += '"' + (new Array(KEY_LENGTH - NUMBER_OF_CHARS_NEEDED).fill('0').join('')) + '";'; | |
} | |
} | |
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED; i++) { | |
if (i < NUMBER_OF_CHARS_NEEDED - 1) { | |
LOOPS += `\nfor (var ${currentLoopLetter} = 0; ${currentLoopLetter} < ${CHARS_COUNT}; ${currentLoopLetter}++) {`; | |
} else { | |
LOOPS += `\nfor (var ${currentLoopLetter} = 0; ${currentLoopLetter} < ${CHARS_COUNT}; ${currentLoopLetter} += ${LAST_ITERATOR_INC}) { | |
${PREFIX_STRING} | |
prefixes.push(prefix); | |
db.adminCommand({ | |
split: "${COLLECTION}", | |
middle: { | |
${SHARD_KEY}: prefix | |
} | |
}); | |
}`; | |
} | |
currentLoopLetter = String.fromCharCode(currentLoopLetter.charCodeAt(0) + 1); | |
} | |
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED - 1; i++) { | |
LOOPS += '\n}'; | |
} | |
var OUT = ` | |
var Base64 = { // Credits to: http://stackoverflow.com/questions/6213227/fastest-way-to-convert-a-number-to-radix-64-in-javascript | |
_Rixits: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/", | |
fromNumber: function(number) { | |
if (isNaN(Number(number)) || number === null || | |
number === Number.POSITIVE_INFINITY) | |
throw "The input is not valid"; | |
if (number < 0) | |
throw "Can't represent negative numbers now"; | |
var rixit; // like 'digit', only in some non-decimal radix | |
var residual = Math.floor(number); | |
var result = ''; | |
while (true) { | |
rixit = residual % 64; | |
result = this._Rixits.charAt(rixit) + result; | |
residual = Math.floor(residual / 64); | |
if (residual == 0) | |
break; | |
} | |
return result; | |
}, | |
toNumber: function(rixits) { | |
var result = 0; | |
rixits = rixits.split(''); | |
for (var e = 0; e < rixits.length; e++) { | |
result = (result * 64) + this._Rixits.indexOf(rixits[e]); | |
} | |
return result; | |
} | |
} | |
${LOOPS} | |
prefixes.length`; | |
console.log(OUT); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When I asked chatgpt to explain base64, this gist code came up. Greate work.