Skip to content

Instantly share code, notes, and snippets.

@YouriT
Last active January 22, 2023 02:49
Show Gist options
  • Save YouriT/e40075a03bf37e7e1d43e9c6cdf7293b to your computer and use it in GitHub Desktop.
Save YouriT/e40075a03bf37e7e1d43e9c6cdf7293b to your computer and use it in GitHub Desktop.
Programatically pre-split evenly a base64 shard key across mongo shards
// SHA1 base64 sharding...
// Following: http://stackoverflow.com/questions/19672302/how-to-programatically-pre-split-a-guid-based-shard-key-with-mongodb
// Available as a Gist at: https://gist.github.com/YouriT/e40075a03bf37e7e1d43e9c6cdf7293b
// INSTRUCTIONS:
// - $ npm i fractional
// - $ node pre-sharding.js
// - It will print all the commands that need to be executed within mongos, in order to create the sharding
// and then chunk the dataset
// First, we split based on the number of shards
var Fraction = require('fractional').Fraction;
var CHARS_COUNT = 64;
var SHARDS = [
'rs0',
'rs1'
];
var COLLECTION = 'test-db.testcollections';
var SHARD_KEY = 'value';
var KEY_LENGTH = 28;
var ESTIMATED_DATA_SIZE = 150 * 1e3; // in MB
var MAX_CHUNCK_SIZE = 64; // in MB
if (SHARDS.length > CHARS_COUNT) {
throw Error('Must have less shards that chars or modify this code...');
}
var MIN_CHUNKS = ESTIMATED_DATA_SIZE / MAX_CHUNCK_SIZE,
IDEAL_CHUNCKS = 0, // Power of CHARS_COUNT
NUMBER_OF_CHARS_NEEDED = 0, // This determines the number of loops needed...
LAST_ITERATOR_INC = 1;
for (var i = 0; IDEAL_CHUNCKS < MIN_CHUNKS; i++) {
IDEAL_CHUNCKS = Math.pow(CHARS_COUNT, i);
NUMBER_OF_CHARS_NEEDED = i;
}
LAST_ITERATOR_INC = Math.floor(CHARS_COUNT / new Fraction(Math.ceil(IDEAL_CHUNCKS / MIN_CHUNKS * 100) / 100).denominator);
// console.log(MIN_CHUNKS, IDEAL_CHUNCKS, IDEAL_CHUNCKS / MIN_CHUNKS, LAST_ITERATOR_INC);
console.log(`sh.enableSharding("${COLLECTION.split('.')[0]}");`);
console.log(`sh.shardCollection("${COLLECTION}", {${SHARD_KEY}: 1}, true);`);
var Base64 = { // Credits to: http://stackoverflow.com/questions/6213227/fastest-way-to-convert-a-number-to-radix-64-in-javascript
_Rixits:
// 0 8 16 24 32 40 48 56 63
// v v v v v v v v v
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/",
// You have the freedom, here, to choose the glyphs you want for
// representing your base-64 numbers. The ASCII encoding guys usually
// choose a set of glyphs beginning with ABCD..., but, looking at
// your update #2, I deduce that you want glyphs beginning with
// 0123..., which is a fine choice and aligns the first ten numbers
// in base 64 with the first ten numbers in decimal.
// This cannot handle negative numbers and only works on the
// integer part, discarding the fractional part.
// Doing better means deciding on whether you're just representing
// the subset of javascript numbers of twos-complement 32-bit integers
// or going with base-64 representations for the bit pattern of the
// underlying IEEE floating-point number, or representing the mantissae
// and exponents separately, or some other possibility. For now, bail
fromNumber: function(number) {
if (isNaN(Number(number)) || number === null ||
number === Number.POSITIVE_INFINITY)
throw "The input is not valid";
if (number < 0)
throw "Can't represent negative numbers now";
var rixit; // like 'digit', only in some non-decimal radix
var residual = Math.floor(number);
var result = '';
while (true) {
rixit = residual % 64
// console.log("rixit : " + rixit);
// console.log("result before : " + result);
result = this._Rixits.charAt(rixit) + result;
// console.log("result after : " + result);
// console.log("residual before : " + residual);
residual = Math.floor(residual / 64);
// console.log("residual after : " + residual);
if (residual == 0)
break;
}
return result;
},
toNumber: function(rixits) {
var result = 0;
// console.log("rixits : " + rixits);
// console.log("rixits.split('') : " + rixits.split(''));
rixits = rixits.split('');
for (var e = 0; e < rixits.length; e++) {
// console.log("_Rixits.indexOf(" + rixits[e] + ") : " +
// this._Rixits.indexOf(rixits[e]));
// console.log("result before : " + result);
result = (result * 64) + this._Rixits.indexOf(rixits[e]);
// console.log("result after : " + result);
}
return result;
}
}
var tempShardKeyObj = {};
var commands = [];
for (var x = CHARS_COUNT / SHARDS.length; x < CHARS_COUNT; x += (CHARS_COUNT / SHARDS.length)) {
var prefix = "" + Base64.fromNumber(x) + (new Array(KEY_LENGTH - 1).fill('0').join(''));
tempShardKeyObj[SHARD_KEY] = prefix;
var command = {
split: COLLECTION,
middle: tempShardKeyObj
};
commands.push(command);
console.log('db.adminCommand(' + JSON.stringify(command) + ');');
// db.adminCommand(command);
}
commands.forEach(function(el, i) {
console.log('sh.moveChunk("' + COLLECTION + '", ' + JSON.stringify(command.middle) + ', "' + SHARDS[i + 1] + '");');
});
var currentLoopLetter = 'i';
var LOOPS = 'var prefixes = [];\n';
var PREFIX_STRING = `var prefix = "" + `;
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED; i++) {
PREFIX_STRING += `Base64.fromNumber(${String.fromCharCode(currentLoopLetter.charCodeAt(0) + i)}) + `;
if (i == NUMBER_OF_CHARS_NEEDED - 1) {
PREFIX_STRING += '"' + (new Array(KEY_LENGTH - NUMBER_OF_CHARS_NEEDED).fill('0').join('')) + '";';
}
}
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED; i++) {
if (i < NUMBER_OF_CHARS_NEEDED - 1) {
LOOPS += `\nfor (var ${currentLoopLetter} = 0; ${currentLoopLetter} < ${CHARS_COUNT}; ${currentLoopLetter}++) {`;
} else {
LOOPS += `\nfor (var ${currentLoopLetter} = 0; ${currentLoopLetter} < ${CHARS_COUNT}; ${currentLoopLetter} += ${LAST_ITERATOR_INC}) {
${PREFIX_STRING}
prefixes.push(prefix);
db.adminCommand({
split: "${COLLECTION}",
middle: {
${SHARD_KEY}: prefix
}
});
}`;
}
currentLoopLetter = String.fromCharCode(currentLoopLetter.charCodeAt(0) + 1);
}
for (var i = 0; i < NUMBER_OF_CHARS_NEEDED - 1; i++) {
LOOPS += '\n}';
}
var OUT = `
var Base64 = { // Credits to: http://stackoverflow.com/questions/6213227/fastest-way-to-convert-a-number-to-radix-64-in-javascript
_Rixits: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/",
fromNumber: function(number) {
if (isNaN(Number(number)) || number === null ||
number === Number.POSITIVE_INFINITY)
throw "The input is not valid";
if (number < 0)
throw "Can't represent negative numbers now";
var rixit; // like 'digit', only in some non-decimal radix
var residual = Math.floor(number);
var result = '';
while (true) {
rixit = residual % 64;
result = this._Rixits.charAt(rixit) + result;
residual = Math.floor(residual / 64);
if (residual == 0)
break;
}
return result;
},
toNumber: function(rixits) {
var result = 0;
rixits = rixits.split('');
for (var e = 0; e < rixits.length; e++) {
result = (result * 64) + this._Rixits.indexOf(rixits[e]);
}
return result;
}
}
${LOOPS}
prefixes.length`;
console.log(OUT);
@KurkanRamazan
Copy link

When I asked chatgpt to explain base64, this gist code came up. Greate work.
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment