Skip to content

Instantly share code, notes, and snippets.

function compress(column) {
column.compression = 'DICTIONARY';
const dictionary = [];
const indexes = {};
column.values = column.values.map(value => {
if (value === null) return null;
// add string to dictionary if there is no such string yet
if (indexes[value] === undefined) {
indexes[value] = dictionary.length;
// use unique uuids (v4 generator from the `uuid` package, aliased as uuidv4)
const { v4: uuidv4 } = require('uuid');
// Threshold for splitting: a split is performed when the size of the values
// exceeds ~15 MB (15e6; presumably measured in bytes — TODO confirm against the caller).
const SPLIT_SIZE = 15e6;
/**
 * Placeholder for the MongoDB single-document insert helper.
 * The real implementation is not part of this snippet.
 *
 * @param {*} _id - presumably the `_id` of the document to insert (TODO confirm).
 * @param {*} data - presumably the remaining document fields (TODO confirm).
 * @returns {Promise<*>} a promise (the function is declared `async`); the
 *   resolved value depends on the real implementation.
 */
async function insertOneMongoDB(_id, data) {
/* implemented elsewhere; intentionally left empty here */
}
/**
 * Placeholder for the MongoDB single-document lookup helper.
 * The real implementation is not part of this snippet.
 *
 * @param {*} query - lookup criteria; presumably a MongoDB filter object (TODO confirm).
 * @returns {Promise<*>} a promise (the function is declared `async`). The caller
 *   `get` awaits it and reads `_splitted`, `_id` and `type` from the result, so
 *   the real implementation is expected to resolve to a document object.
 */
async function findOneMongoDB(query) {
/* implemented elsewhere; intentionally left empty here */
}
async function get(query) {
let doc = await findOneMongoDB(query);
// return doc at once if it is not splitted
if (!doc._splitted) return doc;
const { _id, type } = doc;
/**
 * Shape of a column object: an id, a literal type tag, and a list of values
 * where each entry is either a string or null.
 * NOTE(review): this is TypeScript syntax; it will not parse as plain JavaScript.
 */
interface Column {
_id: string; // MongoDB id field
type: 'string'; // string-literal type: the only allowed value is the text 'string'
values: (string | null)[]; // an array, each element is either a string or null
}
// Sample BSON.serialize calls with the recorded output bytes.
// NOTE(review): `doc` is re-declared in every example below, so these appear to be
// independent snippets rather than one runnable script — confirm before running them together.
// `BSON` is not declared in this snippet; presumably it comes from the `bson` package.

// Empty document: int32 total size (5) followed by the document terminator byte.
const doc = {};
console.log(BSON.serialize(doc));
// <Buffer 05 00 00 00 00>

// Int32 field: size 14 (0e) | 10 = value type | 5f 69 64 00 = key `_id` + terminator
// | 01 00 00 00 = int32 value 1 | 00 = document terminator.
let doc = { _id: 1 };
console.log(BSON.serialize(doc));
// <Buffer 0e 00 00 00 10 5f 69 64 00 01 00 00 00 00>

// String field: size 22 (16) | 02 = value type | 74 79 70 65 00 = key `type` + terminator
// | 07 00 00 00 = int32 string length (6 chars + terminator) | `string` + 00 | 00 = document terminator.
let doc = { type: 'string' };
console.log(BSON.serialize(doc));
// <Buffer 16 00 00 00 02 74 79 70 65 00 07 00 00 00 73 74 72 69 6e 67 00 00>

// Array with one string: size 31 (1f) | 04 = value type | key `values` + terminator
// | 12 00 00 00 = size of the embedded array document (18)
// | 02 30 00 = string element whose key is the index "0" | 06 00 00 00 = string length | `first` + 00
// | 00 = array terminator | 00 = document terminator.
let doc = { values: ['first'] };
console.log(BSON.serialize(doc));
// <Buffer 1f 00 00 00 04 76 61 6c 75 65 73 00 12 00 00 00 02 30 00 06 00 00 00 66 69 72 73 74 00 00 00>

// Array with one null: size 21 (15) | 04 = value type | key `values` + terminator
// | 08 00 00 00 = size of the embedded array document (8)
// | 0a 30 00 = null element whose key is the index "0" (a null carries no value bytes)
// | 00 = array terminator | 00 = document terminator.
let doc = { values: [null] };
console.log(BSON.serialize(doc));
// <Buffer 15 00 00 00 04 76 61 6c 75 65 73 00 08 00 00 00 0a 30 00 00 00>
function calculateColumnSize(column) {
let size = 0;
size += 4; // 4 bytes for int32 for a document size
// `_id` key-value where _id value is a string
// 10 bytes + ? = 1(value type) + 3(`_id` key length) + 1(key terminator) + 4(int32 string length) + ?(value length) + 1(string value terminator)
size += 10 + column._id.length;
// `type` key value with 'string' as value
// 17 bytes = 1(value type) + 4(`type` key length) + 1(key terminator) + 4(int32 string length) + 6(`string` length) + 1(string value terminator)
size += 17;
// `values` array with string or null values