Skip to content

Instantly share code, notes, and snippets.

@baranovxyz
Created May 11, 2020 21:00
Show Gist options
  • Save baranovxyz/212be417ffce56ba824b358cf76b8902 to your computer and use it in GitHub Desktop.
Save baranovxyz/212be417ffce56ba824b358cf76b8902 to your computer and use it in GitHub Desktop.
// UUID v4 generator, used to mint unique ids for chunk documents
const { v4: uuidv4 } = require('uuid');
// Threshold, in estimated bytes (~15 MB), beyond which a column's values
// are cut off into a separate chunk document.
const SPLIT_SIZE = 15 * 1000 * 1000;
/**
 * Placeholder for the single-document insert; the real implementation is
 * not part of this snippet.
 *
 * NOTE(review): presumably stores `data` as one MongoDB document keyed by
 * `_id` — confirm against the actual implementation.
 *
 * @param {*} _id - identifier of the document to insert
 * @param {object} data - document body
 * @returns {Promise<undefined>} the stub resolves with nothing
 */
async function insertOneMongoDB(_id, data) {
  // implemented somewhere else
  return undefined;
}
/**
 * Saves a column, splitting `column.values` into a chain of chunk documents
 * when the estimated size of the values exceeds SPLIT_SIZE.
 *
 * If no split was needed the column is saved unchanged. Otherwise every run
 * of values is saved through `saveChunk`, and the column itself is saved
 * without values, marked with `compression: 'CHUNKS'`, `_splitted: true` and
 * `_next` pointing to the first chunk id.
 *
 * NOTE: when a split happens, the passed-in `column` object is mutated
 * (its `values` are replaced by an empty array).
 *
 * The size estimate assumes every value is either `null` or has a `length`
 * (e.g. a string).
 *
 * @param {{_id: *, values: Array<string|null>}} column - column to persist
 * @returns {Promise<*>} whatever `insertOneMongoDB` resolves with
 */
async function insert(column) {
  // ids[0] is the column's own id; saveChunk appends chunk ids after it
  const ids = [column._id];
  let chunk = 0;
  let chunkStart = 0;
  let chunkSize = 0;

  // indexes are needed for slicing, hence the classic `for` loop
  for (let i = 0, len = column.values.length; i < len; i++) {
    const value = column.values[i];
    // naive size estimate: 2 bytes per char plus 2 bytes of overhead,
    // a null value is counted as 4 bytes
    if (value === null) {
      chunkSize += 2 + 2;
    } else {
      chunkSize += value.length * 2 + 2;
    }

    if (chunkSize > SPLIT_SIZE) {
      // the value that crossed the threshold stays in the current chunk
      await saveChunk(ids, ++chunk, column.values.slice(chunkStart, i + 1));
      chunkStart = i + 1;
      chunkSize = 0;
    }
  }

  if (chunk > 0) {
    // save the remaining values as the last chunk (no successor id needed)
    await saveChunk(ids, ++chunk, column.values.slice(chunkStart), false);
    // strip values from the splitted column and link it to the first chunk
    column.compression = 'CHUNKS';
    column.values = [];
    column._splitted = true; // mark column as splitted
    column._next = ids[1];
  }

  // The column is stored under its own id. The original code referenced an
  // undeclared `_id` here, which threw a ReferenceError on every call.
  return insertOneMongoDB(column._id, column);
}
/**
 * Helper: saves one chunk of values as its own document.
 *
 * Appends a fresh uuid to `ids` when the chunk has no id yet, and another
 * one for the following chunk when `next` is truthy, so the saved document
 * can point to its successor through `_next`.
 *
 * @param {Array} ids - ids collected so far; mutated (new ids are appended)
 * @param {number} chunk - index into `ids` of the chunk being saved
 * @param {Array} values - values stored in this chunk
 * @param {boolean} [next=true] - whether another chunk will follow
 * @returns {Promise<*>} whatever `insertOneMongoDB` resolves with
 */
async function saveChunk(ids, chunk, values, next = true) {
  const alreadyHasId = Boolean(ids[chunk]);
  if (!alreadyHasId) {
    // this chunk has no id yet — mint one
    ids.push(uuidv4());
  }
  if (next) {
    // a following chunk is expected — reserve its id right away
    ids.push(uuidv4());
  }

  const chunkId = ids[chunk];
  const followingId = ids[chunk + 1]; // undefined for the last chunk, which is fine
  const chunkDocument = {
    _id: chunkId,
    _next: followingId,
    _chunk: true,
    values,
  };

  return insertOneMongoDB(chunkId, chunkDocument);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment