Created
June 9, 2020 21:10
-
-
Save primiano/8a0daabbbc5f8e28defad7c6e25579bd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shared UTF-8 codec instances used by encode() and decodeAndIterate().
// TextEncoder always emits UTF-8 and its constructor takes no argument.
const encoder = new TextEncoder();
const decoder = new TextDecoder('utf-8');

// Not referenced in the visible code; presumably meant for masking tag bits — TODO confirm.
const TAG_MASK = 0x3;

// Per-cell tag values: one tag byte is written for every cell and selects how
// the cell's value is stored.
const TAG_INT32 = 0;
const TAG_FLOAT64 = 1;
const TAG_STRING = 2;

// NOTE(review): BITMAP_SIZE, INT_PAYLOAD_SIZE and Hasher are used by the
// functions in this file but are not defined in the visible source — confirm
// where they come from.
/**
 * Serializes a flat list of cells into a chunked binary buffer.
 *
 * Each chunk is laid out as follows (offsets relative to the chunk start):
 *   [0, 4)   uint32  chunk length in bytes (always a multiple of 8)
 *   [4, 8)   uint32  number of cells in the chunk
 *   [8, BITMAP_SIZE)                               one tag byte per cell
 *   [BITMAP_SIZE, BITMAP_SIZE + INT_PAYLOAD_SIZE)  numeric payload
 *   [BITMAP_SIZE + INT_PAYLOAD_SIZE, chunk length) NUL-terminated UTF-8 strings
 *
 * A cell is treated as numeric when `0 * cell === 0`, so finite numbers and
 * values that coerce to one (including numeric strings and '') are stored as
 * numbers; everything else is stored as a string.
 *
 * Relies on BITMAP_SIZE and INT_PAYLOAD_SIZE, which are defined outside this
 * function (assumed to be multiples of 8 so the typed-array indices stay
 * aligned — TODO confirm).
 *
 * @param {Array<number|string>} cells Cells to serialize, in order.
 * @returns {ArrayBuffer} A buffer holding exactly the encoded chunks.
 */
function encode(cells) {
  // Initial guess only: ensureCapacity() grows the buffer on demand. Sizes stay
  // multiples of 8 so the Float64Array view can always be constructed.
  let buf = new ArrayBuffer(cells.length * 40);
  let b8 = new Uint8Array(buf);
  let b32 = new Uint32Array(buf);
  let f64 = new Float64Array(buf);

  let chunkStart = 0; // byte offset of the current chunk
  let bOff = 8; // next tag byte to write
  let pStart = 0; // start of the current chunk's numeric payload
  let pOff = 0; // next numeric payload byte to write
  let sOff = 0; // next string byte to write

  // Grows the backing buffer (and re-creates the views) so that bytes
  // [0, byteEnd) are writable. Out-of-range typed-array writes are silently
  // ignored, so without this the output would be quietly truncated.
  function ensureCapacity(byteEnd) {
    if (byteEnd <= buf.byteLength) return;
    let size = Math.max(buf.byteLength * 2, 1024);
    while (size < byteEnd) size *= 2;
    const grown = new ArrayBuffer(size);
    new Uint8Array(grown).set(b8);
    buf = grown;
    b8 = new Uint8Array(buf);
    b32 = new Uint32Array(buf);
    f64 = new Float64Array(buf);
  }

  // Writes the header of the current chunk, then positions all offsets at the
  // start of the next one. Returns the byte offset where the next chunk begins.
  function finalizeChunk() {
    const nextChunkStart = Math.ceil(sOff / 8) * 8;
    const chunkLen = nextChunkStart - chunkStart;
    const numCells = bOff - chunkStart - 8;
    console.log('new chunk', chunkLen, numCells);
    // Covers the header even when the chunk being closed holds no cells.
    ensureCapacity(nextChunkStart);
    b32[chunkStart / 4] = chunkLen;
    b32[chunkStart / 4 + 1] = numCells;
    chunkStart += chunkLen;
    bOff = chunkStart + 8;
    pStart = chunkStart + BITMAP_SIZE;
    pOff = pStart;
    sOff = pOff + INT_PAYLOAD_SIZE;
    return nextChunkStart;
  }

  // The first call closes a zero-length chunk at offset 0, which simply
  // initializes the offsets for the first real chunk.
  finalizeChunk();

  for (const cell of cells) {
    if (0 * cell === 0) { // a (finite) number
      // sOff lies past the tag and payload areas, so this covers both writes.
      ensureCapacity(sOff);
      const num = Number(cell);
      // Only non-negative integers survive the unsigned 32-bit slot unchanged;
      // fractions, negatives and -0 are stored losslessly as float64 instead.
      const fitsInt32Slot = Number.isInteger(num) && num >= 0 &&
          num <= 0x7FFFFFFF && !Object.is(num, -0);
      if (fitsInt32Slot) {
        b8[bOff++] = TAG_INT32;
        b32[pOff / 4] = num;
        pOff += 4;
      } else {
        b8[bOff++] = TAG_FLOAT64;
        // Round up to the next 8-byte boundary for the Float64Array index.
        const idx = Math.ceil(pOff / 8);
        f64[idx] = num;
        pOff = idx * 8 + 8;
      }
    } else { // a string
      // Coerce explicitly so non-string cells are handled the same way
      // regardless of how the TextEncoder implementation treats them.
      const text = String(cell);
      // UTF-8 needs at most 3 bytes per UTF-16 code unit, plus the terminator.
      ensureCapacity(sOff + text.length * 3 + 1);
      b8[bOff++] = TAG_STRING;
      sOff += encoder.encodeInto(text, b8.subarray(sOff)).written;
      b8[sOff++] = 0; // NUL terminator
    }
    // Start a new chunk once the tag area is full or the payload area can no
    // longer hold an aligned float64.
    if (bOff - chunkStart >= BITMAP_SIZE || pOff - pStart >= INT_PAYLOAD_SIZE - 8) {
      finalizeChunk();
    }
  }
  return buf.slice(0, finalizeChunk());
}
/**
 * Walks every chunk of an encoded buffer, decodes each cell and feeds it to a
 * Hasher (numbers are hashed by value, strings by their length).
 *
 * Chunk layout read here (offsets relative to the chunk start):
 *   [0, 4)   uint32  chunk length in bytes
 *   [4, 8)   uint32  number of cells in the chunk
 *   [8, 8 + cells)   one tag byte per cell
 *   [BITMAP_SIZE, BITMAP_SIZE + INT_PAYLOAD_SIZE)  numeric payload
 *   [BITMAP_SIZE + INT_PAYLOAD_SIZE, chunk length) NUL-terminated UTF-8 strings
 *
 * Relies on BITMAP_SIZE, INT_PAYLOAD_SIZE and Hasher, which are defined
 * outside this function.
 *
 * @param {ArrayBuffer} buf Buffer to decode; an empty buffer yields 0 cells.
 * @returns {Array} A pair `[numCells, hasher.digest]`.
 * @throws {RangeError} If a chunk header reports a zero or unreadable size.
 */
function decodeAndIterate(buf) {
  const b8 = new Uint8Array(buf);
  const u32 = new Uint32Array(buf);
  // INT32 cells are read as signed so negative values written as two's
  // complement come back negative rather than as large unsigned numbers.
  const i32 = new Int32Array(buf);
  const f64 = new Float64Array(buf);
  let numCells = 0;
  let chunkStart = 0;
  const hasher = new Hasher();

  while (chunkStart < buf.byteLength) {
    const chunkSize = u32[chunkStart / 4];
    const cellsInChunk = u32[chunkStart / 4 + 1];
    // A zero or missing size would never advance chunkStart and loop forever.
    if (!(chunkSize > 0)) {
      throw new RangeError(`Invalid chunk size ${chunkSize} at byte offset ${chunkStart}`);
    }
    const pStart = chunkStart + BITMAP_SIZE;
    let pOff = pStart;
    const sStart = pOff + INT_PAYLOAD_SIZE;
    const sEnd = chunkStart + chunkSize;
    // Decode the whole string area once, then hand strings out in order.
    const strings = decoder.decode(b8.subarray(sStart, sEnd)).split('\0');
    let strIdx = 0;
    const tagsEnd = chunkStart + 8 + cellsInChunk;
    for (let bOff = chunkStart + 8; bOff < tagsEnd; bOff++) {
      const tag = b8[bOff];
      let cell;
      if (tag === TAG_INT32) {
        cell = i32[pOff / 4];
        pOff += 4;
        hasher.update(cell);
      } else if (tag === TAG_FLOAT64) {
        // Float64 values sit on the next 8-byte boundary.
        const idx = Math.ceil(pOff / 8);
        cell = f64[idx];
        pOff = idx * 8 + 8;
        hasher.update(cell);
      } else if (tag === TAG_STRING) {
        cell = strings[strIdx++];
        hasher.update(cell.length);
      }
      // Cells with an unrecognized tag are counted but not hashed.
      numCells++;
    }
    chunkStart += chunkSize;
  }
  // NOTE(review): digest is read as a property, not called — confirm against Hasher.
  return [numCells, hasher.digest];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment