Last active
May 6, 2021 00:47
-
-
Save rsms/0401b43477395f3619bab75d064ed716 to your computer and use it in GitHub Desktop.
Compile-time constant binary tree for fast byte-array key lookups in JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export interface BTreeNode<T> { | |
k :ArrayLike<byte> | |
v :T | |
L? :BTreeNode<T> | |
R? :BTreeNode<T> | |
} | |
export class BTree<T> { | |
readonly root :BTreeNode<T> | |
constructor(root :BTreeNode<T>) { | |
this.root = root | |
} | |
get(key :ArrayLike<byte>) :T|null { | |
return lookup(key, this.root) | |
} | |
} | |
// These two lookup functions are about as fast. Use whichever style you prefer. | |
// function lookup<T>(key :ArrayLike<byte>, n :BTreeNode<T>) :T|null { | |
// let c = bufcmp(key, n.k) | |
// return ( | |
// (c == -1) ? n.L ? lookup(key, n.L) : null : | |
// (c == 1) ? n.R ? lookup(key, n.R) : null : | |
// (key.length == n.k.length) ? n.v : | |
// null | |
// ) | |
// } | |
function lookup<T>(key :ArrayLike<byte>, n :BTreeNode<T>) :T|null { | |
while (n) { | |
const c = bufcmp(key, n.k) | |
if (c == -1) { | |
n = n.L as BTreeNode<T> | |
} else if (c == 1) { | |
n = n.R as BTreeNode<T> | |
} else if (key.length == n.k.length) { | |
return n.v | |
} else { | |
break | |
} | |
} | |
return null | |
} | |
function bufcmp(a :ArrayLike<byte>, b :ArrayLike<byte>) :int { | |
const aL = a.length, bL = b.length, L = (aL < bL ? aL : bL) | |
for (let i = 0; i != L; ++i) { | |
if (a[i] < b[i]) { return -1 } | |
if (b[i] < a[i]) { return 1 } | |
} | |
return ( | |
aL < bL ? -1 : | |
bL < aL ? 1 : | |
0 | |
) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { BTree } from './btree' | |
// generated by gen-btree.js | |
const cdat = new Uint8Array([ | |
103,111,99,111,110,116,105,110,117,101,99,97,115,101,98,114,101,97,107,99, | |
104,97,110,99,111,110,115,116,101,108,115,101,100,101,102,97,117,108,116, | |
100,101,102,101,114,102,97,108,108,116,104,114,111,117,103,104,102,111,114, | |
102,117,110,99,112,97,99,107,97,103,101,105,102,103,111,116,111,105,109,112, | |
111,114,116,105,110,116,101,114,102,97,99,101,109,97,112,115,101,108,101,99, | |
116,114,97,110,103,101,114,101,116,117,114,110,115,119,105,116,99,104,115, | |
116,114,117,99,116,116,121,112,101,118,97,114]); | |
const keywords = new BTree<token>( | |
{ k: cdat.subarray(0,2) /*go*/, v: token.GO, | |
L:{ k: cdat.subarray(2,10) /*continue*/, v: token.CONTINUE, | |
L:{ k: cdat.subarray(10,14) /*case*/, v: token.CASE, | |
L:{ k: cdat.subarray(14,19) /*break*/, v: token.BREAK}, | |
R:{ k: cdat.subarray(19,23) /*chan*/, v: token.CHAN, | |
R:{ k: cdat.subarray(23,28) /*const*/, v: token.CONST}}}, | |
R:{ k: cdat.subarray(28,32) /*else*/, v: token.ELSE, | |
L:{ k: cdat.subarray(32,39) /*default*/, v: token.DEFAULT, | |
R:{ k: cdat.subarray(39,44) /*defer*/, v: token.DEFER}}, | |
R:{ k: cdat.subarray(44,55) /*fallthrough*/, v: token.FALLTHROUGH, | |
R:{ k: cdat.subarray(55,58) /*for*/, v: token.FOR, | |
R:{ k: cdat.subarray(58,62) /*func*/, v: token.FUNC}}}}}, | |
R:{ k: cdat.subarray(62,69) /*package*/, v: token.PACKAGE, | |
L:{ k: cdat.subarray(69,71) /*if*/, v: token.IF, | |
L:{ k: cdat.subarray(71,75) /*goto*/, v: token.GOTO}, | |
R:{ k: cdat.subarray(75,81) /*import*/, v: token.IMPORT, | |
R:{ k: cdat.subarray(81,90) /*interface*/, v: token.INTERFACE, | |
R:{ k: cdat.subarray(90,93) /*map*/, v: token.MAP}}}}, | |
R:{ k: cdat.subarray(93,99) /*select*/, v: token.SELECT, | |
L:{ k: cdat.subarray(99,104) /*range*/, v: token.RANGE, | |
R:{ k: cdat.subarray(104,110) /*return*/, v: token.RETURN}}, | |
R:{ k: cdat.subarray(110,116) /*switch*/, v: token.SWITCH, | |
L:{ k: cdat.subarray(116,122) /*struct*/, v: token.STRUCT}, | |
R:{ k: cdat.subarray(122,126) /*type*/, v: token.TYPE, | |
R:{ k: cdat.subarray(126,129) /*var*/, v: token.VAR}}}}}} | |
) | |
// lookupKeyword maps an identifier to its keyword token or IDENT | |
// (if not a keyword). | |
// | |
function lookupKeyword(ident :ArrayLike<byte>) :token { | |
return keywords.get(ident) || token.IDENT | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ConstData { | |
constructor(name /*string*/) { | |
this.name = name | |
this.bytes = [] // byte[] | |
} | |
alloc(bytes /*ArrayLike<byte>*/) { | |
const start = this.bytes.length | |
const end = start + bytes.length | |
this.bytes = this.bytes.concat(bytes) | |
return `${this.name}.subarray(${start},${end})` | |
} | |
getInitJS() { | |
return `const ${this.name} = new Uint8Array([${this.bytes.join(',')}])` | |
} | |
} | |
function genBTree(cdat /*ConstData*/, m /*Map*/) { | |
const jsIdRe = /^[a-zA-Z0-9_]+$/ | |
const indentation = ' ' | |
const pairs = [] | |
const sortedKeys = Array.from(m.keys()) | |
sortedKeys.sort() | |
for (let i = 0; i < sortedKeys.length; ++i) { | |
const k = sortedKeys[i] | |
pairs.push({ key: k, value: m.get(k)}) | |
} | |
const genBranch = (pairs, indent) => { | |
let pair, leftPairs, rightPairs | |
if (pairs.length == 1) { | |
pair = pairs[0] | |
} else { | |
const midIndex = Math.floor(pairs.length / 2)-1 | |
pair = pairs[midIndex] | |
leftPairs = pairs.slice(0, midIndex) | |
rightPairs = pairs.slice(midIndex + 1) | |
} | |
let s = `{ k: ${cdat.alloc(strbytes(pair.key))} /*${pair.key.replace('*/','*-/')}*/, v: ${pair.value}` | |
if (leftPairs && leftPairs.length) { | |
s += `,\n${indent} L:${genBranch(leftPairs, indent + indentation)}` | |
} | |
if (rightPairs && rightPairs.length) { | |
s += `,\n${indent} R:${genBranch(rightPairs, indent + indentation)}` | |
} | |
return s + `}` | |
} | |
return genBranch(pairs, indentation) | |
} | |
function strbytes(s) { | |
return Array.from(s).map(s => s.charCodeAt(0)) | |
} | |
const cdat = new ConstData('cdat') | |
const keywordsJS = genBTree(cdat, new Map([ | |
["break", "token.BREAK"], | |
["case", "token.CASE"], | |
["chan", "token.CHAN"], | |
["const", "token.CONST"], | |
["continue", "token.CONTINUE"], | |
["default", "token.DEFAULT"], | |
["defer", "token.DEFER"], | |
["else", "token.ELSE"], | |
["fallthrough", "token.FALLTHROUGH"], | |
["for", "token.FOR"], | |
["func", "token.FUNC"], | |
["go", "token.GO"], | |
["goto", "token.GOTO"], | |
["if", "token.IF"], | |
["import", "token.IMPORT"], | |
["interface", "token.INTERFACE"], | |
["map", "token.MAP"], | |
["package", "token.PACKAGE"], | |
["range", "token.RANGE"], | |
["return", "token.RETURN"], | |
["select", "token.SELECT"], | |
["struct", "token.STRUCT"], | |
["switch", "token.SWITCH"], | |
["type", "token.TYPE"], | |
["var", "token.VAR"], | |
])) | |
console.log( | |
cdat.getInitJS() + ';\n' + | |
'const keywords =\n ' + keywordsJS | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function strbytes(s) { | |
return Array.from(s).map(s => s.charCodeAt(0)) | |
} | |
function strbuf(s) { | |
return new Uint8Array(strbytes(s)) | |
} | |
function bufstr(b) { | |
return new Buffer(b).toString('utf8') | |
// const dec = new TextDecoder('utf-8') | |
// return dec.decode(b) | |
} | |
const cdat = new Uint8Array([ | |
103,111,99,111,110,116,105,110,117,101,99,97,115,101,98,114,101,97,107,99, | |
104,97,110,99,111,110,115,116,101,108,115,101,100,101,102,97,117,108,116, | |
100,101,102,101,114,102,97,108,108,116,104,114,111,117,103,104,102,111,114, | |
102,117,110,99,112,97,99,107,97,103,101,105,102,103,111,116,111,105,109,112, | |
111,114,116,105,110,116,101,114,102,97,99,101,109,97,112,115,101,108,101,99, | |
116,114,97,110,103,101,114,101,116,117,114,110,115,119,105,116,99,104,115, | |
116,114,117,99,116,116,121,112,101,118,97,114 | |
]); | |
const keywords = | |
{ k: cdat.subarray(0,2) /*go*/, v: "GO", | |
L:{ k: cdat.subarray(2,10) /*continue*/, v: "CONTINUE", | |
L:{ k: cdat.subarray(10,14) /*case*/, v: "CASE", | |
L:{ k: cdat.subarray(14,19) /*break*/, v: "BREAK"}, | |
R:{ k: cdat.subarray(19,23) /*chan*/, v: "CHAN", | |
R:{ k: cdat.subarray(23,28) /*const*/, v: "CONST"}}}, | |
R:{ k: cdat.subarray(28,32) /*else*/, v: "ELSE", | |
L:{ k: cdat.subarray(32,39) /*default*/, v: "DEFAULT", | |
R:{ k: cdat.subarray(39,44) /*defer*/, v: "DEFER"}}, | |
R:{ k: cdat.subarray(44,55) /*fallthrough*/, v: "FALLTHROUGH", | |
R:{ k: cdat.subarray(55,58) /*for*/, v: "FOR", | |
R:{ k: cdat.subarray(58,62) /*func*/, v: "FUNC"}}}}}, | |
R:{ k: cdat.subarray(62,69) /*package*/, v: "PACKAGE", | |
L:{ k: cdat.subarray(69,71) /*if*/, v: "IF", | |
L:{ k: cdat.subarray(71,75) /*goto*/, v: "GOTO"}, | |
R:{ k: cdat.subarray(75,81) /*import*/, v: "IMPORT", | |
R:{ k: cdat.subarray(81,90) /*interface*/, v: "INTERFACE", | |
R:{ k: cdat.subarray(90,93) /*map*/, v: "MAP"}}}}, | |
R:{ k: cdat.subarray(93,99) /*select*/, v: "SELECT", | |
L:{ k: cdat.subarray(99,104) /*range*/, v: "RANGE", | |
R:{ k: cdat.subarray(104,110) /*return*/, v: "RETURN"}}, | |
R:{ k: cdat.subarray(110,116) /*switch*/, v: "SWITCH", | |
L:{ k: cdat.subarray(116,122) /*struct*/, v: "STRUCT"}, | |
R:{ k: cdat.subarray(122,126) /*type*/, v: "TYPE", | |
R:{ k: cdat.subarray(126,129) /*var*/, v: "VAR"}}}}}} | |
function bufcmp(a, b /*ArrayLike<byte>*/) { | |
const aL = a.length, bL = b.length, L = (aL < bL ? aL : bL) | |
for (let i = 0; i != L; ++i) { | |
if (a[i] < b[i]) { return -1 } | |
if (b[i] < a[i]) { return 1 } | |
} | |
return ( | |
aL < bL ? -1 : | |
bL < aL ? 1 : | |
0 | |
) | |
} | |
function lookupRecurse(key /*Uint8Array*/, n /*Node*/) { | |
let c = bufcmp(key, n.k) | |
return ( | |
(c == -1) ? n.L ? lookupRecurse(key, n.L) : null : | |
(c == 1) ? n.R ? lookupRecurse(key, n.R) : null : | |
(key.length == n.k.length) ? n.v : | |
null | |
) | |
} | |
function lookupLoop(key /*:Uint8Array*/, n /*Node*/) { | |
while (n) { | |
const c = bufcmp(key, n.k) | |
if (c == -1) { | |
n = n.L | |
} else if (c == 1) { | |
n = n.R | |
} else if (key.length == n.k.length) { | |
return n.v | |
} else { | |
break | |
} | |
} | |
return null | |
} | |
const assert = require('assert') | |
function bench(label, f, iterations=100000) { | |
if (typeof gc != 'undefined') { | |
gc() | |
} | |
let timeStart = process.hrtime() | |
for (let i = 0; i != iterations; ++i) { | |
f() | |
} | |
let timeEnd = process.hrtime() | |
let start = (timeStart[0] * 1000) + (timeStart[1] / 1000000) | |
let end = (timeEnd[0] * 1000) + (timeEnd[1] / 1000000) | |
let d = end - start | |
let avgd = d / iterations | |
let duration = ( | |
avgd < 0.0001 ? `${avgd * 1000000} ns` : | |
avgd < 0.1 ? `${(avgd * 1000).toFixed(2)} µs` : | |
`${avgd.toFixed(2)} ms` | |
) | |
console.log(`${label} ${duration} (avg)`) | |
} | |
let wordsMap = (function() { | |
let m = new Map() | |
let n = keywords | |
const f = (n) => { | |
m.set(n.k, n.v) | |
m.set(n.k.subarray(0, n.k.lenght - 1), null) // miss | |
m.set(strbuf(bufstr(n.k + 'blabla')), null) // miss | |
if (n.L) { f(n.L) } | |
if (n.R) { f(n.R) } | |
} | |
f(keywords) | |
return m | |
})() | |
bench('lookupRecurse', () => { | |
for (const [k, v] of wordsMap) { | |
assert.equal(v, lookupRecurse(k, keywords)) | |
} | |
}) | |
bench('lookupLoop', () => { | |
for (const [k, v] of wordsMap) { | |
assert.equal(v, lookupLoop(k, keywords)) | |
} | |
}) | |
// MacBook 15 (2015), nodejs v8.1.3 | |
// lookupRecurse 7.82 µs (avg) | |
// lookupLoop 6.79 µs (avg) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment