Created
April 1, 2018 13:32
-
-
Save felipecrv/236f82183bbb27bd01033f94fe42e69b to your computer and use it in GitHub Desktop.
In-memory B+ Tree implemented in Javascript with Flow type annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* @flow */ | |
const KEY_KIND_STRING = 1; | |
const KEY_KIND_NUMBER = 2; | |
const KEY_KIND_BOOL = 3; | |
const KEY_KIND_RECORD = 4; | |
type KeyKind = 1 | 2 | 3 | 4; | |
class KeyValue<K, V> { | |
key: ?K; | |
value: ?V; | |
constructor(key: ?K, value: ?V) { | |
this.key = key; | |
this.value = value; | |
} | |
}; | |
type KeyValues<K, V> = Array<KeyValue<K, V>>; | |
type Node<K, V> = InnerNode<K, V> | LeafNode<K, V>; | |
type Atom = number | string | boolean; | |
function isAtomic(kind: KeyKind): boolean { | |
return kind < Btree.KEY_KIND_RECORD; | |
} | |
function ltAtom(a: any, b: any): boolean { | |
// NULLs are considered smaller than any other key | |
if (a === null || a === undefined) { | |
return b !== null && b !== undefined; | |
} | |
if (b === null || b === undefined) { | |
return false; | |
} | |
return a < b; | |
} | |
function ltRecord(a: any, b: any): boolean { | |
// NULLs are considered smaller than any other key | |
if (a === null || a === undefined) { | |
return b !== null && b !== undefined; | |
} | |
if (b === null || b === undefined) { | |
return false; | |
} | |
return a.lt(b); | |
} | |
function lt(kind: KeyKind, a: any, b: any): boolean { | |
return isAtomic(kind) ? ltAtom(a, b) : ltRecord(a, b); | |
} | |
function eqAtom(a: any, b: any): boolean { | |
if (a === null || a === undefined) { | |
return b === null || b === undefined; | |
} | |
return a === b; | |
} | |
function eqRecord(a: any, b: any): boolean { | |
if (a === null || a === undefined) { | |
return b === null || b === undefined; | |
} | |
if (b !== null && b !== undefined) { | |
return a.eq(b); | |
} | |
return false; | |
} | |
function eq(kind: KeyKind, a: any, b: any): boolean { | |
return isAtomic(kind) ? eqAtom(a, b) : eqRecord(a, b); | |
} | |
function findUpperBoundIndex<V>(keyKind: KeyKind, values: KeyValues<any, V>, key: any): number { | |
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord; | |
let lo = 0; | |
let hi = values.length; | |
while (lo < hi) { | |
// Invariants: | |
// - answer in [lo, hi] and lo < hi | |
// - values.length / 2 <= mid <= values.length | |
let mid = Math.floor((lo + hi) / 2); | |
if (_lt(values[mid].key, key)) { | |
lo = mid + 1; | |
} else { | |
hi = mid; | |
} | |
} | |
return lo; | |
} | |
function findUpperBoundKeyIndex<V>(keyKind: KeyKind, keys: Array<any>, key: any): number { | |
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord; | |
let lo = 0; | |
let hi = keys.length; | |
while (lo < hi) { | |
// Invariants: | |
// - answer in [lo, hi] and lo < hi | |
// - values.length / 2 <= mid <= values.length | |
let mid = Math.floor((lo + hi) / 2); | |
if (_lt(keys[mid], key)) { | |
lo = mid + 1; | |
} else { | |
hi = mid; | |
} | |
} | |
return lo; | |
} | |
class InnerNode<K, V> { | |
// k0, k1, .., kn-2 | |
keys: Array<?K>; | |
// [.., k0], [.., k1], .., [.., kn-2], [.., kn-1] | |
children: Array<Node<K, V>>; | |
constructor(nodes: Array<Node<K, V>>) { | |
this.children = nodes; | |
this.keys = new Array(nodes.length - 1); | |
for (let i = 0; i < nodes.length - 1; i++) { | |
this.keys[i] = nodes[i].maxKey(); | |
} | |
} | |
// Recursive: O(log N) | |
minKey(): ?K { | |
return this.children[0].minKey(); | |
} | |
// Recursive: O(log N) | |
maxKey(): ?K { | |
return this.children[this.children.length - 1].maxKey(); | |
} | |
// Recursive | |
size() { | |
let len = 0; | |
const first = this.children[0]; | |
if (first.values) { | |
const leaves: Array<LeafNode<K, V>> = (this.children: any); | |
for (let i = 0; i < leaves.length; i++) { | |
len += leaves[i].values.length; | |
} | |
} else { | |
const nodes: Array<InnerNode<K, V>> = (this.children: any); | |
for (let i = 0; i < nodes.length; i++) { | |
len += nodes[i].size(); | |
} | |
} | |
return len; | |
} | |
} | |
class LeafNode<K, V> { | |
values: KeyValues<K, V>; | |
constructor(values: KeyValues<K, V>) { | |
this.values = values; | |
} | |
// O(1) | |
minKey(): ?K { | |
return this.values[0].key; | |
} | |
// O(1) | |
maxKey(): ?K { | |
return this.values[this.values.length - 1].key; | |
} | |
} | |
class Btree<K, V> { | |
static KEY_KIND_STRING: KeyKind; | |
static KEY_KIND_NUMBER: KeyKind; | |
static KEY_KIND_BOOL: KeyKind; | |
static KEY_KIND_RECORD: KeyKind; | |
static KeyValue: Class<KeyValue<K, V>>; | |
static InnerNode: Class<InnerNode<K, V>>; | |
static LeafNode: Class<LeafNode<K, V>>; | |
static detail: { | |
isAtomic: typeof(isAtomic), | |
lt: typeof(lt), | |
eq: typeof(eq), | |
findUpperBoundIndex: typeof(findUpperBoundIndex), | |
findUpperBoundKeyIndex: typeof(findUpperBoundKeyIndex), | |
}; | |
root: Node<K, V>; | |
keyKind: KeyKind; | |
maxNodeSize: number; | |
constructor(keyKind: KeyKind, maxNodeSize: number = 16) { | |
this.root = new LeafNode([]); | |
this.keyKind = keyKind; | |
this.maxNodeSize = maxNodeSize; | |
} | |
// Returns the leaf that can contain `key` | |
findLeaf(key: K): LeafNode<K, V> { | |
let node = this.root; | |
// While a leaf is not found... | |
while (!node.values) { | |
const inner: InnerNode<K, V> = (node: any); | |
const i = findUpperBoundKeyIndex(this.keyKind, inner.keys, key); | |
node = inner.children[i]; | |
} | |
// node is a LeafNode | |
return (node: any); | |
} | |
find(key: K): ?KeyValue<K, V> { | |
const leaf = this.findLeaf(key); | |
const i = findUpperBoundIndex(this.keyKind, leaf.values, key); | |
const pair = leaf.values[i]; | |
return (pair && eq(this.keyKind, key, pair.key)) ? pair : undefined; | |
} | |
partitionLeaf(leaf: LeafNode<K, V>): Array<Node<K, V>> { | |
const leftSize = Math.ceil(leaf.values.length / 2); | |
const leftValues = leaf.values.slice(0, leftSize); | |
const rightValues = leaf.values.slice(leftSize, leaf.values.length); | |
const left = new LeafNode(leftValues); | |
const right = new LeafNode(rightValues); | |
return [left, right]; | |
} | |
partitionInner(inner: InnerNode<K, V>): Array<Node<K, V>> { | |
const leftSize = Math.ceil(inner.children.length / 2); | |
const leftChildren = inner.children.slice(0, leftSize); | |
const rightChildren = inner.children.slice(leftSize, inner.children.length); | |
const left = new InnerNode(leftChildren); | |
const right = new InnerNode(rightChildren); | |
return [left, right]; | |
} | |
add(key: ?K, value: V) { | |
// Variables representing the current state in the search for the leaf node | |
// where the key will be inserted: | |
// | |
// - indexOnParent: the position of `node` in `parent` | |
// - parent: the parent of `node` | |
// - node: The node in the path towards the leaf node where the key will be inserted. | |
let indexOnParent = -1; | |
let parent: InnerNode<K, V> = (null: any); | |
let node = this.root; | |
// Check if the root is full and split it. Be either an inner node or a leaf | |
// node. A root split is the only event that makes the B-tree grow in | |
// height. That's how it's kept balanced. | |
if (node.values) { // root is a leaf | |
const leaf: LeafNode<K, V> = (node: any); | |
if (leaf.values.length >= this.maxNodeSize) { | |
const partitions = this.partitionLeaf(leaf); | |
parent = new InnerNode(partitions); | |
this.root = parent; | |
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1; | |
node = parent.children[indexOnParent]; | |
} | |
} else { // root is an inner node | |
const inner: InnerNode<K, V> = (node: any); | |
if (inner.children.length >= this.maxNodeSize) { | |
const partitions = this.partitionInner(inner); | |
parent = new InnerNode(partitions); | |
this.root = parent; | |
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1; | |
node = parent.children[indexOnParent]; | |
} | |
} | |
// While a leaf is not found... | |
while (!node.values) { | |
// If the node is full, we split it and reconfigure the parent. Here's an | |
// illustration of what the `parent` looks like before and after the | |
// split: | |
// | |
// k0 k1 k2 | |
// [...k0] [...k...k1] [...k2] [...k3] | |
// ^ node | |
// | |
// k0 k k1 k2 | |
// [...k0] [...k] [...k1] [...k2] [...k3] | |
// ^ left ^ right | |
// | |
let inner: InnerNode<K, V> = (node: any); | |
if (inner.children.length >= this.maxNodeSize) { | |
// Invariant: parent is not null. | |
// Proof: if parent is null, node is the root. But node is full and we | |
// checked before if the root was full to split it. So node is not the | |
// root. It has a parent. | |
const [left, right] = this.partitionInner(inner); | |
const maxLeftKey = left.maxKey(); // k in the illustration | |
parent.keys.splice(indexOnParent, 0, maxLeftKey); | |
parent.children.splice(indexOnParent, 1, left, right); | |
// The indexOnParent on parent might have shifted | |
indexOnParent = lt(this.keyKind, key, maxLeftKey) ? indexOnParent : indexOnParent + 1; | |
node = parent.children[indexOnParent]; | |
inner = (node: any); | |
} | |
// Advance the search | |
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key); | |
parent = inner; | |
node = parent.children[indexOnParent]; | |
} | |
// Found a leaf. Add the the new key-value pair to it. | |
let leaf: LeafNode<K, V> = (node: any); | |
const i = findUpperBoundIndex(this.keyKind, leaf.values, key); | |
leaf.values.splice(i, 0, new KeyValue(key, value)); | |
// Split the leaf if it overflowed after insertion | |
if (leaf.values.length > this.maxNodeSize) { | |
const [left, right] = this.partitionLeaf(leaf); | |
const maxLeftKey = left.maxKey(); | |
parent.keys.splice(indexOnParent, 0, maxLeftKey); | |
parent.children.splice(indexOnParent, 1, left, right); | |
} | |
} | |
// Pre-conditions for the fuse* and borrow* methods | |
// 1. parent has more than one children (that's why fusing/borrowing is | |
// possible) | |
// 2. node has 1 children, thus node.keys is empty (that's why we're | |
// fusing/borrowing | |
// Additional pre-conditions: | |
// - next has less than 3 children | |
// - indexOnParent < parent.children.length - 1 since the index of next on | |
// parent is indexOnParent + 1 | |
fuseWithNext( | |
parent: InnerNode<K, V>, | |
node: InnerNode<K, V>, | |
indexOnParent: number, | |
next: InnerNode<K, V>) { | |
node.keys = [node.children[0].maxKey()].concat(next.keys); | |
node.children = node.children.concat(next.children); | |
// Fix keys/children in the parent | |
if (indexOnParent + 1 < parent.keys.length) { | |
parent.keys[indexOnParent] = parent.keys[indexOnParent + 1]; | |
parent.keys.splice(indexOnParent + 1, 1); | |
} else { | |
parent.keys[indexOnParent] = node.children[node.children.length - 1].maxKey(); | |
parent.keys.splice(parent.keys.length - 1, 1); | |
} | |
parent.children.splice(indexOnParent + 1, 1); | |
} | |
// Additional pre-conditions: | |
// - prev has less than 3 children | |
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1 | |
fuseWithPrev( | |
parent: InnerNode<K, V>, | |
node: InnerNode<K, V>, | |
indexOnParent: number, | |
prev: InnerNode<K, V>) { | |
node.keys = prev.keys; | |
node.keys.push(prev.children[prev.children.length - 1].maxKey()); | |
const child = node.children[0]; | |
node.children = prev.children; | |
node.children.push(child); | |
// Fix keys/children in the parent | |
parent.keys.splice(indexOnParent - 1, 1); | |
parent.children.splice(indexOnParent - 1, 1); | |
} | |
// Additional pre-conditions: | |
// - next has >= 3 children | |
// - indexOnParent < parent.children.length - 1 since the index of next on | |
// parent is indexOnParent + 1 | |
borrowFromNext( | |
parent: InnerNode<K, V>, | |
node: InnerNode<K, V>, | |
indexOnParent: number, | |
next: InnerNode<K, V>) { | |
const borrowCount = Math.ceil(next.children.length / 2); | |
// Copy keys/children from next | |
node.keys.push(node.children[node.children.length - 1].maxKey()); | |
node.keys = node.keys.concat(next.keys.slice(0, borrowCount - 1)); | |
const borrowed = next.children.slice(0, borrowCount); | |
node.children = node.children.concat(borrowed); | |
// Remove the borrowed keys/children from next | |
next.keys = next.keys.slice(borrowCount, next.keys.length); | |
next.children = next.children.slice(borrowCount, next.children.length); | |
// Fix keys/children in the parent | |
parent.keys[indexOnParent] = node.maxKey(); | |
} | |
// Additional pre-conditions: | |
// - prev has >= 3 children | |
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1 | |
borrowFromPrev( | |
parent: InnerNode<K, V>, | |
node: InnerNode<K, V>, | |
indexOnParent: number, | |
prev: InnerNode<K, V>) { | |
const borrowStart = Math.ceil(prev.children.length / 2); | |
// Copy keys/children from prev | |
const borrowedKeys = prev.keys.slice(borrowStart, prev.keys.length); | |
borrowedKeys.push(prev.maxKey()); | |
node.keys = borrowedKeys.concat(node.keys); | |
const borrowed = prev.children.slice(borrowStart, prev.children.length); | |
node.children = borrowed.concat(node.children); | |
// Remove the borrowed keys/children from prev | |
prev.keys = prev.keys.slice(0, borrowStart - 1); | |
prev.children = prev.children.slice(0, borrowStart); | |
// Fix keys/children in the parent | |
parent.keys[indexOnParent - 1] = prev.children[prev.children.length - 1].maxKey(); | |
} | |
removeFirst(key: ?K): ?KeyValue<K, V> { | |
// While the root is an inner node containing only 1 children... | |
while (!this.root.values) { | |
const inner: InnerNode<K, V> = (this.root: any); | |
if (inner.children.length > 1) { | |
break; | |
} | |
// ...reduce the height of the tree. | |
this.root = inner.children[0]; | |
} | |
let parent: InnerNode<K, V> = (null: any); | |
let indexOnParent = -1; | |
let node = this.root; | |
while (!node.values) { | |
const inner: InnerNode<K, V> = (node: any); | |
if (inner.children.length === 1) { | |
// Loop invariant: parent.keys is non-empty (i.e. parent.children.length >= 2) | |
const prev: ?InnerNode<K, V> = indexOnParent > 0 ? | |
(parent.children[indexOnParent - 1]: any) : null; | |
const next: ?InnerNode<K, V> = indexOnParent < parent.children.length - 1 ? | |
(parent.children[indexOnParent + 1]: any) : null; | |
// Try to fuse with a sibling or borrow nodes from it. Fusing is | |
// preferred over borrowing. The next sibling is preferred over the | |
// previous sibling. Either prev or next are non-null since the parent | |
// has more than one children. | |
let fused = false; | |
if (next) { | |
if (next.children.length < 3) { | |
this.fuseWithNext(parent, inner, indexOnParent, next); | |
fused = true; | |
} | |
} else if ((prev: any).children.length < 3) { | |
this.fuseWithPrev(parent, inner, indexOnParent, (prev: any)); | |
fused = true; | |
} | |
if (!fused) { | |
if (next) { | |
this.borrowFromNext(parent, inner, indexOnParent, next); | |
} else { | |
this.borrowFromPrev(parent, inner, indexOnParent, (prev: any)); | |
} | |
} | |
} | |
// Advance the search | |
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key); | |
parent = inner; | |
node = parent.children[indexOnParent]; | |
} | |
// Found a leaf. Remove the key-value pair if it exists. | |
let leaf: LeafNode<K, V> = (node: any); | |
const i = findUpperBoundIndex(this.keyKind, leaf.values, key); | |
const pair = leaf.values[i]; | |
if (!pair || !eq(this.keyKind, key, pair.key)) { | |
return undefined; | |
} | |
leaf.values.splice(i, 1); | |
// Fix parent | |
if (leaf.values.length === 0 && parent) { | |
parent.keys.splice(Math.min(indexOnParent, parent.keys.length - 1), 1); | |
parent.children.splice(indexOnParent, 1); | |
} | |
return pair; | |
} | |
size() { | |
if (this.root.values) { | |
const leaf: LeafNode<K, V> = (this.root: any); | |
return leaf.values.length; | |
} | |
const inner: InnerNode<K, V> = (this.root: any); | |
return inner.size(); | |
} | |
} | |
Btree.KEY_KIND_STRING = KEY_KIND_STRING; | |
Btree.KEY_KIND_NUMBER = KEY_KIND_NUMBER; | |
Btree.KEY_KIND_BOOL = KEY_KIND_BOOL; | |
Btree.KEY_KIND_RECORD = KEY_KIND_RECORD; | |
Btree.KeyValue = KeyValue; | |
Btree.InnerNode = InnerNode; | |
Btree.LeafNode = LeafNode; | |
Btree.detail = { | |
isAtomic, | |
lt, | |
eq, | |
findUpperBoundIndex, | |
findUpperBoundKeyIndex, | |
}; | |
module.exports = Btree; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This implementation allows empty nodes and less than half-full nodes, but due to top-down splitting/fusing/borrowing (in contrast with classical bottom-up fusing/splitting) it guarantees O(log N) tree height where N is the number of insertions.
More details in Deletion Without Rebalancing in Multiway Search Trees by Siddhartha Sen and Robert E. Tarjan from Microsoft Research.