-
-
Save teepark/572734 to your computer and use it in GitHub Desktop.
import bisect | |
import itertools | |
import operator | |
class _BNode(object): | |
__slots__ = ["tree", "contents", "children"] | |
def __init__(self, tree, contents=None, children=None): | |
self.tree = tree | |
self.contents = contents or [] | |
self.children = children or [] | |
if self.children: | |
assert len(self.contents) + 1 == len(self.children), \ | |
"one more child than data item required" | |
def __repr__(self): | |
name = getattr(self, "children", 0) and "Branch" or "Leaf" | |
return "<%s %s>" % (name, ", ".join(map(str, self.contents))) | |
def lateral(self, parent, parent_index, dest, dest_index): | |
if parent_index > dest_index: | |
dest.contents.append(parent.contents[dest_index]) | |
parent.contents[dest_index] = self.contents.pop(0) | |
if self.children: | |
dest.children.append(self.children.pop(0)) | |
else: | |
dest.contents.insert(0, parent.contents[parent_index]) | |
parent.contents[parent_index] = self.contents.pop() | |
if self.children: | |
dest.children.insert(0, self.children.pop()) | |
def shrink(self, ancestors): | |
parent = None | |
if ancestors: | |
parent, parent_index = ancestors.pop() | |
# try to lend to the left neighboring sibling | |
if parent_index: | |
left_sib = parent.children[parent_index - 1] | |
if len(left_sib.contents) < self.tree.order: | |
self.lateral( | |
parent, parent_index, left_sib, parent_index - 1) | |
return | |
# try the right neighbor | |
if parent_index + 1 < len(parent.children): | |
right_sib = parent.children[parent_index + 1] | |
if len(right_sib.contents) < self.tree.order: | |
self.lateral( | |
parent, parent_index, right_sib, parent_index + 1) | |
return | |
center = len(self.contents) // 2 | |
sibling, push = self.split() | |
if not parent: | |
parent, parent_index = self.tree.BRANCH( | |
self.tree, children=[self]), 0 | |
self.tree._root = parent | |
# pass the median up to the parent | |
parent.contents.insert(parent_index, push) | |
parent.children.insert(parent_index + 1, sibling) | |
if len(parent.contents) > parent.tree.order: | |
parent.shrink(ancestors) | |
def grow(self, ancestors): | |
parent, parent_index = ancestors.pop() | |
minimum = self.tree.order // 2 | |
left_sib = right_sib = None | |
# try to borrow from the right sibling | |
if parent_index + 1 < len(parent.children): | |
right_sib = parent.children[parent_index + 1] | |
if len(right_sib.contents) > minimum: | |
right_sib.lateral(parent, parent_index + 1, self, parent_index) | |
return | |
# try to borrow from the left sibling | |
if parent_index: | |
left_sib = parent.children[parent_index - 1] | |
if len(left_sib.contents) > minimum: | |
left_sib.lateral(parent, parent_index - 1, self, parent_index) | |
return | |
# consolidate with a sibling - try left first | |
if left_sib: | |
left_sib.contents.append(parent.contents[parent_index - 1]) | |
left_sib.contents.extend(self.contents) | |
if self.children: | |
left_sib.children.extend(self.children) | |
parent.contents.pop(parent_index - 1) | |
parent.children.pop(parent_index) | |
else: | |
self.contents.append(parent.contents[parent_index]) | |
self.contents.extend(right_sib.contents) | |
if self.children: | |
self.children.extend(right_sib.children) | |
parent.contents.pop(parent_index) | |
parent.children.pop(parent_index + 1) | |
if len(parent.contents) < minimum: | |
if ancestors: | |
# parent is not the root | |
parent.grow(ancestors) | |
elif not parent.contents: | |
# parent is root, and its now empty | |
self.tree._root = left_sib or self | |
def split(self): | |
center = len(self.contents) // 2 | |
median = self.contents[center] | |
sibling = type(self)( | |
self.tree, | |
self.contents[center + 1:], | |
self.children[center + 1:]) | |
self.contents = self.contents[:center] | |
self.children = self.children[:center + 1] | |
return sibling, median | |
def insert(self, index, item, ancestors): | |
self.contents.insert(index, item) | |
if len(self.contents) > self.tree.order: | |
self.shrink(ancestors) | |
def remove(self, index, ancestors): | |
minimum = self.tree.order // 2 | |
if self.children: | |
# try promoting from the right subtree first, | |
# but only if it won't have to resize | |
additional_ancestors = [(self, index + 1)] | |
descendent = self.children[index + 1] | |
while descendent.children: | |
additional_ancestors.append((descendent, 0)) | |
descendent = descendent.children[0] | |
if len(descendent.contents) > minimum: | |
ancestors.extend(additional_ancestors) | |
self.contents[index] = descendent.contents[0] | |
descendent.remove(0, ancestors) | |
return | |
# fall back to the left child | |
additional_ancestors = [(self, index)] | |
descendent = self.children[index] | |
while descendent.children: | |
additional_ancestors.append( | |
(descendent, len(descendent.children) - 1)) | |
descendent = descendent.children[-1] | |
ancestors.extend(additional_ancestors) | |
self.contents[index] = descendent.contents[-1] | |
descendent.remove(len(descendent.children) - 1, ancestors) | |
else: | |
self.contents.pop(index) | |
if len(self.contents) < minimum and ancestors: | |
self.grow(ancestors) | |
class _BPlusLeaf(_BNode): | |
__slots__ = ["tree", "contents", "data", "next"] | |
def __init__(self, tree, contents=None, data=None, next=None): | |
self.tree = tree | |
self.contents = contents or [] | |
self.data = data or [] | |
self.next = next | |
assert len(self.contents) == len(self.data), "one data per key" | |
def insert(self, index, key, data, ancestors): | |
self.contents.insert(index, key) | |
self.data.insert(index, data) | |
if len(self.contents) > self.tree.order: | |
self.shrink(ancestors) | |
def lateral(self, parent, parent_index, dest, dest_index): | |
if parent_index > dest_index: | |
dest.contents.append(self.contents.pop(0)) | |
dest.data.append(self.data.pop(0)) | |
parent.contents[dest_index] = self.contents[0] | |
else: | |
dest.contents.insert(0, self.contents.pop()) | |
dest.data.insert(0, self.data.pop()) | |
parent.contents[parent_index] = dest.contents[0] | |
def split(self): | |
center = len(self.contents) // 2 | |
median = self.contents[center - 1] | |
sibling = type(self)( | |
self.tree, | |
self.contents[center:], | |
self.data[center:], | |
self.next) | |
self.contents = self.contents[:center] | |
self.data = self.data[:center] | |
self.next = sibling | |
return sibling, sibling.contents[0] | |
def remove(self, index, ancestors): | |
minimum = self.tree.order // 2 | |
if index >= len(self.contents): | |
self, index = self.next, 0 | |
key = self.contents[index] | |
# if any leaf that could accept the key can do so | |
# without any rebalancing necessary, then go that route | |
current = self | |
while current is not None and current.contents[0] == key: | |
if len(current.contents) > minimum: | |
if current.contents[0] == key: | |
index = 0 | |
else: | |
index = bisect.bisect_left(current.contents, key) | |
current.contents.pop(index) | |
current.data.pop(index) | |
return | |
current = current.next | |
self.grow(ancestors) | |
def grow(self, ancestors): | |
minimum = self.tree.order // 2 | |
parent, parent_index = ancestors.pop() | |
left_sib = right_sib = None | |
# try borrowing from a neighbor - try right first | |
if parent_index + 1 < len(parent.children): | |
right_sib = parent.children[parent_index + 1] | |
if len(right_sib.contents) > minimum: | |
right_sib.lateral(parent, parent_index + 1, self, parent_index) | |
return | |
# fallback to left | |
if parent_index: | |
left_sib = parent.children[parent_index - 1] | |
if len(left_sib.contents) > minimum: | |
left_sib.lateral(parent, parent_index - 1, self, parent_index) | |
return | |
# join with a neighbor - try left first | |
if left_sib: | |
left_sib.contents.extend(self.contents) | |
left_sib.data.extend(self.data) | |
parent.remove(parent_index - 1, ancestors) | |
return | |
# fallback to right | |
self.contents.extend(right_sib.contents) | |
self.data.extend(right_sib.data) | |
parent.remove(parent_index, ancestors) | |
class BTree(object): | |
BRANCH = LEAF = _BNode | |
def __init__(self, order): | |
self.order = order | |
self._root = self._bottom = self.LEAF(self) | |
def _path_to(self, item): | |
current = self._root | |
ancestry = [] | |
while getattr(current, "children", None): | |
index = bisect.bisect_left(current.contents, item) | |
ancestry.append((current, index)) | |
if index < len(current.contents) \ | |
and current.contents[index] == item: | |
return ancestry | |
current = current.children[index] | |
index = bisect.bisect_left(current.contents, item) | |
ancestry.append((current, index)) | |
present = index < len(current.contents) | |
present = present and current.contents[index] == item | |
return ancestry | |
def _present(self, item, ancestors): | |
last, index = ancestors[-1] | |
return index < len(last.contents) and last.contents[index] == item | |
def insert(self, item): | |
current = self._root | |
ancestors = self._path_to(item) | |
node, index = ancestors[-1] | |
while getattr(node, "children", None): | |
node = node.children[index] | |
index = bisect.bisect_left(node.contents, item) | |
ancestors.append((node, index)) | |
node, index = ancestors.pop() | |
node.insert(index, item, ancestors) | |
def remove(self, item): | |
current = self._root | |
ancestors = self._path_to(item) | |
if self._present(item, ancestors): | |
node, index = ancestors.pop() | |
node.remove(index, ancestors) | |
else: | |
raise ValueError("%r not in %s" % (item, self.__class__.__name__)) | |
def __contains__(self, item): | |
return self._present(item, self._path_to(item)) | |
def __iter__(self): | |
def _recurse(node): | |
if node.children: | |
for child, item in zip(node.children, node.contents): | |
for child_item in _recurse(child): | |
yield child_item | |
yield item | |
for child_item in _recurse(node.children[-1]): | |
yield child_item | |
else: | |
for item in node.contents: | |
yield item | |
for item in _recurse(self._root): | |
yield item | |
def __repr__(self): | |
def recurse(node, accum, depth): | |
accum.append((" " * depth) + repr(node)) | |
for node in getattr(node, "children", []): | |
recurse(node, accum, depth + 1) | |
accum = [] | |
recurse(self._root, accum, 0) | |
return "\n".join(accum) | |
@classmethod | |
def bulkload(cls, items, order): | |
tree = object.__new__(cls) | |
tree.order = order | |
leaves = tree._build_bulkloaded_leaves(items) | |
tree._build_bulkloaded_branches(leaves) | |
return tree | |
def _build_bulkloaded_leaves(self, items): | |
minimum = self.order // 2 | |
leaves, seps = [[]], [] | |
for item in items: | |
if len(leaves[-1]) < self.order: | |
leaves[-1].append(item) | |
else: | |
seps.append(item) | |
leaves.append([]) | |
if len(leaves[-1]) < minimum and seps: | |
last_two = leaves[-2] + [seps.pop()] + leaves[-1] | |
leaves[-2] = last_two[:minimum] | |
leaves[-1] = last_two[minimum + 1:] | |
seps.append(last_two[minimum]) | |
return [self.LEAF(self, contents=node) for node in leaves], seps | |
def _build_bulkloaded_branches(self, (leaves, seps)): | |
minimum = self.order // 2 | |
levels = [leaves] | |
while len(seps) > self.order + 1: | |
items, nodes, seps = seps, [[]], [] | |
for item in items: | |
if len(nodes[-1]) < self.order: | |
nodes[-1].append(item) | |
else: | |
seps.append(item) | |
nodes.append([]) | |
if len(nodes[-1]) < minimum and seps: | |
last_two = nodes[-2] + [seps.pop()] + nodes[-1] | |
nodes[-2] = last_two[:minimum] | |
nodes[-1] = last_two[minimum + 1:] | |
seps.append(last_two[minimum]) | |
offset = 0 | |
for i, node in enumerate(nodes): | |
children = levels[-1][offset:offset + len(node) + 1] | |
nodes[i] = self.BRANCH(self, contents=node, children=children) | |
offset += len(node) + 1 | |
levels.append(nodes) | |
self._root = self.BRANCH(self, contents=seps, children=levels[-1]) | |
class BPlusTree(BTree): | |
LEAF = _BPlusLeaf | |
def _get(self, key): | |
node, index = self._path_to(key)[-1] | |
if index == len(node.contents): | |
if node.next: | |
node, index = node.next, 0 | |
else: | |
return | |
while node.contents[index] == key: | |
yield node.data[index] | |
index += 1 | |
if index == len(node.contents): | |
if node.next: | |
node, index = node.next, 0 | |
else: | |
return | |
def _path_to(self, item): | |
path = super(BPlusTree, self)._path_to(item) | |
node, index = path[-1] | |
while hasattr(node, "children"): | |
node = node.children[index] | |
index = bisect.bisect_left(node.contents, item) | |
path.append((node, index)) | |
return path | |
def get(self, key, default=None): | |
try: | |
return self._get(key).next() | |
except StopIteration: | |
return default | |
def getlist(self, key): | |
return list(self._get(key)) | |
def insert(self, key, data): | |
path = self._path_to(key) | |
node, index = path.pop() | |
node.insert(index, key, data, path) | |
def remove(self, key): | |
path = self._path_to(key) | |
node, index = path.pop() | |
node.remove(index, path) | |
__getitem__ = get | |
__setitem__ = insert | |
__delitem__ = remove | |
def __contains__(self, key): | |
for item in self._get(key): | |
return True | |
return False | |
def iteritems(self): | |
node = self._root | |
while hasattr(node, "children"): | |
node = node.children[0] | |
while node: | |
for pair in itertools.izip(node.contents, node.data): | |
yield pair | |
node = node.next | |
def iterkeys(self): | |
return itertools.imap(operator.itemgetter(0), self.iteritems()) | |
def itervalues(self): | |
return itertools.imap(operator.itemgetter(1), self.iteritems()) | |
__iter__ = iterkeys | |
def items(self): | |
return list(self.iteritems()) | |
def keys(self): | |
return list(self.iterkeys()) | |
def values(self): | |
return list(self.itervalues()) | |
def _build_bulkloaded_leaves(self, items): | |
minimum = self.order // 2 | |
leaves, seps = [[]], [] | |
for item in items: | |
if len(leaves[-1]) >= self.order: | |
seps.append(item) | |
leaves.append([]) | |
leaves[-1].append(item) | |
if len(leaves[-1]) < minimum and seps: | |
last_two = leaves[-2] + leaves[-1] | |
leaves[-2] = last_two[:minimum] | |
leaves[-1] = last_two[minimum:] | |
seps.append(last_two[minimum]) | |
leaves = [self.LEAF( | |
self, | |
contents=[p[0] for p in pairs], | |
data=[p[1] for p in pairs]) | |
for pairs in leaves] | |
for i in xrange(len(leaves) - 1): | |
leaves[i].next = leaves[i + 1] | |
return leaves, [s[0] for s in seps] | |
import random | |
import unittest | |
class BTreeTests(unittest.TestCase): | |
def test_additions(self): | |
bt = BTree(20) | |
l = range(2000) | |
for i, item in enumerate(l): | |
bt.insert(item) | |
self.assertEqual(list(bt), l[:i + 1]) | |
def test_bulkloads(self): | |
bt = BTree.bulkload(range(2000), 20) | |
self.assertEqual(list(bt), range(2000)) | |
def test_removals(self): | |
bt = BTree(20) | |
l = range(2000) | |
map(bt.insert, l) | |
rand = l[:] | |
random.shuffle(rand) | |
while l: | |
self.assertEqual(list(bt), l) | |
rem = rand.pop() | |
l.remove(rem) | |
bt.remove(rem) | |
self.assertEqual(list(bt), l) | |
def test_insert_regression(self): | |
bt = BTree.bulkload(range(2000), 50) | |
for i in xrange(100000): | |
bt.insert(random.randrange(2000)) | |
class BPlusTreeTests(unittest.TestCase): | |
def test_additions_sorted(self): | |
bt = BPlusTree(20) | |
l = range(2000) | |
for item in l: | |
bt.insert(item, str(item)) | |
for item in l: | |
self.assertEqual(str(item), bt[item]) | |
self.assertEqual(l, list(bt)) | |
def test_additions_random(self): | |
bt = BPlusTree(20) | |
l = range(2000) | |
random.shuffle(l) | |
for item in l: | |
bt.insert(item, str(item)) | |
for item in l: | |
self.assertEqual(str(item), bt[item]) | |
self.assertEqual(range(2000), list(bt)) | |
def test_bulkload(self): | |
bt = BPlusTree.bulkload(zip(range(2000), map(str, range(2000))), 20) | |
self.assertEqual(list(bt), range(2000)) | |
self.assertEqual( | |
list(bt.iteritems()), | |
zip(range(2000), map(str, range(2000)))) | |
if __name__ == '__main__': | |
unittest.main() |
The problem seems to be that internally, _BNode.remove() is being called instead of _BPlusLeaf.remove().
Hi,
I see that the "next" is implemented at the linked list leaves-level of the tree. What about the "previous"? How could that be implemented with a double linked list?
@teepark was thinking of merging some parts of the module to my work here which im working on here:
https://github.com/codecakes/BST ; still needs debugging but feel free to try out and maybe I would merge the best features from here and mine for a cleaner design.
Hi all,
Anyone else having problems when removing from a tree with order > 1 and 1 item? Any one knows how to fix this?
def test_delete(self):
"""
This breaks remove function...
"""
bt = BPlusTree(2)
bt.insert(1000, "Test")
bt.remove(1000)
print(bt)
The problem seems to be at line 224 (grow
function) which I am not sure if it should be called in this case...
UPDATE:
Changed line 212 to:
if len(current.contents) > minimum or len(ancestors) == 0:
and seems working...
hey guys, I found a bug, from line 148 to line 154:
while descendent.children:
additional_ancestors.append(
(descendent, len(descendent.children) - 1))
descendent = descendent.children[-1]
ancestors.extend(additional_ancestors)
self.contents[index] = descendent.contents[-1]
descendent.remove(len(descendent.children) - 1, ancestors)
when while
break, descendent
must be a BPlusLeaf
, whitch has no attribute children
.
Hi guys,
when i use the BPlusTree, and i try to delete one element, I find an error. For example,
in
def test_additions_sorted(self):
bt = BPlusTree(20)
l = range(2000)
I have added the list removal and executing i get:
ERROR: test_additions_sorted (main.BPlusTreeTests)
Traceback (most recent call last):
File "btree.py", line 557, in test_additions_sorted
bt.remove(item)
File "btree.py", line 443, in remove
node.remove(index, path)
File "btree.py", line 222, in remove
self.grow(ancestors)
File "btree.py", line 253, in grow
parent.remove(parent_index, ancestors)
File "btree.py", line 136, in remove
while descendent.children:
AttributeError: children
What can be happening?
Thanks in advance.