Created
September 25, 2024 10:20
-
-
Save OhadRubin/141d6b09d6af081d3b9e266fa188328e to your computer and use it in GitHub Desktop.
B-tree in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BTreeNode: | |
def __init__(self, t, leaf=False): | |
""" | |
Initialize a B-tree node. | |
Args: | |
t (int): Minimum degree of B-tree node. | |
leaf (bool, optional): True if node is a leaf. Defaults to False. | |
""" | |
self.t = t # Minimum degree (defines the range for number of keys) | |
self.keys = [] # List of keys | |
self.children = [] # List of child pointers | |
self.leaf = leaf # True when node is leaf. Otherwise False. | |
def __str__(self): | |
return str(self.keys) | |
class BTree: | |
def __init__(self, t): | |
""" | |
Initialize an empty B-tree. | |
Args: | |
t (int): Minimum degree of B-tree. | |
""" | |
self.root = BTreeNode(t, leaf=True) | |
self.t = t # Minimum degree | |
def search(self, k, x=None): | |
""" | |
Search for key k in subtree rooted with node x. | |
Args: | |
k: Key to search for. | |
x (BTreeNode, optional): Node to search in. Defaults to root node. | |
Returns: | |
Tuple(BTreeNode, int): Node where key is found and index of key. | |
If key not found, returns None. | |
""" | |
if x is None: | |
x = self.root | |
i = 0 | |
# Find the first key greater than or equal to k | |
while i < len(x.keys) and k > x.keys[i]: | |
i += 1 | |
# If the found key is equal to k, return this node and index | |
if i < len(x.keys) and x.keys[i] == k: | |
return x, i | |
# If the key is not found here and this is a leaf node | |
if x.leaf: | |
return None # Key is not found | |
# Go to the appropriate child | |
return self.search(k, x.children[i]) | |
def insert(self, k): | |
""" | |
Insert a key k into the B-tree. | |
Args: | |
k: Key to insert. | |
""" | |
root = self.root | |
# If root is full, tree grows in height | |
if len(root.keys) == (2 * self.t) - 1: | |
s = BTreeNode(self.t) | |
s.children.insert(0, self.root) | |
s.leaf = False | |
self.root = s | |
self._split_child(s, 0) | |
self._insert_non_full(s, k) | |
else: | |
self._insert_non_full(root, k) | |
def _insert_non_full(self, x, k): | |
""" | |
Insert a key into a non-full node x. | |
Args: | |
x (BTreeNode): Node which is non-full. | |
k: Key to insert. | |
""" | |
i = len(x.keys) - 1 | |
if x.leaf: | |
# Insert a key into the leaf node | |
x.keys.append(0) # Dummy value to expand the list | |
while i >= 0 and k < x.keys[i]: | |
x.keys[i + 1] = x.keys[i] | |
i -= 1 | |
x.keys[i + 1] = k # Insert the new key | |
else: | |
# Move to the appropriate child node | |
while i >= 0 and k < x.keys[i]: | |
i -= 1 | |
i += 1 | |
# If the child is full, split it | |
if len(x.children[i].keys) == (2 * self.t) - 1: | |
self._split_child(x, i) | |
if k >= x.keys[i]: | |
i += 1 | |
self._insert_non_full(x.children[i], k) | |
def _split_child(self, x, i): | |
""" | |
Split the child y of node x at index i. | |
Args: | |
x (BTreeNode): Parent node whose child is to be split. | |
i (int): Index of the child in x.children. | |
""" | |
t = self.t | |
y = x.children[i] | |
z = BTreeNode(t, leaf=y.leaf) | |
# Store the middle key to move up to the parent | |
middle_key = y.keys[t - 1] | |
# Assign keys to z (after the middle key) | |
z.keys = y.keys[t:] # Keys from index t to end | |
# Assign keys to y (before the middle key) | |
y.keys = y.keys[:t - 1] # Keys from index 0 to t-2 | |
# If y is not a leaf, move the appropriate children | |
if not y.leaf: | |
z.children = y.children[t:] # Children from index t to end | |
y.children = y.children[:t] # Children from index 0 to t-1 | |
# Insert z as a child of x | |
x.children.insert(i + 1, z) | |
# Move the middle key up to x | |
x.keys.insert(i, middle_key) | |
def delete(self, k): | |
""" | |
Delete key k from the B-tree. | |
Args: | |
k: Key to delete. | |
""" | |
self._delete(self.root, k) | |
# If the root node has no keys, adjust the root | |
if len(self.root.keys) == 0 and not self.root.leaf: | |
self.root = self.root.children[0] | |
def _delete(self, x, k): | |
""" | |
Delete key k from the subtree rooted at x. | |
Args: | |
x (BTreeNode): Node to delete from. | |
k: Key to delete. | |
""" | |
idx = self._find_key(x, k) | |
if idx < len(x.keys) and x.keys[idx] == k: | |
# The key is in this node | |
if x.leaf: | |
# Case 1: x is a leaf node | |
x.keys.pop(idx) | |
else: | |
# Case 2: x is an internal node | |
self._delete_internal_node(x, k, idx) | |
else: | |
# The key is not in this node | |
if x.leaf: | |
# The key is not in the tree | |
return | |
else: | |
# The key may be in a child | |
flag = (idx == len(x.keys)) | |
if len(x.children[idx].keys) < self.t: | |
self._fill(x, idx) | |
# After filling, check if we merged children | |
if flag and idx > len(x.keys): | |
self._delete(x.children[idx - 1], k) | |
else: | |
self._delete(x.children[idx], k) | |
def _delete_internal_node(self, x, k, idx): | |
""" | |
Delete key k from internal node x at index idx. | |
Args: | |
x (BTreeNode): Internal node. | |
k: Key to delete. | |
idx (int): Index of key k in node x.keys. | |
""" | |
y = x.children[idx] | |
z = x.children[idx + 1] | |
if len(y.keys) >= self.t: | |
# Case 2a: Predecessor has at least t keys | |
pred_key = self._get_predecessor(y) | |
x.keys[idx] = pred_key | |
self._delete(y, pred_key) | |
elif len(z.keys) >= self.t: | |
# Case 2b: Successor has at least t keys | |
succ_key = self._get_successor(z) | |
x.keys[idx] = succ_key | |
self._delete(z, succ_key) | |
else: | |
# Case 2c: Both children have t-1 keys, merge them | |
self._merge(x, idx) | |
self._delete(y, k) | |
def _get_predecessor(self, x): | |
""" | |
Get the predecessor key of node x. | |
Args: | |
x (BTreeNode): Node to find predecessor from. | |
Returns: | |
Predecessor key. | |
""" | |
while not x.leaf: | |
x = x.children[-1] | |
return x.keys[-1] | |
def _get_successor(self, x): | |
""" | |
Get the successor key of node x. | |
Args: | |
x (BTreeNode): Node to find successor from. | |
Returns: | |
Successor key. | |
""" | |
while not x.leaf: | |
x = x.children[0] | |
return x.keys[0] | |
def _fill(self, x, idx): | |
""" | |
Ensure that the child x.children[idx] has at least t keys. | |
Args: | |
x (BTreeNode): Parent node. | |
idx (int): Index of child in x.children. | |
""" | |
if idx > 0 and len(x.children[idx - 1].keys) >= self.t: | |
self._borrow_from_prev(x, idx) | |
elif idx < len(x.children) - 1 and len(x.children[idx + 1].keys) >= self.t: | |
self._borrow_from_next(x, idx) | |
else: | |
if idx < len(x.children) - 1: | |
self._merge(x, idx) | |
else: | |
self._merge(x, idx - 1) | |
def _borrow_from_prev(self, x, idx): | |
""" | |
Borrow a key from x.children[idx - 1] and insert into x.children[idx]. | |
Args: | |
x (BTreeNode): Parent node. | |
idx (int): Index of child in x.children. | |
""" | |
child = x.children[idx] | |
sibling = x.children[idx - 1] | |
child.keys.insert(0, x.keys[idx - 1]) | |
if not child.leaf: | |
child.children.insert(0, sibling.children.pop()) | |
x.keys[idx - 1] = sibling.keys.pop() | |
def _borrow_from_next(self, x, idx): | |
""" | |
Borrow a key from x.children[idx + 1] and insert into x.children[idx]. | |
Args: | |
x (BTreeNode): Parent node. | |
idx (int): Index of child in x.children. | |
""" | |
child = x.children[idx] | |
sibling = x.children[idx + 1] | |
child.keys.append(x.keys[idx]) | |
if not child.leaf: | |
child.children.append(sibling.children.pop(0)) | |
x.keys[idx] = sibling.keys.pop(0) | |
def _merge(self, x, idx): | |
""" | |
Merge x.children[idx] and x.children[idx + 1]. | |
Args: | |
x (BTreeNode): Parent node. | |
idx (int): Index of child in x.children. | |
""" | |
child = x.children[idx] | |
sibling = x.children[idx + 1] | |
child.keys.append(x.keys[idx]) | |
child.keys.extend(sibling.keys) | |
if not child.leaf: | |
child.children.extend(sibling.children) | |
x.keys.pop(idx) | |
x.children.pop(idx + 1) | |
def _find_key(self, x, k): | |
""" | |
Find the first index idx in x.keys such that k <= x.keys[idx]. | |
Args: | |
x (BTreeNode): Node to search. | |
k: Key to find. | |
Returns: | |
int: Index idx. | |
""" | |
idx = 0 | |
while idx < len(x.keys) and x.keys[idx] < k: | |
idx += 1 | |
return idx | |
def traverse(self): | |
""" | |
Traverse the tree and print keys in order. | |
""" | |
self._traverse(self.root) | |
print() | |
def _traverse(self, x): | |
""" | |
Traverse subtree rooted at node x. | |
Args: | |
x (BTreeNode): Node to traverse. | |
""" | |
for i in range(len(x.keys)): | |
if not x.leaf: | |
self._traverse(x.children[i]) | |
print(x.keys[i], end=' ') | |
if not x.leaf: | |
self._traverse(x.children[len(x.keys)]) | |
def display(self): | |
""" | |
Display the tree structure. | |
""" | |
self._display(self.root, 0) | |
def _display(self, x, level): | |
""" | |
Display subtree rooted at node x. | |
Args: | |
x (BTreeNode): Node to display. | |
level (int): Current level in the tree. | |
""" | |
print('Level', level, ' ', len(x.keys), 'keys:', x.keys) | |
if not x.leaf: | |
for child in x.children: | |
self._display(child, level + 1) | |
# Example usage: | |
# if __name__ == "__main__": | |
# Create a B-tree with minimum degree t | |
t = 3 | |
btree = BTree(t) | |
# Insert keys into the B-tree | |
keys = [10, 20, 5, 6, 12, 30, 7, 17] | |
for key in keys: | |
btree.insert(key) | |
print("Traversal of tree after insertion:") | |
btree.traverse() | |
# Search for a key | |
key = 6 | |
result = btree.search(key) | |
if result: | |
node, idx = result | |
print(f"Key {key} found in node: {node.keys}") | |
else: | |
print(f"Key {key} not found in the B-tree.") | |
# Delete keys from the B-tree | |
btree.delete(6) | |
btree.delete(13) # Key not present in tree | |
btree.delete(7) | |
btree.delete(4) # Key not present in tree | |
btree.delete(2) # Key not present in tree | |
btree.delete(16) # Key not present in tree | |
print("\nTraversal of tree after deletion:") | |
btree.traverse() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment