Created
April 10, 2023 20:49
-
-
Save Logiraptor/b39222ce2e1cc4c9f34e1bc2484b0fe3 to your computer and use it in GitHub Desktop.
A Trie designed to split a list of names into prefixes for querying prometheus efficiently
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import string | |
| class Trie: | |
| def __init__(self, prefix='', terminal=False, children=None): | |
| self.prefix = prefix | |
| self.terminal = terminal | |
| self.children = children or [] | |
| def add(self, name): | |
| if not name.startswith(self.prefix): | |
| common_prefix = find_common_prefix(name, self.prefix) | |
| new_children = [ | |
| Trie(prefix=self.prefix, children=self.children, terminal=self.terminal), | |
| Trie(prefix=name, terminal=True), | |
| ] | |
| self.prefix = common_prefix | |
| self.children = new_children | |
| self.terminal = False | |
| return | |
| for child in self.children: | |
| if name.startswith(child.prefix): | |
| child.add(name) | |
| return | |
| best_prefix = self.prefix | |
| best_index = -1 | |
| for i, child in enumerate(self.children): | |
| common_prefix = find_common_prefix(name, child.prefix) | |
| if len(common_prefix) > len(best_prefix): | |
| best_prefix = common_prefix | |
| best_index = i | |
| if best_index == -1: | |
| self.children.append(Trie(prefix=name, terminal=True)) | |
| return | |
| child = self.children[best_index] | |
| child.add(name) | |
| def count(self): | |
| total = 0 | |
| if self.terminal: | |
| total += 1 | |
| for child in self.children: | |
| total += child.count() | |
| return total | |
| def partition_subtries(self, max_size, callback): | |
| if self.count() <= max_size: | |
| callback(self.prefix, False) | |
| return | |
| if self.terminal: | |
| callback(self.prefix, True) | |
| for child in self.children: | |
| child.partition_subtries(max_size, callback) | |
| def find_common_prefix(a, b): | |
| for i in range(min(len(a), len(b))): | |
| if a[i] != b[i]: | |
| return a[:i] | |
| return a | |
| def split_into_prefixed_buckets(names, max_node_size, callback): | |
| trie = Trie() | |
| for name in names: | |
| trie.add(name) | |
| trie.partition_subtries(max_node_size, callback) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment