Created
November 21, 2015 22:58
-
-
Save nickstanisha/733c134a0171a00f66d4 to your computer and use it in GitHub Desktop.
An object-oriented implementation of a "Trie" in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node: | |
def __init__(self, label=None, data=None): | |
self.label = label | |
self.data = data | |
self.children = dict() | |
def addChild(self, key, data=None): | |
if not isinstance(key, Node): | |
self.children[key] = Node(key, data) | |
else: | |
self.children[key.label] = key | |
def __getitem__(self, key): | |
return self.children[key] | |
class Trie: | |
def __init__(self): | |
self.head = Node() | |
def __getitem__(self, key): | |
return self.head.children[key] | |
def add(self, word): | |
current_node = self.head | |
word_finished = True | |
for i in range(len(word)): | |
if word[i] in current_node.children: | |
current_node = current_node.children[word[i]] | |
else: | |
word_finished = False | |
break | |
# For ever new letter, create a new child node | |
if not word_finished: | |
while i < len(word): | |
current_node.addChild(word[i]) | |
current_node = current_node.children[word[i]] | |
i += 1 | |
# Let's store the full word at the end node so we don't need to | |
# travel back up the tree to reconstruct the word | |
current_node.data = word | |
def has_word(self, word): | |
if word == '': | |
return False | |
if word == None: | |
raise ValueError('Trie.has_word requires a not-Null string') | |
# Start at the top | |
current_node = self.head | |
exists = True | |
for letter in word: | |
if letter in current_node.children: | |
current_node = current_node.children[letter] | |
else: | |
exists = False | |
break | |
# Still need to check if we just reached a word like 't' | |
# that isn't actually a full word in our dictionary | |
if exists: | |
if current_node.data == None: | |
exists = False | |
return exists | |
def start_with_prefix(self, prefix): | |
""" Returns a list of all words in tree that start with prefix """ | |
words = list() | |
if prefix == None: | |
raise ValueError('Requires not-Null prefix') | |
# Determine end-of-prefix node | |
top_node = self.head | |
for letter in prefix: | |
if letter in top_node.children: | |
top_node = top_node.children[letter] | |
else: | |
# Prefix not in tree, go no further | |
return words | |
# Get words under prefix | |
if top_node == self.head: | |
queue = [node for key, node in top_node.children.iteritems()] | |
else: | |
queue = [top_node] | |
# Perform a breadth first search under the prefix | |
# A cool effect of using BFS as opposed to DFS is that BFS will return | |
# a list of words ordered by increasing length | |
while queue: | |
current_node = queue.pop() | |
if current_node.data != None: | |
# Isn't it nice to not have to go back up the tree? | |
words.append(current_node.data) | |
queue = [node for key,node in current_node.children.iteritems()] + queue | |
return words | |
def getData(self, word): | |
""" This returns the 'data' of the node identified by the given word """ | |
if not self.has_word(word): | |
raise ValueError('{} not found in trie'.format(word)) | |
# Race to the bottom, get data | |
current_node = self.head | |
for letter in word: | |
current_node = current_node[letter] | |
return current_node.data | |
if __name__ == '__main__': | |
""" Example use """ | |
trie = Trie() | |
words = 'hello goodbye help gerald gold tea ted team to too tom stan standard money' | |
for word in words.split(): | |
trie.add(word) | |
print "'goodbye' in trie: ", trie.has_word('goodbye') | |
print trie.start_with_prefix('g') | |
print trie.start_with_prefix('to') |
Could you explain the purpose of line 85-86?
The representation can be made much more concise. For example
class TrieNode(dict):
def __init__(self, val = None):
dict.__init__(self)
self.val = val
def add_child(self, child_node):
self[child_node.val] = child_node
class Trie:
def __init__(self, root: TrieNode = None):
self.root = root
if self.root is None:
self.root = TrieNode()
def add_word(self, word):
current_node = self.root
for depth, character in enumerate(word):
if character not in current_node:
child = TrieNode(character, depth=depth)
current_node.add_child(child)
current_node = current_node[character]
Shouldn't you have a function for deletion
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
So I'm reviewing data structures and I've found some good implementation examples in Python for most of the big ones. However, try as I might, I couldn't find a good example of a trie implemented in Python that used object-oriented principles. Several examples that I did find recommended using a list of lists, or a dictionary of dictionaries to represent a trie. I found those to be too sloppy and hard to read, so I made this.
I focused mostly on the start_with_prefix capability, so my nodes' "data" objects are copies of the full string (although they could feasibly be anything) if the string is a word and None otherwise. Storing data this way prevents me from having to climb back up the tree (or remember where I am in the tree) to reconstruct the full word in Trie.start_with_prefix()