Last active
December 11, 2015 22:49
-
-
Save mpenkov/4672622 to your computer and use it in GitHub Desktop.
Coding Interview Practice: Tries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
# | |
# Lessons learned: | |
# | |
# - must override object class to get __sizeof__ to work | |
# | |
class TrieNode(object): | |
def __init__(self, leaf): | |
self.children = dict() | |
self.leaf = leaf | |
def __sizeof__(self): | |
return object.__sizeof__(self) + sys.getsizeof(self.children) + sys.getsizeof(self.leaf) | |
def insert_word(trie, word): | |
if not word: | |
return | |
try: | |
child = trie.children[word[0]] | |
except KeyError: | |
child = TrieNode(len(word) == 1) | |
trie.children[word[0]] = child | |
insert_word(child, word[1:]) | |
def find_words(trie, prefix): | |
for c in prefix: | |
try: | |
trie = trie.children[c] | |
except KeyError: | |
return list() | |
root = (trie, prefix) | |
stack = [ root ] | |
words = list() | |
while stack: | |
node, cprefix = stack.pop() | |
if node.leaf: | |
words.append(cprefix) | |
for l in node.children: | |
child = (node.children[l], cprefix+l) | |
stack.append(child) | |
return words | |
def size_of_trie(root): | |
stack = [ root ] | |
bytes = 0 | |
nodes = 0 | |
while stack: | |
node = stack.pop() | |
nodes += 1 | |
bytes += sys.getsizeof(node) | |
for l in node.children: | |
stack.append(node.children[l]) | |
return bytes, nodes | |
if __name__ == "__main__": | |
if len(sys.argv) != 2: | |
print "usage: python %s words.txt" % __file__ | |
sys.exit(1) | |
with open(sys.argv[1]) as fin: | |
words = fin.read().strip().split("\n") | |
root = TrieNode(False) | |
for w in words: | |
insert_word(root, w) | |
bytes, nodes = size_of_trie(root) | |
print "read %d words (%d nodes, %dMiB). Enter a prefix to search, EOF to exit:" % (len(words), nodes, bytes/1e6) | |
while True: | |
try: | |
prefix = raw_input("> ") | |
print find_words(root, prefix) | |
except EOFError: | |
break |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
foo | |
foobar | |
fool | |
for | |
foot |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment