Last active
August 29, 2015 14:27
-
-
Save zhangys-lucky/3f620cfbf61949c21c10 to your computer and use it in GitHub Desktop.
find top k words using Trie and MinHeap(Python Code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TrieNode(object): | |
# Initialize your data structure here. | |
def __init__(self): | |
self.count = 0 | |
self.children = {} | |
class Trie(object): | |
def __init__(self): | |
self.root = TrieNode() | |
# @param {string} word | |
# @return {void} | |
# Inserts a word into the trie. | |
def insert(self, word): | |
node = self.root | |
for w in word: | |
if w not in node.children: | |
node.children[w] = TrieNode() | |
node = node.children[w] | |
node.count += 1 | |
def _startsWith(self,prefix): | |
""" | |
@param {string} prefix | |
@return {TrieNode} or None | |
""" | |
node = self.root | |
for w in prefix: | |
if w in node.children: | |
node = node.children[w] | |
else: | |
return None | |
return node | |
# @param {string} word | |
# @return {boolean} | |
# Returns if the word is in the trie. | |
def search(self, word): | |
node = self._startsWith(word) | |
if node and node.v: | |
return True | |
else: | |
return False | |
# @param {string} prefix | |
# @return {boolean} | |
# Returns if there is any word in the trie | |
# that starts with the given prefix. | |
def startsWith(self, prefix): | |
if self._startsWith(prefix): | |
return True | |
else: | |
return False | |
def topkword(trie,prefix_word,k): | |
prefix_node = trie._startsWith(prefix_word) | |
import heapq | |
heap = [] | |
import functools | |
@functools.total_ordering | |
class HeapNode(object): | |
def __init__(self,s,count): | |
self.s = s | |
self.count = count | |
def __eq__(self, other): | |
return self.count == other.count | |
def __lt__(self, other): | |
return self.count < other.count | |
def dfs_walk_trie(node,s,heap): | |
for c in node.children: | |
child = node.children[c] | |
if child.count > 0: | |
# update heap | |
if len(heap) <= k: | |
heapq.heappush(heap,HeapNode(s + c,child.count)) | |
else: | |
heapq.heappushpop(heap,HeapNode(s + c,child.count)) | |
else: | |
dfs_walk_trie(child,s + c,heap) | |
pass | |
dfs_walk_trie(prefix_node,prefix_word,heap) | |
sort = [] | |
while heap: | |
sort.append(heapq.heappop(heap)) | |
return [(n.s,n.count) for n in sort][::-1] | |
trie = Trie() | |
# create trie tree | |
for w in ["hell","hell","hess","hess","hess","hess","hewpwpww","hiiii"]: | |
trie.insert(w) | |
# print top 3 with 'he' prefix | |
print(topkword(trie,"he",3)) | |
print(topkword(trie,"hi",3)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment