Skip to content

Instantly share code, notes, and snippets.

@tanishiking
Last active November 13, 2016 17:17
Show Gist options
  • Save tanishiking/a9f5a05877c1ab786c47a965669b9103 to your computer and use it in GitHub Desktop.
Save tanishiking/a9f5a05877c1ab786c47a965669b9103 to your computer and use it in GitHub Desktop.
Trie matching
__pycache__/
*.pyc

Trie

  • problem1
  • problem2
  • problem3
  • problem4
  • [] problem5
#!/usr/bin/python
import sys
from trie import Trie
NA = -1
class Node:
def __init__ (self):
self.next = [NA] * 4
def solve (text, patterns):
result = []
trie = Trie()
tree = trie.build_tree(patterns)
for pos in range(len(text)):
char = text[pos]
matched = trie.prefix_trie_match(text[pos:])
if matched is not None:
result.append(pos)
return result
if __name__ == '__main__':
text = sys.stdin.readline().strip()
n = int(sys.stdin.readline().strip())
patterns = []
for i in range (n):
patterns += [sys.stdin.readline ().strip()]
ans = solve(text, patterns)
sys.stdout.write (' '.join (map (str, ans)) + '\n')
#!/usr/bin/python
import sys
from suffix_trie import SuffixTrie
if __name__ == '__main__':
text = sys.stdin.readline().strip()
trie = SuffixTrie()
tree = trie.build_suffix_tree(text)
trie.dump_tree()
# print("\n".join(result))
class SuffixTrie:
def __init__(self):
self.__tree = {0: {}}
self.__END = "$"
def build_suffix_tree(self, text):
text += self.__END
tree = {0: {}}
auto_increment = 0
for start in range(len(text)):
pattern = text[start:]
current_node = 0
for char in pattern:
if char in tree[current_node].keys():
current_node = tree[current_node][char]
else:
auto_increment += 1
new_node = auto_increment
if char == self.__END:
tree[new_node] = {"start": start}
else:
tree[new_node] = {}
tree[current_node][char] = new_node
current_node = new_node
self.__tree = tree
self.__compress()
return tree
def __compress(self):
current_node = 0
self.__compress_recursive(current_node)
def __compress_recursive(self, node):
# TODO: Refactor
# key: path from node to child_node
# path_char: path from child_node to grand_child_node
for (key, child_node) in self.__tree[node].items():
if key == "start":
# 自分が終端ノードなら何もしない
# 子供を孫につなぎかえたときに起こりうる
return
if len(self.__tree[child_node]) == 1:
# ある子供の子供が一人なら
path_char = next(iter(self.__tree[child_node]))
if path_char != "start":
# 子供が終端ノードなら何もしない
# 子供を孫につなぎかえる
grand_child_node = self.__tree[child_node][path_char]
self.__tree[node][key + path_char] = grand_child_node
# いらない子供を殺す
del self.__tree[node][key]
del self.__tree[child_node]
self.__compress_recursive(grand_child_node)
else:
self.__compress_recursive(child_node)
def dump_tree(self):
# print(self.__tree)
for node in self.__tree:
for c in self.__tree[node]:
if c != "start":
print(c)
class Trie():
def __init__(self):
self.tree = {0: {}}
self.__patterns = set()
def build_tree(self, patterns):
# tree looks like {0:{'A':1,'T':2},1:{'C':3}}
tree = {0: {}}
self.__patterns = set(patterns)
# node id
auto_increment = 0
for pattern in patterns:
# start from root node for each pattern
current_node = 0
for c in pattern:
if c in tree[current_node].keys():
current_node = tree[current_node][c]
else:
auto_increment += 1
new_node = auto_increment
# add new node
tree[new_node] = {}
tree[current_node][c] = new_node
current_node = new_node
self.tree = tree
return tree
def prefix_trie_match(self, text):
"""
problem 2, 3
return first matched string with the builded tree
"""
path_chars = []
length = len(text)
current_node = 0
text_iter = 0
while True:
# if crrent_node is leaf node
if len(self.tree[current_node]) == 0:
return "".join(path_chars)
if text_iter >= length:
# reach to end of text
return self.__match_or_none(path_chars)
# get next character
symbol = text[text_iter]
child_node = self.tree[current_node].get(symbol)
if child_node is None:
# no match child node
return self.__match_or_none(path_chars)
else:
path_chars.append(symbol)
current_node = child_node
text_iter += 1
def __match_or_none(self, path_chars):
"""
if string for the path is one of the patterns
return matched string
else if not, return None
"""
matched = "".join(path_chars)
if matched in self.__patterns:
return matched
return None
def dump_tree(self):
for node in self.tree:
for c in self.tree[node]:
print("{}->{}:{}".format(node, self.tree[node][c], c))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment