- problem1
- problem2
- problem3
- problem4
- [] problem5
Last active
November 13, 2016 17:17
-
-
Save tanishiking/a9f5a05877c1ab786c47a965669b9103 to your computer and use it in GitHub Desktop.
Trie matching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__pycache__/ | |
*.pyc |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
from trie import Trie | |
NA = -1 | |
class Node: | |
def __init__ (self): | |
self.next = [NA] * 4 | |
def solve (text, patterns): | |
result = [] | |
trie = Trie() | |
tree = trie.build_tree(patterns) | |
for pos in range(len(text)): | |
char = text[pos] | |
matched = trie.prefix_trie_match(text[pos:]) | |
if matched is not None: | |
result.append(pos) | |
return result | |
if __name__ == '__main__': | |
text = sys.stdin.readline().strip() | |
n = int(sys.stdin.readline().strip()) | |
patterns = [] | |
for i in range (n): | |
patterns += [sys.stdin.readline ().strip()] | |
ans = solve(text, patterns) | |
sys.stdout.write (' '.join (map (str, ans)) + '\n') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
from suffix_trie import SuffixTrie | |
if __name__ == '__main__': | |
text = sys.stdin.readline().strip() | |
trie = SuffixTrie() | |
tree = trie.build_suffix_tree(text) | |
trie.dump_tree() | |
# print("\n".join(result)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SuffixTrie: | |
def __init__(self): | |
self.__tree = {0: {}} | |
self.__END = "$" | |
def build_suffix_tree(self, text): | |
text += self.__END | |
tree = {0: {}} | |
auto_increment = 0 | |
for start in range(len(text)): | |
pattern = text[start:] | |
current_node = 0 | |
for char in pattern: | |
if char in tree[current_node].keys(): | |
current_node = tree[current_node][char] | |
else: | |
auto_increment += 1 | |
new_node = auto_increment | |
if char == self.__END: | |
tree[new_node] = {"start": start} | |
else: | |
tree[new_node] = {} | |
tree[current_node][char] = new_node | |
current_node = new_node | |
self.__tree = tree | |
self.__compress() | |
return tree | |
def __compress(self): | |
current_node = 0 | |
self.__compress_recursive(current_node) | |
def __compress_recursive(self, node): | |
# TODO: Refactor | |
# key: path from node to child_node | |
# path_char: path from child_node to grand_child_node | |
for (key, child_node) in self.__tree[node].items(): | |
if key == "start": | |
# 自分が終端ノードなら何もしない | |
# 子供を孫につなぎかえたときに起こりうる | |
return | |
if len(self.__tree[child_node]) == 1: | |
# ある子供の子供が一人なら | |
path_char = next(iter(self.__tree[child_node])) | |
if path_char != "start": | |
# 子供が終端ノードなら何もしない | |
# 子供を孫につなぎかえる | |
grand_child_node = self.__tree[child_node][path_char] | |
self.__tree[node][key + path_char] = grand_child_node | |
# いらない子供を殺す | |
del self.__tree[node][key] | |
del self.__tree[child_node] | |
self.__compress_recursive(grand_child_node) | |
else: | |
self.__compress_recursive(child_node) | |
def dump_tree(self): | |
# print(self.__tree) | |
for node in self.__tree: | |
for c in self.__tree[node]: | |
if c != "start": | |
print(c) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Trie(): | |
def __init__(self): | |
self.tree = {0: {}} | |
self.__patterns = set() | |
def build_tree(self, patterns): | |
# tree looks like {0:{'A':1,'T':2},1:{'C':3}} | |
tree = {0: {}} | |
self.__patterns = set(patterns) | |
# node id | |
auto_increment = 0 | |
for pattern in patterns: | |
# start from root node for each pattern | |
current_node = 0 | |
for c in pattern: | |
if c in tree[current_node].keys(): | |
current_node = tree[current_node][c] | |
else: | |
auto_increment += 1 | |
new_node = auto_increment | |
# add new node | |
tree[new_node] = {} | |
tree[current_node][c] = new_node | |
current_node = new_node | |
self.tree = tree | |
return tree | |
def prefix_trie_match(self, text): | |
""" | |
problem 2, 3 | |
return first matched string with the builded tree | |
""" | |
path_chars = [] | |
length = len(text) | |
current_node = 0 | |
text_iter = 0 | |
while True: | |
# if crrent_node is leaf node | |
if len(self.tree[current_node]) == 0: | |
return "".join(path_chars) | |
if text_iter >= length: | |
# reach to end of text | |
return self.__match_or_none(path_chars) | |
# get next character | |
symbol = text[text_iter] | |
child_node = self.tree[current_node].get(symbol) | |
if child_node is None: | |
# no match child node | |
return self.__match_or_none(path_chars) | |
else: | |
path_chars.append(symbol) | |
current_node = child_node | |
text_iter += 1 | |
def __match_or_none(self, path_chars): | |
""" | |
if string for the path is one of the patterns | |
return matched string | |
else if not, return None | |
""" | |
matched = "".join(path_chars) | |
if matched in self.__patterns: | |
return matched | |
return None | |
def dump_tree(self): | |
for node in self.tree: | |
for c in self.tree[node]: | |
print("{}->{}:{}".format(node, self.tree[node][c], c)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment