Created
January 15, 2015 14:15
-
-
Save matthewfranglen/64e417598355f811d900 to your computer and use it in GitHub Desktop.
Example use of Trie to find substring matches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# pylint: disable=C0111, R0903 | |
class Trie(object): | |
def __init__(self): | |
self.children = {} | |
def add_child(self, letter): | |
if letter in self.children: | |
return self.children[letter] | |
else: | |
child = Trie() | |
self.children[letter] = child | |
return child | |
def traverse(self, letter): | |
return self.children.get(letter, None) | |
def populate_trie(root, letters): | |
current_positions = [] | |
for letter in letters: | |
current_positions = [ | |
position.add_child(letter) | |
for position in current_positions | |
] | |
current_positions.append(root.add_child(letter)) | |
return root | |
class TrieSearch(object): | |
def __init__(self, trie, starting_index): | |
self.trie = trie | |
self.starting_index = starting_index | |
self.ending_index = starting_index + 1 | |
def update(self, letter): | |
""" This returns a boolean indicating | |
if the search can accept the letter """ | |
self.trie = self.trie.traverse(letter) | |
if self.trie is not None: | |
self.ending_index = self.ending_index + 1 | |
return True | |
return False | |
def get_match(self, letters): | |
return letters[self.starting_index:self.ending_index] | |
def find_matches(root, letters): | |
completed_matches = [] | |
current_matches = [] | |
for index, letter in enumerate(letters): | |
new_current = [] | |
for current in current_matches: | |
if current.update(letter): | |
new_current.append(current) | |
else: | |
completed_matches.append(current) | |
new_search_trie = root.traverse(letter) | |
if new_search_trie is not None: | |
new_current.append(TrieSearch(new_search_trie, index)) | |
current_matches = new_current | |
all_matches = completed_matches + current_matches | |
return [match.get_match(letters) for match in all_matches] | |
def run(): | |
fiveprime = "GATTCGAAGTCCACTATC" | |
threeprime = "TGAGTAGGACGGCACTATC" | |
root = Trie() | |
populate_trie(root, fiveprime) | |
populate_trie(root, threeprime) | |
matches = find_matches(root, "CACTATCAAAAAAA") | |
print matches | |
if __name__ == "__main__": | |
run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment