Created
June 13, 2023 14:50
-
-
Save cod3smith/1ea6a151cfd134174fc8cef689a24ece to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TrieNode: | |
def __init__(self): | |
self.children = {} | |
self.output = [] | |
class AhoCorasick: | |
def __init__(self): | |
self.root = TrieNode() | |
def add_pattern(self, pattern): | |
node = self.root | |
for char in pattern: | |
if char not in node.children: | |
node.children[char] = TrieNode() | |
node = node.children[char] | |
node.output.append(pattern) | |
def build_fail_transitions(self): | |
queue = [] | |
for node in self.root.children.values(): | |
queue.append(node) | |
node.fail = self.root | |
while queue: | |
current_node = queue.pop(0) | |
for char, child in current_node.children.items(): | |
queue.append(child) | |
fail_node = current_node.fail | |
while fail_node != self.root and char not in fail_node.children: | |
fail_node = fail_node.fail | |
child.fail = fail_node.children.get(char, self.root) | |
child.output += child.fail.output | |
def search_patterns(self, text): | |
node = self.root | |
patterns_found = [] | |
for i, char in enumerate(text): | |
while node != self.root and char not in node.children: | |
node = node.fail | |
if char in node.children: | |
node = node.children[char] | |
patterns_found.extend(node.output) | |
if patterns_found: | |
print(f"Pattern(s) found at index {i}: {patterns_found}") | |
# Example usage | |
ac = AhoCorasick() | |
ac.add_pattern("apple") | |
ac.add_pattern("banana") | |
ac.add_pattern("orange") | |
ac.build_fail_transitions() | |
ac.search_patterns("I have an apple and a banana in my fruit basket.") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment