Created
March 6, 2015 16:05
-
-
Save mengzhuo/d713cb8be6c871995b79 to your computer and use it in GitHub Desktop.
Boyer Moore Demo in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
Boyer Moore Demo | |
Inspired by https://www.youtube.com/watch?v=PHXAOKQk2dw | |
Copyright (C) 2015 Meng Zhuo <[email protected]> | |
""" | |
def make_bad_match_table(pattern): | |
length = len(pattern) | |
table = {} | |
for i, c in enumerate(pattern): | |
if i == length-1 and not c in table: | |
table[c] = length | |
else: | |
table[c] = length - i - 1 | |
return table | |
def boyer_moore(pattern, text): | |
match_table = [] | |
pattern_length = len(pattern) | |
text_length = len(text) | |
if pattern_length > text_length: | |
return match_table | |
table = make_bad_match_table(pattern) | |
index = pattern_length - 1 | |
pattern_index = pattern_length - 1 | |
while index < text_length: | |
if pattern[pattern_index] == text[index]: | |
if pattern_index == 0: | |
match_table.append(index) | |
pattern_index = pattern_length - 1 | |
index += (pattern_length * 2 - 1) | |
else: | |
pattern_index -= 1 | |
index -= 1 | |
else: | |
index += table.get(text[index], pattern_length) | |
pattern_index = pattern_length - 1 | |
return match_table | |
if __name__ == '__main__': | |
pattern = "tooth" | |
target = "trusthardtoothbrushestoothgood" | |
print(boyer_moore(pattern, target)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Found a problem in your implementation, may cause infinite loop. Took me an hour but thank God I figured it out, the YouTube link was very helpful.
Change Line 45 to the following block to solve the bug:
if pattern_index != len_of_pattern - 1:
index += table.get(text[index], len_of_pattern)
else:
index += table.get(pattern[pattern_index], len_of_pattern)