Created
June 14, 2017 00:43
-
-
Save alothings/95c726a68e7513e70f6ceb0c1644adc8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Pattern matching problem | |
Boyer Moore algorithm | |
First is my attempt, below is the code provided in the book | |
Idea: | |
Optimize brute force approach using 2 heuristics: | |
- Looking-Glass: start searches from last character of the | |
pattern and work backwards | |
- Character-Jump: During testing of a pattern P, a mismatch | |
in T[i] = c with corresponding pattern P[k] is handled: | |
a) if C is not contained in P, shift P completely past i. | |
b) if c is contained in P shift P until an occurrence of c | |
gets aligned with T[i] | |
""" | |
def find_boyer_moore(T, P): | |
""" return lowest index of T at which the substring P begins or -1""" | |
n, m = len(T), len(P) | |
if m == 0: return 0 | |
last = {} # Using hash table for fast access | |
for k in range(m): | |
last[P[k]] = k | |
i = m - 1 # i index at T, k index at P | |
k = m - 1 # j index of last occurrence of T[i] in P | |
while i < n: | |
if T[i] == P[k]: # if chars are equal | |
i -= 1 # THIS | |
k -= 1 # THIS | |
if k == 0: # THIS | |
return i # check if Patter is complete THIS | |
else: | |
# if j < k (remember k index at P) | |
# shift i += m - (j+1) | |
# if j > k | |
# shift i += m - k | |
j = last.get(T[i], -1) # -1 if item not there | |
i += m - (min(k, j+1)) | |
k = m - 1 | |
return -1 | |
def find_boyer_moore2(T, P): | |
""" return lowest index of T at which the substring P begins or -1""" | |
n, m = len(T), len(P) | |
if m == 0: return 0 | |
last = {} # Using hash table for fast access | |
for k in range(m): | |
last[P[k]] = k | |
i = m - 1 # i index at T, k index at P | |
k = m - 1 # j index of last occurrence of T[i] in P | |
while i < n: | |
if T[i] == P[k]: # if chars are equal | |
if k == 0: | |
return i # check if Patter is complete | |
else: | |
i -= 1 # normal iteration | |
k -= 1 | |
else: | |
# if j < k (remember k index at P) | |
# shift i += m - (j+1) | |
# if j > k | |
# shift i += m - k | |
j = last.get(T[i], -1) # -1 if item not there | |
i += m - (min(k, j+1)) | |
k = m - 1 | |
return -1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment