Created
July 20, 2013 15:52
-
-
Save samueljackson92/6045522 to your computer and use it in GitHub Desktop.
Knuth-Morris-Pratt (KMP) substring searhc algorithm implementation in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################### | |
# Knuth-Morris-Pratt Algorithm | |
# Author: Samuel Jackson ([email protected]) | |
# Date: 20/07/2013 | |
####################################################### | |
# implementation of KMP Search | |
def kmp_search(s, w): | |
m = 0 # current search start point | |
i = 0 # current index in target word | |
# build fall back table of target string | |
t = kmp_table(w) | |
# while the current match start point | |
# is less than the search string's length | |
while (m+i) < len(s): | |
# check for a match | |
if(w[i] == s[m+i]): | |
# check for a full match | |
if(i == len(w)-1): | |
# return start index of match | |
return m | |
i = i + 1 | |
else: | |
# jump to start of next possible match | |
m = m + i - t[i] | |
# correct for start of string being a special case | |
if(t[i] > -1): | |
i = t[i] | |
else: | |
i = 0 | |
# if we failed, return the length of the search string | |
return len(s) | |
# KMP fall back table generation function | |
def kmp_table(word): | |
# we automatically know the initial positions of | |
# the first two charaters | |
t={0:-1,1:0} | |
# current table position to compute | |
pos = len(t) | |
# index in word of the next candidate substring character | |
cnd = 0 | |
# While the table index is less than the word size | |
while pos < len(word): | |
# check if current substring continues | |
if(w[pos-1] == w[cnd]): | |
cnd = cnd+1 | |
t[pos] = cnd | |
pos += 1 | |
# if not, can we fallback? | |
elif (cnd > 0): | |
cnd = t[cnd] | |
# we ran out of candidates, start from zero | |
else: | |
t[pos] = 0 | |
pos += 1 | |
return t | |
s = "ABC ABCDAB ABCDABCDABDE" | |
w = "ABCDABD" | |
print kmp_search(s,w) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment