Created
July 4, 2013 08:37
-
-
Save NalaGinrut/5925985 to your computer and use it in GitHub Desktop.
KMP string matching algorithm in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sets import Set | |
def gen_xfix_set(string, generator): | |
ret,n = Set([]),len(string)-1 | |
while True: | |
if n==0: return ret | |
else: | |
ret.add(generator(string, n)) | |
n -= 1 | |
def gen_prefix_set(string): | |
return gen_xfix_set(string, lambda s,n: s[:n]) | |
def gen_suffix_set(string): | |
return gen_xfix_set(string, lambda s,n: s[n:]) | |
# count partial matches | |
def cpm(string): | |
if string=="": raise(Exception("it's null string!")) | |
prefix,suffix=gen_prefix_set(string),gen_suffix_set(string) | |
return len(prefix.intersection(suffix)) | |
def kmp(string, what): | |
lstr = len(string) | |
lwhat = len(what) | |
if lstr < lwhat: return False | |
i,j=0,0 | |
while True: | |
if i > lstr: return False | |
elif string[i] == what[j]: | |
if j == (lwhat-1): return i-j,i+1 | |
else: i,j=i+1,j+1 | |
elif j==0: i,j=i+1,j | |
else: i,j=i+j-cpm(what[:j]),0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment