Last active
October 21, 2024 07:04
-
-
Save m00nlight/daa6786cc503fde12a77 to your computer and use it in GitHub Desktop.
Python KMP algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class KMP: | |
def partial(self, pattern): | |
""" Calculate partial match table: String -> [Int]""" | |
ret = [0] | |
for i in range(1, len(pattern)): | |
j = ret[i - 1] | |
while j > 0 and pattern[j] != pattern[i]: | |
j = ret[j - 1] | |
ret.append(j + 1 if pattern[j] == pattern[i] else j) | |
return ret | |
def search(self, T, P): | |
""" | |
KMP search main algorithm: String -> String -> [Int] | |
Return all the matching position of pattern string P in T | |
""" | |
partial, ret, j = self.partial(P), [], 0 | |
for i in range(len(T)): | |
while j > 0 and T[i] != P[j]: | |
j = partial[j - 1] | |
if T[i] == P[j]: j += 1 | |
if j == len(P): | |
ret.append(i - (j - 1)) | |
j = partial[j - 1] | |
return ret | |
def test(): | |
p1 = "aa" | |
t1 = "aaaaaaaa" | |
kmp = KMP() | |
assert(kmp.search(t1, p1) == [0, 1, 2, 3, 4, 5, 6]) | |
p2 = "abc" | |
t2 = "abdabeabfabc" | |
assert(kmp.search(t2, p2) == [9]) | |
p3 = "aab" | |
t3 = "aaabaacbaab" | |
assert(kmp.search(t3, p3) == [1, 8]) | |
p4 = "11" | |
t4 = "111" | |
assert(kmp.search(t4, p4) == [0, 1]) | |
print("all test pass") |
Fails on
p5 = "ABC ABCDAB ABCDABCDABDE"
t5 = "ABCDABD"
assert (kmp.search(t5, p5) == [15])
Fails on p5 = "ABC ABCDAB ABCDABCDABDE" t5 = "ABCDABD" assert (kmp.search(t5, p5) == [15])
you need to switch t5 and p5 (t is the target string)...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I think the for loop in the partial() function is incorrect. There should be a while loop inside the for loop. A counter example for your partial function would be input pattern 'a a b a a a c'