Created
March 17, 2012 00:04
-
-
Save mattdeboard/2053794 to your computer and use it in GitHub Desktop.
Knuth-Morris-Pratt algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def kmp_match(seq, subseq): | |
""" | |
Implementation of the Knuth-Morris-Pratt (KMP) algorithm in Python. | |
This algorithm finds valid shifts to locate subsequence `subseq` in | |
sequence `seq`. | |
""" | |
n = len(seq) | |
m = len(subseq) | |
pi = prefix(subseq) | |
k = 0 | |
for i in range(n): | |
while k > 0 and subseq[k] != seq[i]: | |
k = pi[k-1] | |
if subseq[k] == seq[i]: | |
k += 1 | |
if k == m: | |
print u"Pattern occurs with shift %d" % ((i+1) - m) | |
k = 0 | |
def prefix(seq): | |
"""Checks seq for valid shifts against itself.""" | |
m = len(seq) | |
pi = range(m) | |
k = 0 | |
for i in pi[1:]: | |
while k > 0 and seq[k+1] != seq[i]: | |
k = pi[k] | |
if seq[k+1] == seq[i]: | |
k = k + 1 | |
pi[i] = k | |
return pi | |
# Example | |
g = """Suppose you have an array of 99 numbers. The array contains the | |
digits 1 to 100 with one digit missing. Describe four different algorithms to | |
compute the missing number. Two of these should optimize for low storage and two | |
of these should optimize for fast processing.""" | |
substr = "missing" | |
# >> kmp_match(g, substr) | |
# Pattern occurs with shift 95 | |
# Pattern occurs with shift 154 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment