Last active
January 15, 2021 07:24
-
-
Save songron/4342248 to your computer and use it in GitHub Desktop.
KMP search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
KMP算法;字符串匹配/子串查找。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2014 Peking University. | |
// Author: Xiaosong Rong ([email protected]) | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
class KMP { | |
private: | |
// find the longest common prefix-suffix substr, for every prefix-substr of t | |
// match[i] records the endpoint of the longest common prefix-suffix for t[0..i] | |
// e.g., t = "ababxabc", then match = {-1, -1, 0, 1, -1, 0, 1, -1} | |
void preprocess(string t, int match[], const int n) { | |
match[0] = -1; | |
int i = 0, j = 1; | |
while (j < n) { | |
if (t[j] == t[i]) { | |
match[j++] = i++; | |
} | |
else { | |
if (i == 0) { | |
match[j++] = -1; | |
} | |
else { | |
i = match[i-1] + 1; | |
} | |
} | |
} | |
} | |
public: | |
KMP() {} | |
~KMP() {} | |
void kmpsearch(string s, string t, vector<int> &index) ; | |
}; | |
void KMP::kmpsearch(string s, string t, vector<int> &index) { | |
const int n = t.size(), m = s.size(); | |
int *match = new int[n]; | |
preprocess(t, match, n); | |
int i = 0, j = 0; // i is for the pattern 't'; j is for the string 's' | |
while (j < m) { | |
while (i < n && j < m && s[j] == t[i]) { | |
i++; | |
j++; | |
} | |
if (i == n) { // find a match | |
index.push_back(j-n); | |
i = match[i-1] + 1; | |
continue; | |
} | |
if (j == m) break; | |
if (i == 0) { | |
j++; | |
} | |
else { | |
i = match[i-1] + 1; | |
} | |
} | |
delete [] match; | |
} | |
int main() { | |
KMP kmp; | |
string s, t; | |
vector<int> result; | |
while (1) { | |
cin >> s >> t; | |
if (s == "exit") break; | |
kmp.kmpsearch(s, t, result); | |
for (int i = 0; i < result.size(); i++) { | |
cout << result[i] << ' '; | |
} | |
cout << endl; | |
result.clear(); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*/ | |
Knuth-Morris-Pratt algorithm -- KMP-search for string matching. | |
Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm | |
Find t in s (t and s are both string) : | |
complexity of preprocess or kmpsearch is O(m), O(n) respectly. | |
n > m, so overall complexity of KMP algorithm is O(n) | |
*/ | |
#include <iostream> | |
#include <vector> | |
using namespace std; | |
void preprocess(string t, int *next, int n) | |
{ | |
int i=1, j=0; | |
while (i < n) { | |
if (t[i] == t[j]) | |
next[i++] = ++j; | |
else if (j == 0) // t[i] != t[j] | |
next[i++] = j; | |
else | |
j = next[j-1]; | |
} | |
} | |
vector<int> kmpsearch(string s, string t) | |
{ | |
vector<int> res; | |
int m=s.length(), n=t.length(); | |
int *next = new int [n]; | |
preprocess(t, next, n); | |
int i=0, j=0; | |
while (i < m && m-i >= n-j) { | |
while (j<n && s[i] == t[j]) { | |
i++; | |
j++; | |
} | |
if (j == n) { | |
res.push_back(i-n); | |
j = next[j-1]; | |
} | |
else { | |
if (j == 0) | |
i++; | |
else | |
j = next[j-1]; | |
} | |
} | |
delete [] next; | |
return res; | |
} | |
void show(vector<int> res) | |
{ | |
int n = res.size(); | |
for (int i=0; i<n; i++) | |
cout << res[i] << ' '; | |
cout << endl; | |
} | |
int main() | |
{ | |
string s, t; | |
cin >> s >> t; | |
vector<int> res = kmpsearch(s, t); | |
show(res); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf8 | |
""" | |
Knuth-Morris-Pratt algorithm -- KMP-search for string matching. | |
Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm | |
Find t in s (t and s are both string) : | |
complexity of preprocess or kmpsearch is O(m), O(n) respectly. | |
n > m, so overall complexity of KMP algorithm is O(n) | |
""" | |
def preprocess(t): | |
m = len(t) | |
next = [0]*(m) | |
i = 1 | |
j = 0 | |
""" | |
In this loop, i+=1 is excuted exactly m times. | |
j=next[j-1], it will decrease the value of j by at least 1. | |
j can only be increased at most m times, | |
so j=next[j-1] will be excuted at most m times. | |
So, overall loop requires at most 2m=O(m) steps. | |
""" | |
while i<m: | |
if t[i] == t[j]: | |
j += 1 | |
next[i] = j | |
elif j == 0: | |
next[i] = 0 | |
else: | |
j = next[j-1] | |
continue | |
i += 1 | |
return next | |
def kmpsearch(s, t, next): | |
n = len(s) | |
m = len(t) | |
i = j = 0 | |
res = [] | |
""" | |
In this loop, i+=1 will be excuted at most n times. | |
j+=1 is excuted at most n times. | |
j=next[j-1] decrease j at least 1, so it will be excuted at most n times. | |
i+=1 is not excuted only when j=next[j-1] is excuted ! | |
So, overall loop requires at most 2n=O(n) steps. | |
""" | |
while i < n and m-j <= n-i: #if m-j>n-1, remain of t cannot match any substring of remain of s | |
while j<m and s[i] == t[j]: | |
i += 1 | |
j += 1 | |
if j == 0: | |
i += 1 | |
else: | |
if j == m: #find a result | |
res.append(i-j) | |
j = next[j-1] # decrease j at least 1 | |
return res | |
def test(): | |
s = 'abababaaasdfababaaasdfababa' | |
t = 'ababaa' | |
next = preprocess(t) | |
res = kmpsearch(s, t, next) | |
print res | |
if __name__ == '__main__': | |
test() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
/* | |
Very efficient, but only work when the length of the pattern is no longer than the memory-word size of the machine. | |
*/ | |
class ShiftAnd { | |
private: | |
void preprocess(string t, long long *appear, const int n) { | |
for (int i = 0; i < n; i++) { | |
appear[t[i]] |= 1 << i; // appear[c] records where c appears | |
} | |
} | |
public: | |
void search(string s, string t, vector<int> &index); | |
}; | |
void ShiftAnd::search(string s, string t, vector<int> &index) { | |
const int m = s.size(), n = t.size(); | |
long long *appear = new long long[256](); | |
preprocess(t, appear, n); | |
long long p = 0, high = 1 << (n - 1); // high = 1000...000 | |
// the ith bit of p is 1 means that t[0..i] has been matched. | |
// So, the (n-1)th bit is 1 means that the pattern t has been matched. | |
for (int i = 0; i < m; i++) { | |
p = (p << 1 | 1) & appear[s[i]]; | |
if (p & high) index.push_back(i-n+1); | |
} | |
delete [] appear; | |
} | |
int main() { | |
ShiftAnd sa; | |
string s, t; | |
vector<int> result; | |
while (1) { | |
cin >> s >> t; | |
if (s == "exit") break; | |
sa.search(s, t, result); | |
for (int i = 0; i < result.size(); i++) { | |
cout << result[i] << ' '; | |
} | |
cout << endl; | |
result.clear(); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
class ShiftOr { | |
private: | |
void preprocess(string t, long long *appear, const int n) { | |
for (int i = 0; i < n; i++) { | |
appear[t[i]] ^= 1 << i; | |
} | |
} | |
public: | |
void search(string s, string t, vector<int> &index); | |
}; | |
void ShiftOr::search(string s, string t, vector<int> &index) { | |
const int m = s.size(), n = t.size(); | |
long long *appear = new long long[256](); | |
for (int i = 0; i < 256; i++) appear[i] = ~0; | |
preprocess(t, appear, n); | |
long long p = ~0, high = 1 << (n - 1); // high = 1000...000 | |
for (int i = 0; i < m; i++) { | |
// p = (p << 1 | 1) & appear[s[i]]; | |
p = (p << 1) | appear[s[i]]; | |
if ((~p) & high) index.push_back(i-n+1); | |
} | |
delete [] appear; | |
} | |
int main() { | |
ShiftOr so; | |
string s, t; | |
vector<int> result; | |
while (1) { | |
cin >> s >> t; | |
if (s == "exit") break; | |
so.search(s, t, result); | |
for (int i = 0; i < result.size(); i++) { | |
cout << result[i] << ' '; | |
} | |
cout << endl; | |
result.clear(); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
asdf asdf | |
asdfasdf asdf | |
abcasdfasdfefg asdf | |
abcabcabcefg abcabce | |
aaaaaaa aaaa | |
exit exit |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment