Skip to content

Instantly share code, notes, and snippets.

@songron
Last active January 15, 2021 07:24
Show Gist options
  • Save songron/4342248 to your computer and use it in GitHub Desktop.
Save songron/4342248 to your computer and use it in GitHub Desktop.
KMP search
KMP算法;字符串匹配/子串查找。
// Copyright (c) 2014 Peking University.
// Author: Xiaosong Rong ([email protected])
#include <iostream>
#include <string>
#include <vector>
using namespace std;
class KMP {
private:
// find the longest common prefix-suffix substr, for every prefix-substr of t
// match[i] records the endpoint of the longest common prefix-suffix for t[0..i]
// e.g., t = "ababxabc", then match = {-1, -1, 0, 1, -1, 0, 1, -1}
void preprocess(string t, int match[], const int n) {
match[0] = -1;
int i = 0, j = 1;
while (j < n) {
if (t[j] == t[i]) {
match[j++] = i++;
}
else {
if (i == 0) {
match[j++] = -1;
}
else {
i = match[i-1] + 1;
}
}
}
}
public:
KMP() {}
~KMP() {}
void kmpsearch(string s, string t, vector<int> &index) ;
};
void KMP::kmpsearch(string s, string t, vector<int> &index) {
const int n = t.size(), m = s.size();
int *match = new int[n];
preprocess(t, match, n);
int i = 0, j = 0; // i is for the pattern 't'; j is for the string 's'
while (j < m) {
while (i < n && j < m && s[j] == t[i]) {
i++;
j++;
}
if (i == n) { // find a match
index.push_back(j-n);
i = match[i-1] + 1;
continue;
}
if (j == m) break;
if (i == 0) {
j++;
}
else {
i = match[i-1] + 1;
}
}
delete [] match;
}
int main() {
KMP kmp;
string s, t;
vector<int> result;
while (1) {
cin >> s >> t;
if (s == "exit") break;
kmp.kmpsearch(s, t, result);
for (int i = 0; i < result.size(); i++) {
cout << result[i] << ' ';
}
cout << endl;
result.clear();
}
return 0;
}
/*/
Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
Find t in s (t and s are both string) :
complexity of preprocess or kmpsearch is O(m), O(n) respectly.
n > m, so overall complexity of KMP algorithm is O(n)
*/
#include <iostream>
#include <vector>
using namespace std;
void preprocess(string t, int *next, int n)
{
int i=1, j=0;
while (i < n) {
if (t[i] == t[j])
next[i++] = ++j;
else if (j == 0) // t[i] != t[j]
next[i++] = j;
else
j = next[j-1];
}
}
vector<int> kmpsearch(string s, string t)
{
vector<int> res;
int m=s.length(), n=t.length();
int *next = new int [n];
preprocess(t, next, n);
int i=0, j=0;
while (i < m && m-i >= n-j) {
while (j<n && s[i] == t[j]) {
i++;
j++;
}
if (j == n) {
res.push_back(i-n);
j = next[j-1];
}
else {
if (j == 0)
i++;
else
j = next[j-1];
}
}
delete [] next;
return res;
}
void show(vector<int> res)
{
int n = res.size();
for (int i=0; i<n; i++)
cout << res[i] << ' ';
cout << endl;
}
int main()
{
string s, t;
cin >> s >> t;
vector<int> res = kmpsearch(s, t);
show(res);
return 0;
}
#coding=utf8
"""
Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
Find t in s (t and s are both string) :
complexity of preprocess or kmpsearch is O(m), O(n) respectly.
n > m, so overall complexity of KMP algorithm is O(n)
"""
def preprocess(t):
m = len(t)
next = [0]*(m)
i = 1
j = 0
"""
In this loop, i+=1 is excuted exactly m times.
j=next[j-1], it will decrease the value of j by at least 1.
j can only be increased at most m times,
so j=next[j-1] will be excuted at most m times.
So, overall loop requires at most 2m=O(m) steps.
"""
while i<m:
if t[i] == t[j]:
j += 1
next[i] = j
elif j == 0:
next[i] = 0
else:
j = next[j-1]
continue
i += 1
return next
def kmpsearch(s, t, next):
n = len(s)
m = len(t)
i = j = 0
res = []
"""
In this loop, i+=1 will be excuted at most n times.
j+=1 is excuted at most n times.
j=next[j-1] decrease j at least 1, so it will be excuted at most n times.
i+=1 is not excuted only when j=next[j-1] is excuted !
So, overall loop requires at most 2n=O(n) steps.
"""
while i < n and m-j <= n-i: #if m-j>n-1, remain of t cannot match any substring of remain of s
while j<m and s[i] == t[j]:
i += 1
j += 1
if j == 0:
i += 1
else:
if j == m: #find a result
res.append(i-j)
j = next[j-1] # decrease j at least 1
return res
def test():
s = 'abababaaasdfababaaasdfababa'
t = 'ababaa'
next = preprocess(t)
res = kmpsearch(s, t, next)
print res
if __name__ == '__main__':
test()
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/*
Very efficient, but only work when the length of the pattern is no longer than the memory-word size of the machine.
*/
class ShiftAnd {
private:
void preprocess(string t, long long *appear, const int n) {
for (int i = 0; i < n; i++) {
appear[t[i]] |= 1 << i; // appear[c] records where c appears
}
}
public:
void search(string s, string t, vector<int> &index);
};
void ShiftAnd::search(string s, string t, vector<int> &index) {
const int m = s.size(), n = t.size();
long long *appear = new long long[256]();
preprocess(t, appear, n);
long long p = 0, high = 1 << (n - 1); // high = 1000...000
// the ith bit of p is 1 means that t[0..i] has been matched.
// So, the (n-1)th bit is 1 means that the pattern t has been matched.
for (int i = 0; i < m; i++) {
p = (p << 1 | 1) & appear[s[i]];
if (p & high) index.push_back(i-n+1);
}
delete [] appear;
}
int main() {
ShiftAnd sa;
string s, t;
vector<int> result;
while (1) {
cin >> s >> t;
if (s == "exit") break;
sa.search(s, t, result);
for (int i = 0; i < result.size(); i++) {
cout << result[i] << ' ';
}
cout << endl;
result.clear();
}
return 0;
}
#include <iostream>
#include <string>
#include <vector>
using namespace std;
class ShiftOr {
private:
void preprocess(string t, long long *appear, const int n) {
for (int i = 0; i < n; i++) {
appear[t[i]] ^= 1 << i;
}
}
public:
void search(string s, string t, vector<int> &index);
};
void ShiftOr::search(string s, string t, vector<int> &index) {
const int m = s.size(), n = t.size();
long long *appear = new long long[256]();
for (int i = 0; i < 256; i++) appear[i] = ~0;
preprocess(t, appear, n);
long long p = ~0, high = 1 << (n - 1); // high = 1000...000
for (int i = 0; i < m; i++) {
// p = (p << 1 | 1) & appear[s[i]];
p = (p << 1) | appear[s[i]];
if ((~p) & high) index.push_back(i-n+1);
}
delete [] appear;
}
int main() {
ShiftOr so;
string s, t;
vector<int> result;
while (1) {
cin >> s >> t;
if (s == "exit") break;
so.search(s, t, result);
for (int i = 0; i < result.size(); i++) {
cout << result[i] << ' ';
}
cout << endl;
result.clear();
}
return 0;
}
asdf asdf
asdfasdf asdf
abcasdfasdfefg asdf
abcabcabcefg abcabce
aaaaaaa aaaa
exit exit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment