songron · January 15, 2021 07:24
diff --git a/0-StringSearch-KMP-ShiftAnd-ShiftOr b/0-StringSearch-KMP-ShiftAnd-ShiftOr
 KMP算法；字符串匹配/子串查找。
diff --git a/KMP.cc b/KMP.cc
 // Copyright (c) 2014 Peking University.
 // Author: Xiaosong Rong ([email protected])

 #include <iostream>
 #include <string>
 #include <vector>
 using namespace std;

 class KMP {
 private:
    // find the longest common prefix-suffix substr, for every prefix-substr of t
    // match[i] records the endpoint of the longest common prefix-suffix for t[0..i]
    // e.g., t = "ababxabc", then match = {-1, -1, 0, 1, -1, 0, 1, -1}
    void preprocess(string t, int match[], const int n) {
        match[0] = -1;
        int i = 0, j = 1;
        while (j < n) {
            if (t[j] == t[i]) {
                match[j++] = i++;
            }
            else {
                if (i == 0) {
                    match[j++] = -1;
                }
                else {
                    i = match[i-1] + 1;
                }
            }
        }
    }

 public:
    KMP() {}
    ~KMP() {}
    void kmpsearch(string s, string t, vector<int> &index) ;
 };

 void KMP::kmpsearch(string s, string t, vector<int> &index) {
    const int n = t.size(), m = s.size();
    int *match = new int[n];
    preprocess(t, match, n);

    int i = 0, j = 0;   // i is for the pattern 't'; j is for the string 's'
    while (j < m) {
        while (i < n && j < m && s[j] == t[i]) {
            i++;
            j++;
        }
        if (i == n) {  // find a match
            index.push_back(j-n);
            i = match[i-1] + 1;
            continue;
        }
        if (j == m) break;

        if (i == 0) {
            j++;
        }
        else {
            i = match[i-1] + 1;
        }
    }

    delete [] match;
 }


 int main() {
    KMP kmp;

    string s, t;
    vector<int> result;

    while (1) {
        cin >> s >> t;
        if (s == "exit") break;

        kmp.kmpsearch(s, t, result);
        for (int i = 0; i < result.size(); i++) {
            cout << result[i] << ' ';
        }
        cout << endl;
 	result.clear();
    }

    return 0;
 }
diff --git a/kmpsearch.cpp b/kmpsearch.cpp
 /*/
    Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
    Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm

    Find t in s (t and s are both string) :
    complexity of preprocess or kmpsearch is O(m), O(n) respectly.
    n > m, so overall complexity of KMP algorithm is O(n)
 */

 #include <iostream>
 #include <vector>
 using namespace std;

 void preprocess(string t, int *next, int n)
 {
    int i=1, j=0;
    while (i < n) {
        if (t[i] == t[j])
            next[i++] = ++j;
        else if (j == 0)  // t[i] != t[j]
            next[i++] = j;
        else
            j = next[j-1];
    }
 }

 vector<int> kmpsearch(string s, string t)
 {
    vector<int> res;
    int m=s.length(), n=t.length();
    int *next = new int [n];
    preprocess(t, next, n);

    int i=0, j=0;
    while (i < m && m-i >= n-j) {
        while (j<n && s[i] == t[j]) {
            i++;
            j++;
        }
        if (j == n) {
            res.push_back(i-n);
            j = next[j-1];
        }
        else {
            if (j == 0)
                i++;
            else
                j = next[j-1];
        }
    }

    delete [] next;
    return res;
 }

 void show(vector<int> res)
 {
    int n = res.size();
    for (int i=0; i<n; i++)
        cout << res[i] << ' ';
    cout << endl;
 }

 int main()
 {
    string s, t;
    cin >> s >> t;
    vector<int> res = kmpsearch(s, t);
    show(res);
    return 0;
 }
diff --git a/kmpsearch.py b/kmpsearch.py
 #coding=utf8
 
 """
    Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
    Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
 
    Find t in s (t and s are both string) :
    complexity of preprocess or kmpsearch is O(m), O(n) respectly.
    n > m, so overall complexity of KMP algorithm is O(n)
 """
 
 def preprocess(t):
    m = len(t)
    next = [0]*(m)
    i = 1
    j = 0
    """
        In this loop, i+=1 is excuted exactly m times.
        j=next[j-1], it will decrease the value of j by at least 1. 
        j can only be increased at most m times, 
        so j=next[j-1] will be excuted at most m times.
        So, overall loop requires at most 2m=O(m) steps.
    """
    while i<m:
        if t[i] == t[j]:
            j += 1
            next[i] = j
        elif j == 0:
            next[i] = 0
        else:
            j = next[j-1]
            continue
        i += 1
    return next
 
 def kmpsearch(s, t, next):
    n = len(s)
    m = len(t)
    i = j = 0
    res = []
    """
        In this loop, i+=1 will be excuted at most n times.
        j+=1 is excuted at most n times.
        j=next[j-1] decrease j at least 1, so it will be excuted at most n times.
        i+=1 is not excuted only when j=next[j-1] is excuted !
        So, overall loop requires at most 2n=O(n) steps.
    """
    while i < n and m-j <= n-i:  #if m-j>n-1, remain of t cannot match any substring of remain of s
        while j<m and s[i] == t[j]:
            i += 1
            j += 1
        if j == 0:
            i += 1
        else:
            if j == m: #find a result
                res.append(i-j)
            j = next[j-1] # decrease j at least 1
    return res
 
 
 def test():
    s = 'abababaaasdfababaaasdfababa'
    t = 'ababaa'
    next = preprocess(t)
    res = kmpsearch(s, t, next)
    print res
 
 if __name__ == '__main__':
    test()
diff --git a/ShiftAnd.cc b/ShiftAnd.cc
 #include <iostream>
 #include <string>
 #include <vector>
 using namespace std;

 /*
 Very efficient, but only work when the length of the pattern is no longer than the memory-word size of the machine.
 */

 class ShiftAnd {
 private:
    void preprocess(string t, long long *appear, const int n) {
        for (int i = 0; i < n; i++) {
            appear[t[i]] |= 1 << i;   // appear[c] records where c appears
        }
    }

 public:
    void search(string s, string t, vector<int> &index);
 };

 void ShiftAnd::search(string s, string t, vector<int> &index) {
    const int m = s.size(), n = t.size();
    long long *appear = new long long[256]();
    preprocess(t, appear, n);

    long long p = 0, high = 1 << (n - 1);  // high = 1000...000
    // the ith bit of p is 1 means that t[0..i] has been matched.
    // So, the (n-1)th bit is 1 means that the pattern t has been matched.
    for (int i = 0; i < m; i++) {
        p = (p << 1 | 1) & appear[s[i]];
        if (p & high) index.push_back(i-n+1);
    }

    delete [] appear;
 }

 int main() {
    ShiftAnd sa;
    string s, t;
    vector<int> result;

    while (1) {
        cin >> s >> t;
        if (s == "exit") break;

        sa.search(s, t, result);
        for (int i = 0; i < result.size(); i++) {
            cout << result[i] << ' ';
        }
        cout << endl;
 	result.clear();
    }

    return 0;
 }
diff --git a/ShiftOr.cc b/ShiftOr.cc
 #include <iostream>
 #include <string>
 #include <vector>
 using namespace std;

 class ShiftOr {
 private:
    void preprocess(string t, long long *appear, const int n) {
        for (int i = 0; i < n; i++) {
            appear[t[i]] ^= 1 << i;
        }
    }

 public:
    void search(string s, string t, vector<int> &index);
 };

 void ShiftOr::search(string s, string t, vector<int> &index) {
    const int m = s.size(), n = t.size();
    long long *appear = new long long[256]();
    for (int i = 0; i < 256; i++) appear[i] = ~0;
    preprocess(t, appear, n);

    long long p = ~0, high = 1 << (n - 1);  // high = 1000...000
    for (int i = 0; i < m; i++) {
        // p = (p << 1 | 1) & appear[s[i]];
        p = (p << 1) | appear[s[i]];
        if ((~p) & high) index.push_back(i-n+1);
    }

    delete [] appear;
 }

 int main() {
    ShiftOr so;
    string s, t;
    vector<int> result;

    while (1) {
        cin >> s >> t;
        if (s == "exit") break;

        so.search(s, t, result);
        for (int i = 0; i < result.size(); i++) {
            cout << result[i] << ' ';
        }
        cout << endl;
 	result.clear();
    }

    return 0;
 }
diff --git a/testcase.txt b/testcase.txt
 asdf asdf
 asdfasdf asdf
 abcasdfasdfefg asdf
 abcabcabcefg abcabce
 aaaaaaa aaaa
 exit exit
	// Copyright (c) 2014 Peking University.
	// Author: Xiaosong Rong ([email protected])

	#include <iostream>
	#include <string>
	#include <vector>
	using namespace std;

	class KMP {
	private:
	// find the longest common prefix-suffix substr, for every prefix-substr of t
	// match[i] records the endpoint of the longest common prefix-suffix for t[0..i]
	// e.g., t = "ababxabc", then match = {-1, -1, 0, 1, -1, 0, 1, -1}
	void preprocess(string t, int match[], const int n) {
	match[0] = -1;
	int i = 0, j = 1;
	while (j < n) {
	if (t[j] == t[i]) {
	match[j++] = i++;
	}
	else {
	if (i == 0) {
	match[j++] = -1;
	}
	else {
	i = match[i-1] + 1;
	}
	}
	}
	}

	public:
	KMP() {}
	~KMP() {}
	void kmpsearch(string s, string t, vector<int> &index) ;
	};

	void KMP::kmpsearch(string s, string t, vector<int> &index) {
	const int n = t.size(), m = s.size();
	int *match = new int[n];
	preprocess(t, match, n);

	int i = 0, j = 0; // i is for the pattern 't'; j is for the string 's'
	while (j < m) {
	while (i < n && j < m && s[j] == t[i]) {
	i++;
	j++;
	}
	if (i == n) { // find a match
	index.push_back(j-n);
	i = match[i-1] + 1;
	continue;
	}
	if (j == m) break;

	if (i == 0) {
	j++;
	}
	else {
	i = match[i-1] + 1;
	}
	}

	delete [] match;
	}


	int main() {
	KMP kmp;

	string s, t;
	vector<int> result;

	while (1) {
	cin >> s >> t;
	if (s == "exit") break;

	kmp.kmpsearch(s, t, result);
	for (int i = 0; i < result.size(); i++) {
	cout << result[i] << ' ';
	}
	cout << endl;
	result.clear();
	}

	return 0;
	}
	/*/
	Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
	Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm

	Find t in s (t and s are both string) :
	complexity of preprocess or kmpsearch is O(m), O(n) respectly.
	n > m, so overall complexity of KMP algorithm is O(n)
	*/

	#include <iostream>
	#include <vector>
	using namespace std;

	void preprocess(string t, int *next, int n)
	{
	int i=1, j=0;
	while (i < n) {
	if (t[i] == t[j])
	next[i++] = ++j;
	else if (j == 0) // t[i] != t[j]
	next[i++] = j;
	else
	j = next[j-1];
	}
	}

	vector<int> kmpsearch(string s, string t)
	{
	vector<int> res;
	int m=s.length(), n=t.length();
	int *next = new int [n];
	preprocess(t, next, n);

	int i=0, j=0;
	while (i < m && m-i >= n-j) {
	while (j<n && s[i] == t[j]) {
	i++;
	j++;
	}
	if (j == n) {
	res.push_back(i-n);
	j = next[j-1];
	}
	else {
	if (j == 0)
	i++;
	else
	j = next[j-1];
	}
	}

	delete [] next;
	return res;
	}

	void show(vector<int> res)
	{
	int n = res.size();
	for (int i=0; i<n; i++)
	cout << res[i] << ' ';
	cout << endl;
	}

	int main()
	{
	string s, t;
	cin >> s >> t;
	vector<int> res = kmpsearch(s, t);
	show(res);
	return 0;
	}
	#coding=utf8

	"""
	Knuth-Morris-Pratt algorithm -- KMP-search for string matching.
	Reference: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm

	Find t in s (t and s are both string) :
	complexity of preprocess or kmpsearch is O(m), O(n) respectly.
	n > m, so overall complexity of KMP algorithm is O(n)
	"""

	def preprocess(t):
	m = len(t)
	next = [0]*(m)
	i = 1
	j = 0
	"""
	In this loop, i+=1 is excuted exactly m times.
	j=next[j-1], it will decrease the value of j by at least 1.
	j can only be increased at most m times,
	so j=next[j-1] will be excuted at most m times.
	So, overall loop requires at most 2m=O(m) steps.
	"""
	while i<m:
	if t[i] == t[j]:
	j += 1
	next[i] = j
	elif j == 0:
	next[i] = 0
	else:
	j = next[j-1]
	continue
	i += 1
	return next

	def kmpsearch(s, t, next):
	n = len(s)
	m = len(t)
	i = j = 0
	res = []
	"""
	In this loop, i+=1 will be excuted at most n times.
	j+=1 is excuted at most n times.
	j=next[j-1] decrease j at least 1, so it will be excuted at most n times.
	i+=1 is not excuted only when j=next[j-1] is excuted !
	So, overall loop requires at most 2n=O(n) steps.
	"""
	while i < n and m-j <= n-i: #if m-j>n-1, remain of t cannot match any substring of remain of s
	while j<m and s[i] == t[j]:
	i += 1
	j += 1
	if j == 0:
	i += 1
	else:
	if j == m: #find a result
	res.append(i-j)
	j = next[j-1] # decrease j at least 1
	return res


	def test():
	s = 'abababaaasdfababaaasdfababa'
	t = 'ababaa'
	next = preprocess(t)
	res = kmpsearch(s, t, next)
	print res

	if __name__ == '__main__':
	test()
	asdf asdf
	asdfasdf asdf
	abcasdfasdfefg asdf
	abcabcabcefg abcabce
	aaaaaaa aaaa
	exit exit