Created
December 27, 2014 13:32
-
-
Save saikatkumardey/e7731308d2d92e0c5666 to your computer and use it in GitHub Desktop.
KMP- String search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#define print_vector(vec) for(int i=0;i<vec.size();i++) { cout<<vec[i]<<" ";} cout<<endl; | |
using namespace std; | |
/* | |
pattern = abacab | |
proper prefix = {NULL,a,ab,aba,abac,abaca} | |
proper suffix = {NULL,b,ab,cab,acab, bacab} | |
border= {NULL,ab} | |
widest border = ab [ maximum length string contained in both proper prefix and proper suffix of pattern] | |
Let's take another example, | |
pattern= ababa | |
proper prefix= {NULL,a,ab,aba,abab} | |
proper suffix= {NULL,a,ba,aba,baba} | |
border= {NULL,a,aba} | |
widest border= aba | |
Definition: | |
A border is a substring that is contained in both the proper prefix and proper suffix of a string. | |
Now, | |
prefix_table[j] = width of the "widest border" of the prefix of length j | |
Let's understand it with an example, | |
j 0|1|2|3|4|5|6 | |
pattern[j] a|b|a|b|a|a| | |
prefix_table[j] -1|0|0|1|2|3|1 | |
[ NOTE: when we say prefix_table[i], we are considering the string pattern[0 to i-1] ] | |
Here, | |
prefix_table[0] = -1, since there is no proper-prefix/suffix of a string of length 0, so widest border=-1 | |
prefix_table[1] = 0, Now, we are considering proper-prefix/suffix of string of length 1 = "a" | |
which is NULL. So, widest-border= 0 | |
prefix_table[2]= 0 Now, proper-prefix of string of length 2 = "ab" = {NULL,a} | |
proper-suffix of "ab" = {NULL,b} | |
border= {NULL} | |
widest boder= NULL | |
Length of widest boder= 0 | |
prefix_table[3]= 1 proper-prefix of string of length 3 = "aba" = {NULL,a,ab} | |
proper-suffix of "aba" = {NULL,a,ba} | |
border= {NULL,a} | |
widest boder= a | |
Length of widest boder= 1 | |
prefix_table[4]= 2 proper-prefix of string of length 2 = "abab" = {NULL,a,ab,aba} | |
proper-suffix of "abab" = {NULL,b,ab,bab} | |
border= {NULL,ab} | |
widest boder= ab | |
Length of widest boder= 2 | |
prefix_table[5]= 3 proper-prefix of string of length 2 = "ababa" = {NULL,a,ab,aba,abab} | |
proper-suffix of "ab" = {NULL,a,ba,aba,baba} | |
border= {NULL,a,aba} | |
widest boder= aba | |
Length of widest boder= 3 | |
prefix_table[6]= 1 proper-prefix of string of length 2 = "ababaa" = {NULL,a,ab,aba,abab,ababa} | |
proper-suffix of "ab" = {NULL,a,aa,baa,abaa,babaa} | |
border= {NULL,a} | |
widest boder= a | |
Length of widest boder= 1 | |
*/ | |
void kmp_preprocess(vector<int>& prefix_table, string pattern) | |
{ | |
int pattern_length= pattern.size(); | |
int i=0, j=-1; | |
prefix_table[i]=j; | |
while(i < pattern_length) | |
{ | |
//back-track to previously matched prefix | |
while(j>=0 && pattern[i]!=pattern[j]) j= prefix_table[j]; | |
i++,j++; | |
prefix_table[i]= j; | |
} | |
} | |
void kmp_search(string text, string pattern) | |
{ | |
vector<int> prefix_table(pattern.size()+1); | |
kmp_preprocess(prefix_table, pattern); | |
//print_vector(prefix_table); | |
int i=0, j=0; | |
while(i < text.size()) | |
{ | |
//back-track to previously matched prefix if pattern[j] and text[i] doesn't match | |
while(j>=0 && text[i]!= pattern[j]) j= prefix_table[j]; | |
//a character matches, check next character of text with pattern | |
i++,j++; | |
//if all characters of pattern have been matched, report the index | |
if(j == pattern.size() ) | |
{ | |
cout<<"Match Found at index: "<<i-j<<endl; | |
j= prefix_table[j]; | |
//back-track to previously matched prefix to keep reporting other occurrences of the pattern | |
} | |
} | |
} | |
int main(void) | |
{ | |
string text= "abababababaababababaa"; | |
string pattern="ababaa"; | |
kmp_search(text, pattern); | |
/* | |
Match Found at index: 6 | |
Match Found at index: 15 | |
*/ | |
} | |
/* | |
References: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment