|
#include <iostream> |
|
#include <string> |
|
#include <vector> |
|
|
|
using namespace std; |
|
|
|
#include "automatonDrawer.cpp" |
|
|
|
// code taken from http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=stringSearching |
|
// Pay attention! |
|
// the prefix under index i in the table above is |
|
// is the string from pattern[0] to pattern[i - 1] |
|
// inclusive, so the last character of the string under |
|
// index i is pattern[i - 1] |
|
|
|
vector<int> build_failure_function(const string& pattern) |
|
{ |
|
int m = pattern.size(); |
|
// let m be the length of the pattern |
|
vector<int> F(m); |
|
F[0] = F[1] = 0; // always true |
|
|
|
for(int i = 2; i <= m; i++) { |
|
// j is the index of the largest next partial match |
|
// (the largest suffix/prefix) of the string under |
|
// index i - 1 |
|
int j = F[i - 1]; |
|
for( ; ; ) { |
|
// check to see if the last character of string i - |
|
// - pattern[i - 1] "expands" the current "candidate" |
|
// best partial match - the prefix under index j |
|
if(pattern[j] == pattern[i - 1]) { |
|
F[i] = j + 1; break; |
|
} |
|
// if we cannot "expand" even the empty string |
|
if(j == 0) { F[i] = 0; break; } |
|
// else go to the next best "candidate" partial match |
|
j = F[j]; |
|
} |
|
} |
|
return F; |
|
} |
|
|
|
#define forn(i, n) for (int i = 0; i < int(n); ++i) |
|
|
|
vector<char> extract_used_letters(const string& s) { |
|
vector<bool> present(255, false); |
|
forn (i,s.size()) |
|
present[s[i]] = true; |
|
|
|
vector<char> res; |
|
forn (i, 255) |
|
if (present[i]) |
|
res.push_back(i); |
|
return res; |
|
} |
|
|
|
void drawKMPAutomaton(const string& pattern) { |
|
const vector<int> v = build_failure_function(pattern); |
|
int n = v.size(); |
|
|
|
cerr << "failure function: "; |
|
forn (i,n) |
|
cerr << v[i] << " "; |
|
cerr << endl; |
|
|
|
automatonDrawer A; |
|
|
|
const vector<char> letters = extract_used_letters(pattern); |
|
forn (i, n+1) { |
|
for (char letter : letters) { |
|
int j = i; |
|
for (;;) { |
|
if (pattern[j] == letter) { |
|
A.connect(i, j+1, letter); |
|
break; |
|
} |
|
|
|
if (j > 0) { // didn't match, pick another shorter proper prefix-suffix to expand |
|
j = v[j]; |
|
} else { // can't expand at all, go to the beginning. |
|
A.connect(i, 0, letter); |
|
break; |
|
} |
|
} |
|
} |
|
} |
|
|
|
A.draw(n); |
|
} |
|
|
|
int main(int argc, char* argv[]) { |
|
if (argc != 2) { |
|
cerr << endl << "Usage: " << argv[0] << " pattern" << endl << endl; |
|
cerr << "Generates a Graphviz dot file for plotting the implicit automaton used " << endl; |
|
cerr << "to match the pattern during the KMP string matching algorithm." << endl << endl; |
|
cerr << "example:" << endl << endl; |
|
cerr << " " << argv[0] << " ababac | dot -Tpng -o automaton.png" << endl << endl; |
|
return 1; |
|
} |
|
string pattern = argv[1]; |
|
drawKMPAutomaton(pattern); |
|
return 0; |
|
} |