Created
August 20, 2012 17:05
-
-
Save ZhanruiLiang/3405848 to your computer and use it in GitHub Desktop.
Suffix tree construction with Ukkonen's online algorithm. Implement in C++.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<iostream> | |
#include<string> | |
#include<cstdio> | |
#include<cstring> | |
#include<cctype> | |
#include<vector> | |
#include<cstring> | |
#include<algorithm> | |
#include<vector> | |
#include<cmath> | |
#define For(i, n) for(i = 0; i < n; i++) | |
#define Forr(i, l, n) for(i = l; i < n; i++) | |
typedef long long LL; | |
using namespace std; | |
////////////////////// suffix tree code below ////////////////////////////////// | |
// the alphabet size | |
const int E = 27; | |
const int SEP = E - 1; | |
const int N = 100002; | |
const int oo = 100000000; | |
class STNode{ | |
private: | |
struct Tran{ | |
int s, k, p; | |
}; | |
Tran ts[E]; // transitions' hash-table | |
public: | |
int fail; | |
void init(){ | |
int i; | |
For(i, E) ts[i].s = -1; | |
fail = -1; | |
} | |
bool has_transition(int cc){ | |
return ts[cc].s != -1; | |
} | |
void get_transition(int cc, int& k, int& p, int& s){ | |
k = ts[cc].k; | |
p = ts[cc].p; | |
s = ts[cc].s; | |
} | |
void set_transition(int cc, int k, int p, int s){ | |
ts[cc].s = s; | |
ts[cc].k = k; | |
ts[cc].p = p; | |
} | |
}; | |
STNode nodes[N + 2]; | |
class SuffixTree{ | |
public: | |
vector<int> origS; | |
int root, aux; | |
int nextId; | |
int activeS, activeK, activeP; | |
int new_node(){ | |
//create a new branch node and return the id | |
nodes[nextId].init(); | |
return nextId++; | |
} | |
int char2code(char c){ return c - 'a'; } | |
char code2char(int code){ return 'a' + code; } | |
void update(int& s, int& k, int& p, int i){ | |
// (s, (k, p)) is the canonicial reference pair for the active point | |
int oldr, r, r1; | |
int cc = origS[E+i]; | |
oldr = root; | |
int k1, p1; | |
while(1){ | |
if(test_and_split(s, k, p, cc, r)) | |
break; | |
//create new leaf node | |
nodes[r].get_transition(cc, k, p, r1); // r1 <= -1 | |
r1 = r1 - 1; | |
nodes[r].set_transition(cc, i, oo, r1); | |
if(oldr != root) nodes[oldr].fail = r; | |
oldr = r; | |
s = nodes[s].fail; | |
canonize(s, k, p); | |
} | |
if(oldr != root) nodes[oldr].fail = s; | |
} | |
bool test_and_split(int s, int k, int p, int cc, int & r){ | |
r = s; | |
int k1, p1, s1; | |
if(p > 0){ | |
nodes[s].get_transition(origS[E + k], k1, p1, s1); | |
if(cc == origS[E + k1 + p] and cc != SEP) return true; | |
else{ | |
r = new_node(); | |
nodes[s].set_transition(origS[E + k1], k1, p, r); | |
nodes[r].set_transition(origS[E + k1 + p], k1 + p, p1 - p, s1); | |
return false; | |
} | |
}else | |
return nodes[s].has_transition(cc) and cc != SEP; | |
} | |
void canonize(int& s, int& k, int& p){ | |
int ss, kk, pp; | |
if(p == 0) return; | |
nodes[s].get_transition(origS[E + k], kk, pp, ss); | |
while(pp <= p){ | |
k += pp; | |
p -= pp; | |
s = ss; | |
if(p > 0) nodes[s].get_transition(origS[E + k], kk, pp, ss); | |
else break; | |
} | |
} | |
void start_build(){ | |
//call before create a new tree | |
nextId = 0; | |
aux = new_node(); | |
root = new_node(); | |
int i; | |
// transitions from aux to root | |
origS.clear(); | |
For(i, E) origS.push_back(E-1-i); | |
For(i, E) nodes[aux].set_transition(i, -i-1, 1, root); | |
// suffix link from root to aux | |
nodes[root].fail = aux; | |
nodes[aux].fail = aux; | |
activeS = root; activeK = 0; activeP = 0; | |
} | |
void add_letter(char c){ | |
origS.push_back(char2code(c)); | |
//update from active point (s, (k, p)) | |
update(activeS, activeK, activeP, origS.size()-1-E); | |
//now (s, (k, p)) is the end point, it has a c-transition | |
activeP++; | |
canonize(activeS, activeK, activeP); | |
} | |
// optional part | |
void build(const string& s){ | |
start_build(); | |
int i; | |
For(i, s.size()) add_letter(s[i]); | |
} | |
// optional part | |
void print(){ | |
cout << "(" << root << ")\n"; | |
_print(root, " "); | |
} | |
// optional part | |
void _print(int s, string childIndent){ | |
int i; | |
int k, p, s1; | |
For(i, E){ | |
if(nodes[s].has_transition(i)){ | |
nodes[s].get_transition(i, k, p, s1); | |
string skp; | |
for(int j = k; j < k + p and j < origS.size()-E; j++) skp.push_back(code2char(origS[E + j])); | |
printf("%s[%s](%d){%d}\n", childIndent.c_str(), skp.c_str(), s1, (s1==-2?-1:nodes[s1].fail)); | |
if(s1 >= 0) _print(s1, " " + childIndent); | |
} | |
} | |
} | |
// optional part | |
void walk(int& s, int& k, int& p, int& depth, char c){ | |
int cc = char2code(c); | |
int k1, p1, s1; | |
while(1){ | |
if(p == 0){ | |
if(nodes[s].has_transition(cc)){ | |
nodes[s].get_transition(cc, k, p1, s1); | |
break; | |
} | |
}else{ | |
nodes[s].get_transition(origS[E + k], k, p1, s1); | |
if(k + p < origS.size() - E and cc == origS[E + k + p]){ | |
break; | |
} | |
} | |
s = nodes[s].fail; | |
canonize(s, k, p); | |
depth--; | |
} | |
depth++; | |
p++; | |
canonize(s, k, p); | |
// if(++p == p1){ s = s1; p = 0; } | |
} | |
}; | |
//////////////////////////// end of suffix tree code /////////////////////////// | |
// poj 2774 | |
SuffixTree st; | |
string s1, s2; | |
int main(){ | |
// printf("node size: %d\ntotal size: %d\n", sizeof(STNode), sizeof(STNode) * K); return 0; | |
#ifdef DEBUG | |
freopen("in", "r", stdin); | |
#endif | |
cin >> s1 >> s2; | |
st.build(s1); | |
// st.print(); | |
int i; | |
// find max depth | |
int s, k, p; // the state (s, (k, p)) on the suffix tree | |
int maxDepth = 0; | |
int depth = 0; | |
s = st.root; k = 0; p = 0; | |
for(i = 0; i < s2.size(); i++){ | |
st.walk(s, k, p, depth, s2[i]); | |
maxDepth = max(maxDepth, depth); | |
} | |
cout << maxDepth << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment