Skip to content

Instantly share code, notes, and snippets.

@ZhanruiLiang
Created August 20, 2012 17:05
Show Gist options
  • Save ZhanruiLiang/3405848 to your computer and use it in GitHub Desktop.
Save ZhanruiLiang/3405848 to your computer and use it in GitHub Desktop.
Suffix tree construction with Ukkonen's online algorithm. Implement in C++.
#include<iostream>
#include<string>
#include<cstdio>
#include<cstring>
#include<cctype>
#include<vector>
#include<cstring>
#include<algorithm>
#include<vector>
#include<cmath>
#define For(i, n) for(i = 0; i < n; i++)
#define Forr(i, l, n) for(i = l; i < n; i++)
typedef long long LL;
using namespace std;
////////////////////// suffix tree code below //////////////////////////////////
// the alphabet size
const int E = 27;
const int SEP = E - 1;
const int N = 100002;
const int oo = 100000000;
class STNode{
private:
struct Tran{
int s, k, p;
};
Tran ts[E]; // transitions' hash-table
public:
int fail;
void init(){
int i;
For(i, E) ts[i].s = -1;
fail = -1;
}
bool has_transition(int cc){
return ts[cc].s != -1;
}
void get_transition(int cc, int& k, int& p, int& s){
k = ts[cc].k;
p = ts[cc].p;
s = ts[cc].s;
}
void set_transition(int cc, int k, int p, int s){
ts[cc].s = s;
ts[cc].k = k;
ts[cc].p = p;
}
};
STNode nodes[N + 2];
class SuffixTree{
public:
vector<int> origS;
int root, aux;
int nextId;
int activeS, activeK, activeP;
int new_node(){
//create a new branch node and return the id
nodes[nextId].init();
return nextId++;
}
int char2code(char c){ return c - 'a'; }
char code2char(int code){ return 'a' + code; }
void update(int& s, int& k, int& p, int i){
// (s, (k, p)) is the canonicial reference pair for the active point
int oldr, r, r1;
int cc = origS[E+i];
oldr = root;
int k1, p1;
while(1){
if(test_and_split(s, k, p, cc, r))
break;
//create new leaf node
nodes[r].get_transition(cc, k, p, r1); // r1 <= -1
r1 = r1 - 1;
nodes[r].set_transition(cc, i, oo, r1);
if(oldr != root) nodes[oldr].fail = r;
oldr = r;
s = nodes[s].fail;
canonize(s, k, p);
}
if(oldr != root) nodes[oldr].fail = s;
}
bool test_and_split(int s, int k, int p, int cc, int & r){
r = s;
int k1, p1, s1;
if(p > 0){
nodes[s].get_transition(origS[E + k], k1, p1, s1);
if(cc == origS[E + k1 + p] and cc != SEP) return true;
else{
r = new_node();
nodes[s].set_transition(origS[E + k1], k1, p, r);
nodes[r].set_transition(origS[E + k1 + p], k1 + p, p1 - p, s1);
return false;
}
}else
return nodes[s].has_transition(cc) and cc != SEP;
}
void canonize(int& s, int& k, int& p){
int ss, kk, pp;
if(p == 0) return;
nodes[s].get_transition(origS[E + k], kk, pp, ss);
while(pp <= p){
k += pp;
p -= pp;
s = ss;
if(p > 0) nodes[s].get_transition(origS[E + k], kk, pp, ss);
else break;
}
}
void start_build(){
//call before create a new tree
nextId = 0;
aux = new_node();
root = new_node();
int i;
// transitions from aux to root
origS.clear();
For(i, E) origS.push_back(E-1-i);
For(i, E) nodes[aux].set_transition(i, -i-1, 1, root);
// suffix link from root to aux
nodes[root].fail = aux;
nodes[aux].fail = aux;
activeS = root; activeK = 0; activeP = 0;
}
void add_letter(char c){
origS.push_back(char2code(c));
//update from active point (s, (k, p))
update(activeS, activeK, activeP, origS.size()-1-E);
//now (s, (k, p)) is the end point, it has a c-transition
activeP++;
canonize(activeS, activeK, activeP);
}
// optional part
void build(const string& s){
start_build();
int i;
For(i, s.size()) add_letter(s[i]);
}
// optional part
void print(){
cout << "(" << root << ")\n";
_print(root, " ");
}
// optional part
void _print(int s, string childIndent){
int i;
int k, p, s1;
For(i, E){
if(nodes[s].has_transition(i)){
nodes[s].get_transition(i, k, p, s1);
string skp;
for(int j = k; j < k + p and j < origS.size()-E; j++) skp.push_back(code2char(origS[E + j]));
printf("%s[%s](%d){%d}\n", childIndent.c_str(), skp.c_str(), s1, (s1==-2?-1:nodes[s1].fail));
if(s1 >= 0) _print(s1, " " + childIndent);
}
}
}
// optional part
void walk(int& s, int& k, int& p, int& depth, char c){
int cc = char2code(c);
int k1, p1, s1;
while(1){
if(p == 0){
if(nodes[s].has_transition(cc)){
nodes[s].get_transition(cc, k, p1, s1);
break;
}
}else{
nodes[s].get_transition(origS[E + k], k, p1, s1);
if(k + p < origS.size() - E and cc == origS[E + k + p]){
break;
}
}
s = nodes[s].fail;
canonize(s, k, p);
depth--;
}
depth++;
p++;
canonize(s, k, p);
// if(++p == p1){ s = s1; p = 0; }
}
};
//////////////////////////// end of suffix tree code ///////////////////////////
// poj 2774
SuffixTree st;
string s1, s2;
int main(){
// printf("node size: %d\ntotal size: %d\n", sizeof(STNode), sizeof(STNode) * K); return 0;
#ifdef DEBUG
freopen("in", "r", stdin);
#endif
cin >> s1 >> s2;
st.build(s1);
// st.print();
int i;
// find max depth
int s, k, p; // the state (s, (k, p)) on the suffix tree
int maxDepth = 0;
int depth = 0;
s = st.root; k = 0; p = 0;
for(i = 0; i < s2.size(); i++){
st.walk(s, k, p, depth, s2[i]);
maxDepth = max(maxDepth, depth);
}
cout << maxDepth << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment