Skip to content

Instantly share code, notes, and snippets.

@Trass3r
Created January 14, 2018 16:46
Show Gist options
  • Select an option

  • Save Trass3r/ec17eb0cd683a4d519bed60125c019d3 to your computer and use it in GitHub Desktop.

Select an option

Save Trass3r/ec17eb0cd683a4d519bed60125c019d3 to your computer and use it in GitHub Desktop.
// An implementation of the Aho-Corasick algorithm for string matching
// using static arrays for the functions
// http://en.wikipedia.org/wiki/Aho-Corasick_algorithm
module automaton;
import queue;
public import keyword;
public import list;
const uint FAIL = 0xFFFFFFFF; /// the fail value for the goto function (-1)
version=NORMAL; // standard version using the failure function, undefine for the version using the next function
/// Automaton class
class Automaton
{
private:
ubyte[256] m_sigma; // the automaton alphabet
Keyword[] m_keywords;
uint[256] m_failure; // the failure "function"
uint[256][256] m_goto; // the goto "function"
List[256] m_output; // the output "function"
uint m_numStates; // save number of states for the queue
uint Goto(uint state, char ch) {return m_goto[state][ch];} /// the goto function
void setGoto(uint state, char ch, uint value) {m_goto[state][ch]=value;}
List output(uint state) /// the output function
{
if (m_output[state] is null)
m_output[state] = new List();
return m_output[state];
}
/// create the goto function
void createGoto()
{
debug printf("createGoto()\n");
// for(uint i=0; i<256; i++)
// m_output[i] = new List();
for(uint i=0; i<256; i++)
for(uint j=0; j<256; j++)
setGoto(i,j,FAIL); // initiate goto array
uint newstate=0;
uint state,j,s;
for(uint i=0; i<m_keywords.length; i++) // loop through all keywords
{
state=0;j=0;
while((s=Goto(state,m_keywords[i][j])) != FAIL) // trace the trie as long as possible
{
state=s;
j++;
}
for(uint p=j; p<m_keywords[i].length; p++) // create states for the rest of the keyword
{
newstate++;
setGoto(state,m_keywords[i][p],newstate);
state=newstate;
}
m_output[state]=new List(i);
}
foreach(ubyte ch; m_sigma)
{
if (Goto(0,ch) == FAIL)
setGoto(0,ch,0); // create the initial state's loop
}
m_numStates=state+1;
}
/// create the failure function
void createFailure()
{
uint s,t;
auto q = new Queue!(uint)(m_numStates);
foreach(ubyte ch; m_sigma)
{
if ((s=Goto(0,ch)) != 0)
{
q.enqueue(s);
m_failure[s] = 0; // level 1 gets start state as fail target
}
}
while(q.hasElements)
{
uint r = q.dequeue();
foreach(ubyte ch; m_sigma)
{
if ((s=Goto(r,ch)) != FAIL)
{
q.enqueue(s);
uint state = m_failure[r];
while((t=Goto(state,ch))==FAIL)
state=m_failure[state];
m_failure[s]=t;
output(s).unifyWith(output(m_failure[s]));
}
}
}
}
/// create the transition function for our DFA
void createNext()
{
uint s,t;
auto q = new Queue!(uint)(m_numStates);
foreach(ubyte ch; m_sigma)
{
// delta(0,ch) = Goto(0,ch);
if ((s=Goto(0,ch)) != 0)
{
q.enqueue(s);
m_failure[s] = 0;
}
}
while(q.hasElements)
{
uint r = q.dequeue();
foreach(ubyte ch; m_sigma)
{
if ((s=Goto(r,ch)) != FAIL)
{
q.enqueue(s);
//delta(r,ch) = s;
uint state = m_failure[r];
while((t=Goto(state,ch))==FAIL)
state=m_failure[state];
m_failure[s]=t;
output(s).unifyWith(output(m_failure[s]));
}
else
//delta(r,ch) = delta(m_failure[r],ch);
setGoto(r,ch,Goto(m_failure[r],ch));
}
}
}
public:
this(Keyword[] keywords)
{
for(uint i=0; i<256; i++)
m_sigma[i]=i; // initialize sigma, the alphabet
m_keywords = keywords;
createGoto();
version(NORMAL)
{
createFailure();
}
else
{
createNext();
}
}
/// process a given input and call outputFunc on each match with a keyword
void process(ubyte[] text, int function(uint i, List l) outputFunc)
{
debug uint counter;
uint state=0,s;
for(uint i=0; i<text.length; i++)
{
version(NORMAL)
{
while((s=Goto(state,text[i])) == FAIL)
{
debug counter+=2;
state=m_failure[state];
}
debug counter++;
state = s;
}
else
{
state=Goto(state,text[i]);
debug counter++;
}
List tmp = output(state);
//if(m_output[state].hasElements)
if(tmp.hasElements)
outputFunc(i, tmp); //output(state));
}
debug printf("made %i transitions\n",counter);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment