Created
January 14, 2018 16:46
-
-
Save Trass3r/ec17eb0cd683a4d519bed60125c019d3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // An implementation of the Aho-Corasick algorithm for string matching | |
| // using static arrays for the functions | |
| // http://en.wikipedia.org/wiki/Aho-Corasick_algorithm | |
| module automaton; | |
| import queue; | |
| public import keyword; | |
| public import list; | |
| const uint FAIL = 0xFFFFFFFF; /// the fail value for the goto function (-1) | |
| version=NORMAL; // standard version using the failure function, undefine for the version using the next function | |
| /// Automaton class | |
| class Automaton | |
| { | |
| private: | |
| ubyte[256] m_sigma; // the automaton alphabet | |
| Keyword[] m_keywords; | |
| uint[256] m_failure; // the failure "function" | |
| uint[256][256] m_goto; // the goto "function" | |
| List[256] m_output; // the output "function" | |
| uint m_numStates; // save number of states for the queue | |
| uint Goto(uint state, char ch) {return m_goto[state][ch];} /// the goto function | |
| void setGoto(uint state, char ch, uint value) {m_goto[state][ch]=value;} | |
| List output(uint state) /// the output function | |
| { | |
| if (m_output[state] is null) | |
| m_output[state] = new List(); | |
| return m_output[state]; | |
| } | |
| /// create the goto function | |
| void createGoto() | |
| { | |
| debug printf("createGoto()\n"); | |
| // for(uint i=0; i<256; i++) | |
| // m_output[i] = new List(); | |
| for(uint i=0; i<256; i++) | |
| for(uint j=0; j<256; j++) | |
| setGoto(i,j,FAIL); // initiate goto array | |
| uint newstate=0; | |
| uint state,j,s; | |
| for(uint i=0; i<m_keywords.length; i++) // loop through all keywords | |
| { | |
| state=0;j=0; | |
| while((s=Goto(state,m_keywords[i][j])) != FAIL) // trace the trie as long as possible | |
| { | |
| state=s; | |
| j++; | |
| } | |
| for(uint p=j; p<m_keywords[i].length; p++) // create states for the rest of the keyword | |
| { | |
| newstate++; | |
| setGoto(state,m_keywords[i][p],newstate); | |
| state=newstate; | |
| } | |
| m_output[state]=new List(i); | |
| } | |
| foreach(ubyte ch; m_sigma) | |
| { | |
| if (Goto(0,ch) == FAIL) | |
| setGoto(0,ch,0); // create the initial state's loop | |
| } | |
| m_numStates=state+1; | |
| } | |
| /// create the failure function | |
| void createFailure() | |
| { | |
| uint s,t; | |
| auto q = new Queue!(uint)(m_numStates); | |
| foreach(ubyte ch; m_sigma) | |
| { | |
| if ((s=Goto(0,ch)) != 0) | |
| { | |
| q.enqueue(s); | |
| m_failure[s] = 0; // level 1 gets start state as fail target | |
| } | |
| } | |
| while(q.hasElements) | |
| { | |
| uint r = q.dequeue(); | |
| foreach(ubyte ch; m_sigma) | |
| { | |
| if ((s=Goto(r,ch)) != FAIL) | |
| { | |
| q.enqueue(s); | |
| uint state = m_failure[r]; | |
| while((t=Goto(state,ch))==FAIL) | |
| state=m_failure[state]; | |
| m_failure[s]=t; | |
| output(s).unifyWith(output(m_failure[s])); | |
| } | |
| } | |
| } | |
| } | |
| /// create the transition function for our DFA | |
| void createNext() | |
| { | |
| uint s,t; | |
| auto q = new Queue!(uint)(m_numStates); | |
| foreach(ubyte ch; m_sigma) | |
| { | |
| // delta(0,ch) = Goto(0,ch); | |
| if ((s=Goto(0,ch)) != 0) | |
| { | |
| q.enqueue(s); | |
| m_failure[s] = 0; | |
| } | |
| } | |
| while(q.hasElements) | |
| { | |
| uint r = q.dequeue(); | |
| foreach(ubyte ch; m_sigma) | |
| { | |
| if ((s=Goto(r,ch)) != FAIL) | |
| { | |
| q.enqueue(s); | |
| //delta(r,ch) = s; | |
| uint state = m_failure[r]; | |
| while((t=Goto(state,ch))==FAIL) | |
| state=m_failure[state]; | |
| m_failure[s]=t; | |
| output(s).unifyWith(output(m_failure[s])); | |
| } | |
| else | |
| //delta(r,ch) = delta(m_failure[r],ch); | |
| setGoto(r,ch,Goto(m_failure[r],ch)); | |
| } | |
| } | |
| } | |
| public: | |
| this(Keyword[] keywords) | |
| { | |
| for(uint i=0; i<256; i++) | |
| m_sigma[i]=i; // initialize sigma, the alphabet | |
| m_keywords = keywords; | |
| createGoto(); | |
| version(NORMAL) | |
| { | |
| createFailure(); | |
| } | |
| else | |
| { | |
| createNext(); | |
| } | |
| } | |
| /// process a given input and call outputFunc on each match with a keyword | |
| void process(ubyte[] text, int function(uint i, List l) outputFunc) | |
| { | |
| debug uint counter; | |
| uint state=0,s; | |
| for(uint i=0; i<text.length; i++) | |
| { | |
| version(NORMAL) | |
| { | |
| while((s=Goto(state,text[i])) == FAIL) | |
| { | |
| debug counter+=2; | |
| state=m_failure[state]; | |
| } | |
| debug counter++; | |
| state = s; | |
| } | |
| else | |
| { | |
| state=Goto(state,text[i]); | |
| debug counter++; | |
| } | |
| List tmp = output(state); | |
| //if(m_output[state].hasElements) | |
| if(tmp.hasElements) | |
| outputFunc(i, tmp); //output(state)); | |
| } | |
| debug printf("made %i transitions\n",counter); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment