Last active
August 29, 2015 14:06
-
-
Save matutter/f73c38b5ca0850b94c53 to your computer and use it in GitHub Desktop.
an NFA engine
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Mathew Utter | |
| //#include <iostream> | |
| #include <string> | |
| #include <cstdlib> | |
| #include <vector> | |
| #include <cstring> | |
| #include <algorithm> | |
| #include "TokenProviderNFA.h" | |
| using namespace std; | |
| class State { | |
| private: | |
| void _super(int f, char o, int t) { | |
| this->From = f; | |
| this->To = t; | |
| this->On = o; | |
| this->Epsilon = o == 'E'; | |
| } | |
| public: | |
| bool Epsilon; | |
| int From; | |
| char On; | |
| int To; | |
| State(){} | |
| State(int f, char o, int t) { | |
| this->_super(f,o,t); | |
| } | |
| State(string s, string s1, string s2) { | |
| this->_super(atoi( s.c_str() ),s1[0],atoi( s2.c_str() )); | |
| } | |
| State( State *s ) | |
| { | |
| this->From = s->From; | |
| this->On = s->On; | |
| this->To = s->To; | |
| this->Epsilon = s->Epsilon; | |
| } | |
| bool isFrom(int f) { | |
| return f == this->From; | |
| } | |
| bool isOn(char o) { | |
| return o == this->On; | |
| } | |
| bool isTo(int t) { | |
| return t == this->To; | |
| } | |
| bool isEpsilon() { | |
| return this->Epsilon; | |
| } | |
| bool operator==(const State &s) { | |
| return s.From == From && s.To == To && s.On == On; | |
| } | |
| friend ostream& operator<< (ostream &out, State &s); | |
| }; | |
| /* fancy vector with some opt for when m=O(n^2) this is linear or this<m */ | |
| class Linker : public std::vector<State> { | |
| public: | |
| string::iterator return_cursor; | |
| Linker& SetReturn(string::iterator it) { | |
| this->return_cursor = it; | |
| return *this; | |
| } | |
| /* concat */ | |
| Linker& Join(Linker l2) { | |
| l2.Remove( *this ); | |
| this->insert(this->end(), l2.begin(), l2.end()); | |
| return *this; | |
| } | |
| /* Unique */ | |
| Linker& Finalize() { | |
| Linker::iterator cursor = this->begin(); | |
| Linker::iterator end = this->end(); | |
| if( cursor == end ) return *this; | |
| Linker::iterator final = cursor; | |
| while( ++cursor != end ) | |
| if( !(*final == *cursor) ) | |
| *(++final) = *cursor; | |
| this->resize( (final+1) - this->begin() ); | |
| return *this; | |
| } | |
| /* Contains s <= 1 times */ | |
| bool Contains(State s) { | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if( (*i) == s ) return true; | |
| return false; | |
| } | |
| bool Contains(int s) { | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if( (*i).From == s ) return true; | |
| return false; | |
| } | |
| bool ContainsEP() { | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if((*i).isEpsilon()) return true; | |
| return false; | |
| } | |
| /* closure matching 'from' */ | |
| Linker From(int c) { | |
| Linker ln; | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if( (*i).isFrom( c ) ) | |
| ln.push_back( *i ); | |
| return ln; | |
| } | |
| /* closure matching 'on' or 'E' */ | |
| Linker On(char c) { | |
| Linker ln; | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if( (*i).isOn( c ) || (*i).isEpsilon() ) | |
| ln.push_back( *i ); | |
| return ln; | |
| } | |
| /* closure matching 'to' */ | |
| Linker To(int c) { | |
| Linker ln; | |
| for (Linker::iterator i = this->begin(); i != this->end(); ++i) | |
| if( (*i).isTo( c ) ) | |
| ln.push_back( *i ); | |
| return ln; | |
| } | |
| Linker& Remove( Linker l ) { | |
| for (Linker::iterator i = this->begin(); i < this->end(); ++i) | |
| for (Linker::iterator x = l.begin(); x != l.end(); ++x) | |
| if( *i == *x ) { | |
| this->erase( i ); | |
| --i; | |
| } | |
| return *this; | |
| } | |
| Linker& Close(Linker U, Linker *used) { | |
| while( this->ContainsEP() ) | |
| for (Linker::iterator i = this->begin(); i < this->end() && this->size() <= U.size(); ++i) | |
| if( (*i).isEpsilon() ) | |
| { | |
| used->push_back(*i); | |
| this->erase(i--); | |
| this->Join( U.From( used->back().To ).Close(U, used) ); | |
| break; | |
| } | |
| return *this; | |
| } | |
| Linker& Close(Linker U) { | |
| Linker used; | |
| while( this->ContainsEP() ) | |
| for (Linker::iterator i = this->begin(); i < this->end() && this->size() <= U.size(); ++i) | |
| if( (*i).isEpsilon() ) | |
| { | |
| used.push_back(*i); | |
| this->erase(i--); | |
| this->Join( U.From( used.back().To ).Close(U, &used) ); | |
| break; | |
| } | |
| return *this; | |
| } | |
| bool AcceptedIn(int* ac) { | |
| for (int x = 0; x < sizeof(ac)/sizeof(int); ++x) | |
| { | |
| for (Linker::iterator i = this->begin(); i < this->end(); ++i) | |
| if((*i).isFrom( ac[x] )) return true; | |
| } | |
| return false; | |
| } | |
| friend ostream& operator<< (ostream &out, Linker &en); | |
| }; | |
| class nEngine { | |
| private: | |
| int TotalStates; | |
| int FinalStates; | |
| int *AcceptStates; | |
| int StartState; | |
| int AlphaSize; | |
| string Alphabet; | |
| Linker States; | |
| public: | |
| nEngine() { | |
| this->TotalStates = 0; | |
| this->FinalStates = 0; | |
| this->StartState = 0; | |
| this->AlphaSize = 0; | |
| } | |
| bool Match(string s) { | |
| Linker ctx; | |
| vector<Linker> ctx_past; | |
| string::iterator cursor; | |
| cursor = s.begin(); | |
| ctx_past.reserve( TotalStates * TotalStates ); | |
| ctx = States.From( StartState ).Close(States); | |
| /* fix for E */ | |
| if( cursor == s.end()-1 && ctx.AcceptedIn( AcceptStates )) | |
| return true; | |
| while( cursor != s.end() ) | |
| { | |
| ctx = ctx.On( *cursor ); | |
| switch( ctx.size() ) | |
| { | |
| case 0: | |
| if(ctx_past.empty()) { | |
| return cursor == s.end() && ctx.AcceptedIn( AcceptStates ); | |
| } | |
| ctx = ctx_past.front(); | |
| cursor = ctx.return_cursor; | |
| ctx_past.erase( ctx_past.begin() ); | |
| break; | |
| default: | |
| ctx_past.push_back(ctx.SetReturn(cursor)); | |
| ctx_past.back().pop_back(); | |
| case 1: | |
| /* match at end, needed because branching ruins algebra */ | |
| if( cursor+1 == s.end() && ctx.AcceptedIn( AcceptStates )) | |
| return true; | |
| ctx = States.From( ctx.back().To ).Close(States); | |
| cursor++; | |
| } | |
| } | |
| return ctx.AcceptedIn( AcceptStates ); | |
| /* | |
| cout << "_________________(" << *cursor << ")" << cursor - s.begin() +1<< endl; | |
| cout << ctx; | |
| */ | |
| } | |
| nEngine& Digest( TokenProviderNFA * nfa ) { | |
| vector<string>::iterator it = nfa->Tokens.begin(); | |
| this->TotalStates = atoi( (*it).c_str() ); | |
| this->FinalStates = atoi( (*(++it)).c_str() ); | |
| this->AcceptStates = new int[this->FinalStates]; | |
| for( int i = 0; i < this->FinalStates; i++ ) | |
| this->AcceptStates[i] = atoi((*(++it)).c_str()); | |
| this->StartState = atoi( (*(++it)).c_str() ); | |
| this->AlphaSize = atoi( (*(++it)).c_str() ); | |
| this->Alphabet = *(++it); | |
| ++it; | |
| for(; it < nfa->Tokens.end(); it+=3 ) | |
| this->States.push_back( new State( *it , *(it+1) , *(it+2)) ); | |
| return *this; | |
| } | |
| friend ostream& operator<< (ostream &out, nEngine &en); | |
| }; | |
| ostream& operator<< (ostream &out, State &s) | |
| { | |
| out << s.From << "\t" << s.On << "\t" << s.To << (s.isEpsilon()?"*":""); | |
| return out; | |
| } | |
| ostream& operator<< (ostream &out, nEngine &en) | |
| { | |
| out << "TS " << en.TotalStates << endl; | |
| out << "FS " << en.FinalStates << endl; | |
| out << "OK "; | |
| for (int i = 0; i < en.FinalStates; ++i) | |
| { | |
| out << en.AcceptStates[i] << " "; | |
| } | |
| out << "" << endl; | |
| out << "SS " << en.StartState << endl; | |
| out << "AS " << en.AlphaSize << endl; | |
| out << "ALPHA " << en.Alphabet << endl; | |
| out << "From\tOn\tTo" << endl; | |
| out << en.States; | |
| return out; | |
| } | |
| ostream& operator<< (ostream &out, Linker &l){ | |
| for(Linker::iterator i = l.begin(); i != l.end(); ++i) | |
| cout << *i << "\t" << endl; | |
| return out; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Mathew Utter | |
| #include <iostream> | |
| #include <fstream> | |
| #include "testEngine.h" | |
| using namespace std; | |
| int main(int argc, char const *argv[]) | |
| { | |
| if (argc < 3) | |
| { | |
| cout << "missing arguments. (" << 3 - argc << ")" << endl; | |
| return 0; | |
| } | |
| ifstream nfaSource(argv[1]); | |
| ifstream testStrings(argv[2]); | |
| if (nfaSource.is_open() && testStrings.is_open()) | |
| { | |
| string buffer = ""; | |
| nfaSource.seekg(0, std::ios::end); | |
| buffer.reserve(nfaSource.tellg()); | |
| nfaSource.seekg(0, std::ios::beg); | |
| buffer.assign(istreambuf_iterator<char>(nfaSource), istreambuf_iterator<char>()); | |
| TokenProviderNFA nfas; | |
| nfas.Tokenize( buffer ); | |
| nEngine engine; | |
| engine.Digest( &nfas ); | |
| //cout << engine << endl; | |
| //return 0; | |
| string s = "E"; | |
| while( getline( testStrings, s ) /*&& getchar() */) | |
| if( s.length() ) | |
| cout<< ( engine.Match(s) ? "ACCEPT" : "FAIL" ) << ":\t" << s << endl; | |
| } | |
| else | |
| { | |
| cout << "I cant access " << argv[1] << endl; | |
| } | |
| nfaSource.close(); | |
| testStrings.close(); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using namespace std; | |
| class TokenProviderNFA { | |
| private: | |
| void MoveTo_Space(string::iterator * cursor, string *s) { | |
| while( ++*cursor != s->end() && !isspace(**cursor)); | |
| } | |
| void MoveToNext(string::iterator * cursor, const char n, string * s) { | |
| while (*cursor != s->end() && **cursor != n) | |
| ++*cursor; | |
| } | |
| public: | |
| std::vector<string> Tokens; | |
| TokenProviderNFA() {} | |
| TokenProviderNFA& Tokenize(string s) | |
| { | |
| string::iterator cursor = s.begin() | |
| , offset; | |
| while (cursor != s.end()) | |
| { | |
| switch(*cursor) | |
| { | |
| case '/': | |
| if( *(cursor+1) == '/' ) | |
| MoveToNext(&cursor, '\n', &s); | |
| ++cursor; | |
| break; | |
| default: | |
| if( isspace(*cursor) ) | |
| ++cursor; | |
| else | |
| { | |
| offset = cursor; | |
| MoveTo_Space(&offset,&s); | |
| this->Tokens.push_back(string(cursor, offset)); | |
| cursor = offset; | |
| } | |
| } | |
| } | |
| } | |
| friend ostream& operator<< (ostream &out, TokenProviderNFA &nfa); | |
| }; | |
| ostream& operator<< (ostream &out, TokenProviderNFA &nfa) | |
| { | |
| for(vector<string>::iterator it = nfa.Tokens.begin(); it < nfa.Tokens.end(); ++it) | |
| if( (*it).length() > 0 ) | |
| out << *it << " "; | |
| else | |
| out << " : "; | |
| return out; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment