Skip to content

Instantly share code, notes, and snippets.

@matutter
Last active August 29, 2015 14:06
Show Gist options
  • Select an option

  • Save matutter/f73c38b5ca0850b94c53 to your computer and use it in GitHub Desktop.

Select an option

Save matutter/f73c38b5ca0850b94c53 to your computer and use it in GitHub Desktop.
an NFA engine
// Mathew Utter
//#include <iostream>
#include <string>
#include <cstdlib>
#include <vector>
#include <cstring>
#include <algorithm>
#include "TokenProviderNFA.h"
using namespace std;
class State {
private:
void _super(int f, char o, int t) {
this->From = f;
this->To = t;
this->On = o;
this->Epsilon = o == 'E';
}
public:
bool Epsilon;
int From;
char On;
int To;
State(){}
State(int f, char o, int t) {
this->_super(f,o,t);
}
State(string s, string s1, string s2) {
this->_super(atoi( s.c_str() ),s1[0],atoi( s2.c_str() ));
}
State( State *s )
{
this->From = s->From;
this->On = s->On;
this->To = s->To;
this->Epsilon = s->Epsilon;
}
bool isFrom(int f) {
return f == this->From;
}
bool isOn(char o) {
return o == this->On;
}
bool isTo(int t) {
return t == this->To;
}
bool isEpsilon() {
return this->Epsilon;
}
bool operator==(const State &s) {
return s.From == From && s.To == To && s.On == On;
}
friend ostream& operator<< (ostream &out, State &s);
};
/* fancy vector with some opt for when m=O(n^2) this is linear or this<m */
class Linker : public std::vector<State> {
public:
string::iterator return_cursor;
Linker& SetReturn(string::iterator it) {
this->return_cursor = it;
return *this;
}
/* concat */
Linker& Join(Linker l2) {
l2.Remove( *this );
this->insert(this->end(), l2.begin(), l2.end());
return *this;
}
/* Unique */
Linker& Finalize() {
Linker::iterator cursor = this->begin();
Linker::iterator end = this->end();
if( cursor == end ) return *this;
Linker::iterator final = cursor;
while( ++cursor != end )
if( !(*final == *cursor) )
*(++final) = *cursor;
this->resize( (final+1) - this->begin() );
return *this;
}
/* Contains s <= 1 times */
bool Contains(State s) {
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if( (*i) == s ) return true;
return false;
}
bool Contains(int s) {
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if( (*i).From == s ) return true;
return false;
}
bool ContainsEP() {
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if((*i).isEpsilon()) return true;
return false;
}
/* closure matching 'from' */
Linker From(int c) {
Linker ln;
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if( (*i).isFrom( c ) )
ln.push_back( *i );
return ln;
}
/* closure matching 'on' or 'E' */
Linker On(char c) {
Linker ln;
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if( (*i).isOn( c ) || (*i).isEpsilon() )
ln.push_back( *i );
return ln;
}
/* closure matching 'to' */
Linker To(int c) {
Linker ln;
for (Linker::iterator i = this->begin(); i != this->end(); ++i)
if( (*i).isTo( c ) )
ln.push_back( *i );
return ln;
}
Linker& Remove( Linker l ) {
for (Linker::iterator i = this->begin(); i < this->end(); ++i)
for (Linker::iterator x = l.begin(); x != l.end(); ++x)
if( *i == *x ) {
this->erase( i );
--i;
}
return *this;
}
Linker& Close(Linker U, Linker *used) {
while( this->ContainsEP() )
for (Linker::iterator i = this->begin(); i < this->end() && this->size() <= U.size(); ++i)
if( (*i).isEpsilon() )
{
used->push_back(*i);
this->erase(i--);
this->Join( U.From( used->back().To ).Close(U, used) );
break;
}
return *this;
}
Linker& Close(Linker U) {
Linker used;
while( this->ContainsEP() )
for (Linker::iterator i = this->begin(); i < this->end() && this->size() <= U.size(); ++i)
if( (*i).isEpsilon() )
{
used.push_back(*i);
this->erase(i--);
this->Join( U.From( used.back().To ).Close(U, &used) );
break;
}
return *this;
}
bool AcceptedIn(int* ac) {
for (int x = 0; x < sizeof(ac)/sizeof(int); ++x)
{
for (Linker::iterator i = this->begin(); i < this->end(); ++i)
if((*i).isFrom( ac[x] )) return true;
}
return false;
}
friend ostream& operator<< (ostream &out, Linker &en);
};
class nEngine {
private:
int TotalStates;
int FinalStates;
int *AcceptStates;
int StartState;
int AlphaSize;
string Alphabet;
Linker States;
public:
nEngine() {
this->TotalStates = 0;
this->FinalStates = 0;
this->StartState = 0;
this->AlphaSize = 0;
}
bool Match(string s) {
Linker ctx;
vector<Linker> ctx_past;
string::iterator cursor;
cursor = s.begin();
ctx_past.reserve( TotalStates * TotalStates );
ctx = States.From( StartState ).Close(States);
/* fix for E */
if( cursor == s.end()-1 && ctx.AcceptedIn( AcceptStates ))
return true;
while( cursor != s.end() )
{
ctx = ctx.On( *cursor );
switch( ctx.size() )
{
case 0:
if(ctx_past.empty()) {
return cursor == s.end() && ctx.AcceptedIn( AcceptStates );
}
ctx = ctx_past.front();
cursor = ctx.return_cursor;
ctx_past.erase( ctx_past.begin() );
break;
default:
ctx_past.push_back(ctx.SetReturn(cursor));
ctx_past.back().pop_back();
case 1:
/* match at end, needed because branching ruins algebra */
if( cursor+1 == s.end() && ctx.AcceptedIn( AcceptStates ))
return true;
ctx = States.From( ctx.back().To ).Close(States);
cursor++;
}
}
return ctx.AcceptedIn( AcceptStates );
/*
cout << "_________________(" << *cursor << ")" << cursor - s.begin() +1<< endl;
cout << ctx;
*/
}
nEngine& Digest( TokenProviderNFA * nfa ) {
vector<string>::iterator it = nfa->Tokens.begin();
this->TotalStates = atoi( (*it).c_str() );
this->FinalStates = atoi( (*(++it)).c_str() );
this->AcceptStates = new int[this->FinalStates];
for( int i = 0; i < this->FinalStates; i++ )
this->AcceptStates[i] = atoi((*(++it)).c_str());
this->StartState = atoi( (*(++it)).c_str() );
this->AlphaSize = atoi( (*(++it)).c_str() );
this->Alphabet = *(++it);
++it;
for(; it < nfa->Tokens.end(); it+=3 )
this->States.push_back( new State( *it , *(it+1) , *(it+2)) );
return *this;
}
friend ostream& operator<< (ostream &out, nEngine &en);
};
ostream& operator<< (ostream &out, State &s)
{
out << s.From << "\t" << s.On << "\t" << s.To << (s.isEpsilon()?"*":"");
return out;
}
ostream& operator<< (ostream &out, nEngine &en)
{
out << "TS " << en.TotalStates << endl;
out << "FS " << en.FinalStates << endl;
out << "OK ";
for (int i = 0; i < en.FinalStates; ++i)
{
out << en.AcceptStates[i] << " ";
}
out << "" << endl;
out << "SS " << en.StartState << endl;
out << "AS " << en.AlphaSize << endl;
out << "ALPHA " << en.Alphabet << endl;
out << "From\tOn\tTo" << endl;
out << en.States;
return out;
}
ostream& operator<< (ostream &out, Linker &l){
for(Linker::iterator i = l.begin(); i != l.end(); ++i)
cout << *i << "\t" << endl;
return out;
}
// Mathew Utter
#include <iostream>
#include <fstream>
#include "testEngine.h"
using namespace std;
int main(int argc, char const *argv[])
{
if (argc < 3)
{
cout << "missing arguments. (" << 3 - argc << ")" << endl;
return 0;
}
ifstream nfaSource(argv[1]);
ifstream testStrings(argv[2]);
if (nfaSource.is_open() && testStrings.is_open())
{
string buffer = "";
nfaSource.seekg(0, std::ios::end);
buffer.reserve(nfaSource.tellg());
nfaSource.seekg(0, std::ios::beg);
buffer.assign(istreambuf_iterator<char>(nfaSource), istreambuf_iterator<char>());
TokenProviderNFA nfas;
nfas.Tokenize( buffer );
nEngine engine;
engine.Digest( &nfas );
//cout << engine << endl;
//return 0;
string s = "E";
while( getline( testStrings, s ) /*&& getchar() */)
if( s.length() )
cout<< ( engine.Match(s) ? "ACCEPT" : "FAIL" ) << ":\t" << s << endl;
}
else
{
cout << "I cant access " << argv[1] << endl;
}
nfaSource.close();
testStrings.close();
return 0;
}
using namespace std;
class TokenProviderNFA {
private:
void MoveTo_Space(string::iterator * cursor, string *s) {
while( ++*cursor != s->end() && !isspace(**cursor));
}
void MoveToNext(string::iterator * cursor, const char n, string * s) {
while (*cursor != s->end() && **cursor != n)
++*cursor;
}
public:
std::vector<string> Tokens;
TokenProviderNFA() {}
TokenProviderNFA& Tokenize(string s)
{
string::iterator cursor = s.begin()
, offset;
while (cursor != s.end())
{
switch(*cursor)
{
case '/':
if( *(cursor+1) == '/' )
MoveToNext(&cursor, '\n', &s);
++cursor;
break;
default:
if( isspace(*cursor) )
++cursor;
else
{
offset = cursor;
MoveTo_Space(&offset,&s);
this->Tokens.push_back(string(cursor, offset));
cursor = offset;
}
}
}
}
friend ostream& operator<< (ostream &out, TokenProviderNFA &nfa);
};
ostream& operator<< (ostream &out, TokenProviderNFA &nfa)
{
for(vector<string>::iterator it = nfa.Tokens.begin(); it < nfa.Tokens.end(); ++it)
if( (*it).length() > 0 )
out << *it << " ";
else
out << " : ";
return out;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment