Skip to content

Instantly share code, notes, and snippets.

@fowlmouth
Last active September 6, 2015 06:20
Show Gist options
  • Save fowlmouth/479bd70887ec2300b265 to your computer and use it in GitHub Desktop.
Save fowlmouth/479bd70887ec2300b265 to your computer and use it in GitHub Desktop.
#pragma once
namespace glossolalia
{
struct input_state
{
std::string input;
std::size_t pos;
input_state (const std::string& src): input(src), pos(0)
{}
char current() const
{
return input[pos];
}
char eat()
{
return input[pos++];
}
char next(int offs)
{
return input[pos+offs];
}
int compare (const std::string& str)
{
return input.compare(pos, str.size(), str);
}
struct saved_position
{
std::size_t pos;
};
saved_position save_position()
{
return saved_position{.pos = pos};
}
void restore_position(const saved_position& p)
{
pos = p.pos;
}
};
enum match_state
{
invalid_match = 0, str_range, nodes
};
template<class T>
class match_result
{
match_state state;
union{
std::pair<int,int> _range;
std::vector<T> *_nodes;
};
public:
match_result(): state(invalid_match)
{}
match_result(int start, int end): state(str_range)
{
_range = std::make_pair(start,end);
}
explicit match_result(T value): state(nodes)
{
_nodes = new std::vector<T>({value});
}
match_result& operator= (const match_result& other)
{
state = other.state;
if(state == str_range) _range = other._range;
if(state == nodes) _nodes = other._nodes;
return *this;
}
/*
match_result(const match_result& other): state(other.state)
{
if(state == str_range) _range = other.range;
else if(state == nodes) _nodes = other.nodes;
}*/
explicit operator bool()
{
return (bool)state;
}
int get_state() const { return state; }
const std::pair<int,int>& get_range() const { return _range;}
std::vector<T>& get_nodes() { return *_nodes; }
static match_result accumulate (const std::vector<match_result>& matches)
{
if(matches.empty()) return match_result();
match_result res;
for(const auto& m: matches)
{
if(m.get_state() == nodes)
{
if(res.get_state() != nodes)
{
res.state = nodes;
res._nodes = new std::vector<T>(* m._nodes);
}
else
res._nodes->insert(res._nodes->end(), m._nodes->begin(), m._nodes->end());
}
}
if(res.get_state() == nodes)
return res;
//add up str ranges
res.state = str_range;
res._range.first = matches[0]._range.first;
auto high = res._range.first + matches[0]._range.second;
for(auto i = 1; i < matches.size(); ++i)
{
high = std::max(matches[i]._range.first + matches[i]._range.second, high);
}
res._range.second = high - res._range.first;
return res;
}
};
template<class T>
class rule
{
public:
using match_result_t = match_result<T>;
virtual match_result_t match(input_state& is) = 0;
virtual ~rule() {}
};
template<class Bitset>
void set_range (Bitset& bs, char start, char end, bool value)
{
for(; start <= end; ++start)
bs.set(start, value);
}
template<class Bitset>
void add_range (Bitset& bs, char start, char end)
{
set_range(bs, start, end, true);
}
template<class T>
class chs: public rule<T>
{
public:
using bitset_t = std::bitset<127>;
protected:
bitset_t valid;
public:
using match_result_t = typename rule<T>::match_result_t;
virtual match_result_t match(input_state& is)
{
if(valid[is.current()])
{
match_result_t res(is.pos, 1);
is.eat();
return res;
}
return match_result_t();
}
chs(std::initializer_list<char> valid_chars)
{
for(auto c: valid_chars) valid.set((std::size_t)c, true);
}
chs(const bitset_t& valid_chars): valid(valid_chars)
{
}
chs(): valid()
{
}
void set_range (char start, char end, bool value = true)
{
glossolalia::set_range(valid, start, end, value);
}
};
template<class T>
class chr: public rule<T>
{
public:
char valid;
using match_result_t = typename rule<T>::match_result_t;
virtual match_result_t match(input_state& is)
{
if(is.current() == valid)
{
match_result_t res(is.pos, 1);
is.eat();
return res;
}
return match_result_t();
}
chr(char c): valid(c){}
};
template<class T>
class proxy: public rule<T>
{
protected:
rule<T>* real;
public:
proxy(): real(nullptr)
{
}
void set (rule<T>* real_rule)
{
if(real) delete real;
real = real_rule;
}
using match_result_t = typename rule<T>::match_result_t;
virtual match_result_t match(input_state& is)
{
assert(real);
return real->match(is);
}
};
using std::cout;
using std::endl;
template<class T>
class seq: public rule<T>
{
protected:
std::vector<rule<T>*> sub;
public:
using match_result_t = typename rule<T>::match_result_t;
virtual match_result_t match(input_state& is)
{
std::vector<match_result_t> matches;
const auto& pos = is.save_position();
for(typename std::vector<rule<T>*>::const_iterator it = sub.begin(); it != sub.end(); ++it)
{
auto m = (*it)->match(is);
if(m)
matches.push_back(m);
else
{
is.restore_position(pos);
return match_result_t();
}
}
return match_result_t::accumulate(matches);
}
seq (std::initializer_list<rule<T>*> sub): sub(sub)
{
}
};
//matches the first valid child
template<class T>
class disj: public seq<T>
{
public:
using match_result_t = typename rule<T>::match_result_t;
using seq<T>::seq;
virtual match_result_t match(input_state& is)
{
assert(this->sub.size() > 0);
const auto pos = is.save_position();
for(typename std::vector<rule<T>*>::const_iterator it = this->sub.begin(); it != this->sub.end(); ++it)
{
auto m = (*it)->match(is);
if(m) return m;
is.restore_position(pos);
}
return match_result_t();
}
};
template<class T>
class capture: public rule<T>
{
public:
using match_result_t = typename rule<T>::match_result_t;
protected:
rule<T> *sub;
std::function<match_result_t(match_result_t, const input_state&)> fn;
public:
capture (rule<T>* subrule, std::function<T(const std::string&)> cb):
sub(subrule)
{
fn = [cb](match_result_t m, const input_state& is)->match_result_t {
if(m.get_state() == str_range)
{
const auto& range = m.get_range();
return match_result_t( cb(is.input.substr(range.first, range.second)) );
}
return m;
};
}
capture (rule<T>* subrule, std::function<T(const std::vector<T>&)> cb):
sub(subrule)
{
fn = [cb](match_result_t m, const input_state& is)->match_result_t
{
if(m.get_state() == nodes)
{
return match_result_t( cb(m.get_nodes()) );
}
return m;
};
}
using str_iter = std::string::const_iterator;
capture (rule<T>* subrule, std::function<T(const str_iter&, const str_iter&)> cb):
sub(subrule)
{
fn = [cb](match_result_t m, const input_state& is)->match_result_t
{
if(m.get_state() == str_range)
{
str_iter start = is.input.begin() + m.get_range().first;
str_iter end = start + m.get_range().second;
return match_result_t( cb(start, end) );
}
return m;
};
}
virtual match_result_t match(input_state& is)
{
auto res = sub->match(is);
auto p = fn(res, is);
if(p) res = match_result_t(p);
return res;
}
};
template<class T>
class repeat: public rule<T>
{
rule<T>* sub;
int min, max;
public:
using match_result_t = typename rule<T>::match_result_t;
virtual match_result_t match(input_state& is)
{
auto start = is.save_position();
std::vector<match_result_t> matches;
while(is.current() != '\0')
{
if(max > -1 && matches.size() >= max)
break;
auto m = sub->match(is);
if(m)
{
matches.push_back(std::move(m));
continue;
}
break;
}
if(matches.size() < min)
{
is.restore_position(start);
return match_result_t();
}
if(matches.size() > 0)
return match_result_t::accumulate(matches);
return match_result_t(is.pos, 0);
}
repeat(rule<T>* sub, int min = 0, int max = -1):
sub(sub), min(min), max(max)
{
}
};
template<class T>
struct grammar
{
public:
using rule = rule<T>;
using capture = capture<T>;
using seq = seq<T>;
using disj = disj<T>;
using chs = chs<T>;
using chr = chr<T>;
using repeat = repeat<T>;
using proxy = proxy<T>;
using gr = grammar<T>;
inline static rule* join (rule* a, rule* b, int min = 0, int max = -1)
{
return new seq({a, new repeat(new seq({b, a}), min, max)});
}
};
void test_disj()
{
using gr = grammar<int>;
auto c1 = new gr::chr('b');
auto c2 = new gr::chr('a');
auto dj_test = new gr::disj({c1, c2});
auto is = input_state("a");
auto res = dj_test->match(is);
assert(res);
assert(res.get_range() == std::make_pair(0,1));
}
void test_evaluator()
{
using gr = grammar<int>;
//matches one digit 0-9
auto digit = new gr::chs();
digit->set_range('0', '9');
//matches a sequence of digits
auto digits = new gr::repeat(digit, 1);
//saves that sequence as an integer
auto number = new gr::capture(digits,
[](std::string::const_iterator start, std::string::const_iterator end)->int
{
int n = 0;
for(; start < end; ++start)
n = (n * 10) + (*start - '0');
return n;
}
);
//so that additive and multiplicative can reference theirselves before they're defined
auto additive = new gr::proxy();
auto multiplicative = new gr::proxy();
auto capture_additive = [](const std::vector<int>& numbers)->int
{
int res = 0;
for(auto n: numbers) res += n;
return res;
};
auto capture_multiplicative = [](const std::vector<int>& numbers)->int
{
int res = 1;
for(auto n: numbers) res *= n;
return res;
};
additive->set(new gr::disj({
new gr::capture(new gr::seq({multiplicative, new gr::chr('+'), additive}),
capture_additive),
multiplicative}));
multiplicative->set(new gr::disj({
new gr::capture(new gr::seq({number, new gr::chr('*'), multiplicative}),
capture_multiplicative),
number
}));
auto is = input_state("1+2*3");
auto result = additive->match(is);
assert(result.get_state() == nodes);
assert(result.get_nodes().at(0) == 7);
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment