Last active
September 6, 2015 06:20
-
-
Save fowlmouth/479bd70887ec2300b265 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
namespace glossolalia | |
{ | |
struct input_state | |
{ | |
std::string input; | |
std::size_t pos; | |
input_state (const std::string& src): input(src), pos(0) | |
{} | |
char current() const | |
{ | |
return input[pos]; | |
} | |
char eat() | |
{ | |
return input[pos++]; | |
} | |
char next(int offs) | |
{ | |
return input[pos+offs]; | |
} | |
int compare (const std::string& str) | |
{ | |
return input.compare(pos, str.size(), str); | |
} | |
struct saved_position | |
{ | |
std::size_t pos; | |
}; | |
saved_position save_position() | |
{ | |
return saved_position{.pos = pos}; | |
} | |
void restore_position(const saved_position& p) | |
{ | |
pos = p.pos; | |
} | |
}; | |
enum match_state | |
{ | |
invalid_match = 0, str_range, nodes | |
}; | |
template<class T> | |
class match_result | |
{ | |
match_state state; | |
union{ | |
std::pair<int,int> _range; | |
std::vector<T> *_nodes; | |
}; | |
public: | |
match_result(): state(invalid_match) | |
{} | |
match_result(int start, int end): state(str_range) | |
{ | |
_range = std::make_pair(start,end); | |
} | |
explicit match_result(T value): state(nodes) | |
{ | |
_nodes = new std::vector<T>({value}); | |
} | |
match_result& operator= (const match_result& other) | |
{ | |
state = other.state; | |
if(state == str_range) _range = other._range; | |
if(state == nodes) _nodes = other._nodes; | |
return *this; | |
} | |
/* | |
match_result(const match_result& other): state(other.state) | |
{ | |
if(state == str_range) _range = other.range; | |
else if(state == nodes) _nodes = other.nodes; | |
}*/ | |
explicit operator bool() | |
{ | |
return (bool)state; | |
} | |
int get_state() const { return state; } | |
const std::pair<int,int>& get_range() const { return _range;} | |
std::vector<T>& get_nodes() { return *_nodes; } | |
static match_result accumulate (const std::vector<match_result>& matches) | |
{ | |
if(matches.empty()) return match_result(); | |
match_result res; | |
for(const auto& m: matches) | |
{ | |
if(m.get_state() == nodes) | |
{ | |
if(res.get_state() != nodes) | |
{ | |
res.state = nodes; | |
res._nodes = new std::vector<T>(* m._nodes); | |
} | |
else | |
res._nodes->insert(res._nodes->end(), m._nodes->begin(), m._nodes->end()); | |
} | |
} | |
if(res.get_state() == nodes) | |
return res; | |
//add up str ranges | |
res.state = str_range; | |
res._range.first = matches[0]._range.first; | |
auto high = res._range.first + matches[0]._range.second; | |
for(auto i = 1; i < matches.size(); ++i) | |
{ | |
high = std::max(matches[i]._range.first + matches[i]._range.second, high); | |
} | |
res._range.second = high - res._range.first; | |
return res; | |
} | |
}; | |
template<class T> | |
class rule | |
{ | |
public: | |
using match_result_t = match_result<T>; | |
virtual match_result_t match(input_state& is) = 0; | |
virtual ~rule() {} | |
}; | |
template<class Bitset> | |
void set_range (Bitset& bs, char start, char end, bool value) | |
{ | |
for(; start <= end; ++start) | |
bs.set(start, value); | |
} | |
template<class Bitset> | |
void add_range (Bitset& bs, char start, char end) | |
{ | |
set_range(bs, start, end, true); | |
} | |
template<class T> | |
class chs: public rule<T> | |
{ | |
public: | |
using bitset_t = std::bitset<127>; | |
protected: | |
bitset_t valid; | |
public: | |
using match_result_t = typename rule<T>::match_result_t; | |
virtual match_result_t match(input_state& is) | |
{ | |
if(valid[is.current()]) | |
{ | |
match_result_t res(is.pos, 1); | |
is.eat(); | |
return res; | |
} | |
return match_result_t(); | |
} | |
chs(std::initializer_list<char> valid_chars) | |
{ | |
for(auto c: valid_chars) valid.set((std::size_t)c, true); | |
} | |
chs(const bitset_t& valid_chars): valid(valid_chars) | |
{ | |
} | |
chs(): valid() | |
{ | |
} | |
void set_range (char start, char end, bool value = true) | |
{ | |
glossolalia::set_range(valid, start, end, value); | |
} | |
}; | |
template<class T> | |
class chr: public rule<T> | |
{ | |
public: | |
char valid; | |
using match_result_t = typename rule<T>::match_result_t; | |
virtual match_result_t match(input_state& is) | |
{ | |
if(is.current() == valid) | |
{ | |
match_result_t res(is.pos, 1); | |
is.eat(); | |
return res; | |
} | |
return match_result_t(); | |
} | |
chr(char c): valid(c){} | |
}; | |
template<class T> | |
class proxy: public rule<T> | |
{ | |
protected: | |
rule<T>* real; | |
public: | |
proxy(): real(nullptr) | |
{ | |
} | |
void set (rule<T>* real_rule) | |
{ | |
if(real) delete real; | |
real = real_rule; | |
} | |
using match_result_t = typename rule<T>::match_result_t; | |
virtual match_result_t match(input_state& is) | |
{ | |
assert(real); | |
return real->match(is); | |
} | |
}; | |
using std::cout; | |
using std::endl; | |
template<class T> | |
class seq: public rule<T> | |
{ | |
protected: | |
std::vector<rule<T>*> sub; | |
public: | |
using match_result_t = typename rule<T>::match_result_t; | |
virtual match_result_t match(input_state& is) | |
{ | |
std::vector<match_result_t> matches; | |
const auto& pos = is.save_position(); | |
for(typename std::vector<rule<T>*>::const_iterator it = sub.begin(); it != sub.end(); ++it) | |
{ | |
auto m = (*it)->match(is); | |
if(m) | |
matches.push_back(m); | |
else | |
{ | |
is.restore_position(pos); | |
return match_result_t(); | |
} | |
} | |
return match_result_t::accumulate(matches); | |
} | |
seq (std::initializer_list<rule<T>*> sub): sub(sub) | |
{ | |
} | |
}; | |
//matches the first valid child | |
template<class T> | |
class disj: public seq<T> | |
{ | |
public: | |
using match_result_t = typename rule<T>::match_result_t; | |
using seq<T>::seq; | |
virtual match_result_t match(input_state& is) | |
{ | |
assert(this->sub.size() > 0); | |
const auto pos = is.save_position(); | |
for(typename std::vector<rule<T>*>::const_iterator it = this->sub.begin(); it != this->sub.end(); ++it) | |
{ | |
auto m = (*it)->match(is); | |
if(m) return m; | |
is.restore_position(pos); | |
} | |
return match_result_t(); | |
} | |
}; | |
template<class T> | |
class capture: public rule<T> | |
{ | |
public: | |
using match_result_t = typename rule<T>::match_result_t; | |
protected: | |
rule<T> *sub; | |
std::function<match_result_t(match_result_t, const input_state&)> fn; | |
public: | |
capture (rule<T>* subrule, std::function<T(const std::string&)> cb): | |
sub(subrule) | |
{ | |
fn = [cb](match_result_t m, const input_state& is)->match_result_t { | |
if(m.get_state() == str_range) | |
{ | |
const auto& range = m.get_range(); | |
return match_result_t( cb(is.input.substr(range.first, range.second)) ); | |
} | |
return m; | |
}; | |
} | |
capture (rule<T>* subrule, std::function<T(const std::vector<T>&)> cb): | |
sub(subrule) | |
{ | |
fn = [cb](match_result_t m, const input_state& is)->match_result_t | |
{ | |
if(m.get_state() == nodes) | |
{ | |
return match_result_t( cb(m.get_nodes()) ); | |
} | |
return m; | |
}; | |
} | |
using str_iter = std::string::const_iterator; | |
capture (rule<T>* subrule, std::function<T(const str_iter&, const str_iter&)> cb): | |
sub(subrule) | |
{ | |
fn = [cb](match_result_t m, const input_state& is)->match_result_t | |
{ | |
if(m.get_state() == str_range) | |
{ | |
str_iter start = is.input.begin() + m.get_range().first; | |
str_iter end = start + m.get_range().second; | |
return match_result_t( cb(start, end) ); | |
} | |
return m; | |
}; | |
} | |
virtual match_result_t match(input_state& is) | |
{ | |
auto res = sub->match(is); | |
auto p = fn(res, is); | |
if(p) res = match_result_t(p); | |
return res; | |
} | |
}; | |
template<class T> | |
class repeat: public rule<T> | |
{ | |
rule<T>* sub; | |
int min, max; | |
public: | |
using match_result_t = typename rule<T>::match_result_t; | |
virtual match_result_t match(input_state& is) | |
{ | |
auto start = is.save_position(); | |
std::vector<match_result_t> matches; | |
while(is.current() != '\0') | |
{ | |
if(max > -1 && matches.size() >= max) | |
break; | |
auto m = sub->match(is); | |
if(m) | |
{ | |
matches.push_back(std::move(m)); | |
continue; | |
} | |
break; | |
} | |
if(matches.size() < min) | |
{ | |
is.restore_position(start); | |
return match_result_t(); | |
} | |
if(matches.size() > 0) | |
return match_result_t::accumulate(matches); | |
return match_result_t(is.pos, 0); | |
} | |
repeat(rule<T>* sub, int min = 0, int max = -1): | |
sub(sub), min(min), max(max) | |
{ | |
} | |
}; | |
template<class T> | |
struct grammar | |
{ | |
public: | |
using rule = rule<T>; | |
using capture = capture<T>; | |
using seq = seq<T>; | |
using disj = disj<T>; | |
using chs = chs<T>; | |
using chr = chr<T>; | |
using repeat = repeat<T>; | |
using proxy = proxy<T>; | |
using gr = grammar<T>; | |
inline static rule* join (rule* a, rule* b, int min = 0, int max = -1) | |
{ | |
return new seq({a, new repeat(new seq({b, a}), min, max)}); | |
} | |
}; | |
void test_disj() | |
{ | |
using gr = grammar<int>; | |
auto c1 = new gr::chr('b'); | |
auto c2 = new gr::chr('a'); | |
auto dj_test = new gr::disj({c1, c2}); | |
auto is = input_state("a"); | |
auto res = dj_test->match(is); | |
assert(res); | |
assert(res.get_range() == std::make_pair(0,1)); | |
} | |
void test_evaluator() | |
{ | |
using gr = grammar<int>; | |
//matches one digit 0-9 | |
auto digit = new gr::chs(); | |
digit->set_range('0', '9'); | |
//matches a sequence of digits | |
auto digits = new gr::repeat(digit, 1); | |
//saves that sequence as an integer | |
auto number = new gr::capture(digits, | |
[](std::string::const_iterator start, std::string::const_iterator end)->int | |
{ | |
int n = 0; | |
for(; start < end; ++start) | |
n = (n * 10) + (*start - '0'); | |
return n; | |
} | |
); | |
//so that additive and multiplicative can reference theirselves before they're defined | |
auto additive = new gr::proxy(); | |
auto multiplicative = new gr::proxy(); | |
auto capture_additive = [](const std::vector<int>& numbers)->int | |
{ | |
int res = 0; | |
for(auto n: numbers) res += n; | |
return res; | |
}; | |
auto capture_multiplicative = [](const std::vector<int>& numbers)->int | |
{ | |
int res = 1; | |
for(auto n: numbers) res *= n; | |
return res; | |
}; | |
additive->set(new gr::disj({ | |
new gr::capture(new gr::seq({multiplicative, new gr::chr('+'), additive}), | |
capture_additive), | |
multiplicative})); | |
multiplicative->set(new gr::disj({ | |
new gr::capture(new gr::seq({number, new gr::chr('*'), multiplicative}), | |
capture_multiplicative), | |
number | |
})); | |
auto is = input_state("1+2*3"); | |
auto result = additive->match(is); | |
assert(result.get_state() == nodes); | |
assert(result.get_nodes().at(0) == 7); | |
} | |
}; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment