@marionette-of-u
Last active August 29, 2015 14:23
<token> token{
    <right>{ unary_minus; }
    <left>{
        mul = "\*", div = "\/";
        add = "\+", sub = "-";
    }
    left_paren = "\(", right_paren = "\)";
    id<int> = "[a-zA-Z_][a-zA-Z0-9_]*";
}
<grammar> parser{
    [default_value]
    E<int>
        : [make_add] E(0) add E(1)
        | [make_sub] E(0) sub E(1)
        | [make_mlt] E(0) mul E(1)
        | [make_div] E(0) div E(1)
        | [make_id] left_paren E(0) right_paren
        | [make_umns] <unary_minus> sub E(0)
        | [make_id] id(0)
        ;
}
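// The DSL sample above appears to be the "grammar.txt" input that scan() at the
// bottom of this file reads: a <token> section declaring terminals with
// associativity/priority blocks, followed by a <grammar> section of rules with
// semantic-action tags.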
#include <iostream>
#include <vector>
#include <stack>
#include <set>
#include <map>
#include <functional>
#include <initializer_list>
template<class Term>
struct default_epsilon_functor{
Term operator ()() const{
return Term();
}
};
template<class Term>
struct default_is_terminal_symbol_functor{
template<class G>
bool operator ()(Term const &x, G const &g) const{
return g.find(x) == g.end();
}
};
template<
class SemanticType,
class Term,
class EOSFunctor,
class DummyFunctor,
class IsTerminalSymbolFunctor = default_is_terminal_symbol_functor<Term>,
class EpsilonFunctor = default_epsilon_functor<Term>
>
class lalr{
public:
using semantic_type = SemanticType;
using term = Term;
using eos_functor = EOSFunctor;
using dummy_functor = DummyFunctor;
using is_terminal_symbol_functor = IsTerminalSymbolFunctor;
using epsilon_functor = EpsilonFunctor;
using term_set = std::set<term>;
using first_set_type = std::map<term, term_set>;
enum class linkdir{
nonassoc,
left,
right
};
struct symbol_data_type{
linkdir dir = linkdir::nonassoc;
std::size_t priority = 0;
};
using symbol_data_map = std::map<term, symbol_data_type>;
class term_sequence : public std::vector<term>{
public:
term_sequence() : std::vector<term>(), semantic_data(){}
term_sequence(term_sequence const &other) : std::vector<term>(other), semantic_data(other.semantic_data), tag(other.tag){}
term_sequence(std::initializer_list<term> list, semantic_type const &s) : std::vector<term>(list), semantic_data(s){}
semantic_type semantic_data;
term tag = epsilon_functor()();
};
struct item{
term lhs;
term_sequence rhs;
std::size_t pos;
mutable term_set lookahead;
bool is_over() const{
return pos >= rhs.size();
}
term const &curr() const{
return rhs[pos];
}
bool equal(item const &other) const{
return lhs == other.lhs && rhs == other.rhs && pos == other.pos && lookahead == other.lookahead;
}
};
struct rule_rhs_comparetor{
bool operator ()(term_sequence const &x, term_sequence const &y) const{
std::size_t i;
for(i = 0; i < x.size() && i < y.size(); ++i){
if(x[i] < y[i]){
return true;
}else if(x[i] > y[i]){
return false;
}
}
return x.size() < y.size();
}
};
struct item_comparetor{
bool operator ()(item const &x, item const &y) const{
if(x.lhs < y.lhs){
return true;
}else if(x.lhs > y.lhs){
return false;
}
std::size_t i;
for(i = 0; i < x.rhs.size() && i < y.rhs.size(); ++i){
if(x.rhs[i] < y.rhs[i]){
return true;
}else if(x.rhs[i] > y.rhs[i]){
return false;
}
}
if(x.rhs.size() != y.rhs.size()){
return x.rhs.size() < y.rhs.size();
}else{
return x.pos < y.pos;
}
}
};
struct exteded_item_comparetor{
bool operator ()(item const &x, item const &y) const{
if(x.lhs < y.lhs){
return true;
}else if(x.lhs > y.lhs){
return false;
}
std::size_t i;
for(i = 0; i < x.rhs.size() && i < y.rhs.size(); ++i){
if(x.rhs[i] < y.rhs[i]){
return true;
}else if(x.rhs[i] > y.rhs[i]){
return false;
}
}
if(x.rhs.size() != y.rhs.size()){
return x.rhs.size() < y.rhs.size();
}else{
if(x.pos < y.pos){
return true;
}else if(x.pos > y.pos){
return false;
}
}
return x.lookahead < y.lookahead;
}
};
using rule_rhs = std::set<term_sequence, rule_rhs_comparetor>;
using grammar = std::map<term, rule_rhs>;
class items : public std::set<item, item_comparetor>{
public:
using base_type = std::set<item, item_comparetor>;
using goto_map_type = std::map<term, items const*>;
items const mutable *mirror;
goto_map_type mutable goto_map;
items() : base_type(), mirror(nullptr), goto_map(){}
items(items const &other) : base_type(other), mirror(other.mirror), goto_map(other.goto_map){}
items(items &&other) : base_type(std::move(other)), mirror(other.mirror), goto_map(std::move(other.goto_map)){}
items(std::initializer_list<item> list) : base_type(list), mirror(nullptr), goto_map(){}
items &operator =(items const &other){
base_type::operator =(other);
mirror = other.mirror;
goto_map.operator =(other.goto_map);
return *this;
}
items &operator =(items &&other){
base_type::operator =(std::move(other));
mirror = std::move(other.mirror);
goto_map.operator =(std::move(other.goto_map));
return *this;
}
std::pair<typename base_type::iterator, bool> insert(item const &i){
std::pair<typename base_type::iterator, bool> p = base_type::insert(i);
if(!p.second){
p.first->lookahead.insert(i.lookahead.begin(), i.lookahead.end());
}
return p;
}
};
struct items_comparetor{
bool operator ()(items const &x, items const &y) const{
auto xter = x.begin(), yter = y.begin();
for(; xter != x.end() && yter != y.end(); ++xter, ++yter){
if(exteded_item_comparetor()(*xter, *yter)){
return true;
}else if(exteded_item_comparetor()(*yter, *xter)){
return false;
}
}
return x.size() < y.size();
}
};
using states = std::set<items, items_comparetor>;
struct limited_items_comparetor{
bool operator ()(items const *x, items const *y) const{
auto xter = x->begin(), yter = y->begin();
for(; xter != x->end() && yter != y->end(); ++xter, ++yter){
if(item_comparetor()(*xter, *yter)){
return true;
}else if(item_comparetor()(*yter, *xter)){
return false;
}
}
return x->size() < y->size();
}
};
using lr0_kernel_states = std::set<items const*, limited_items_comparetor>;
struct lr_parsing_table_item{
enum class enum_action{
shift,
reduce,
accept
};
enum_action action;
std::size_t num;
item const *item_ptr = nullptr;
bool operator ==(lr_parsing_table_item const &other) const{
return action == other.action && num == other.num;
}
bool operator !=(lr_parsing_table_item const &other) const{
return !(*this == other);
}
};
struct lr_parsing_table_item_comparetor{
bool operator ()(lr_parsing_table_item const &lhs, lr_parsing_table_item const &rhs) const{
if(lhs.action < rhs.action){
return true;
}else if(lhs.action > rhs.action){
return false;
}
return lhs.num < rhs.num;
}
};
struct lr_conflict{
lr_parsing_table_item lhs, rhs;
};
struct lr_conflict_comparetor{
bool operator ()(lr_conflict const &lhs, lr_conflict const &rhs) const{
if(lr_parsing_table_item_comparetor()(lhs.lhs, rhs.lhs)){
return true;
}else if(lr_parsing_table_item_comparetor()(rhs.lhs, lhs.lhs)){
return false;
}
return lr_parsing_table_item_comparetor()(lhs.rhs, rhs.rhs);
}
};
using lr_parsing_table = std::map<std::size_t, std::map<term, lr_parsing_table_item>>;
using lr_goto_table = std::map<std::size_t, std::map<term, std::size_t>>;
using state_to_num = std::map<items const*, std::size_t, limited_items_comparetor>;
using num_to_state = std::map<std::size_t, items const*>;
using rule_to_num = std::map<term_sequence const*, std::size_t>;
using num_to_rule = std::map<std::size_t, std::pair<term, term_sequence const*>>;
using lr_conflict_set = std::set<lr_conflict, lr_conflict_comparetor>;
struct make_result{
std::size_t first;
state_to_num s2n;
num_to_state n2s;
rule_to_num r2n;
num_to_rule n2r;
lr_parsing_table parsing_table;
lr_goto_table goto_table;
lr_conflict_set conflict_set;
};
static bool states_comparetor(states const &x, states const &y){
auto xter = x.begin(), yter = y.begin();
for(; xter != x.end() && yter != y.end(); ++xter, ++yter){
if(items_comparetor()(*xter, *yter) || items_comparetor()(*yter, *xter)){
return false;
}
}
return x.size() == y.size();
}
template<class Set>
static Set union_set(Set const &x, Set const &y){
Set i = x;
for(auto iter = y.begin(); iter != y.end(); ++iter){
i.insert(*iter);
}
return i;
}
template<class Set>
static Set inter_set(Set const &x, Set const &y){
Set i;
for(auto iter = x.begin(); iter != x.end(); ++iter){
if(y.find(*iter) != y.end()){
i.insert(*iter);
}
}
return i;
}
static term_set make_terminal_symbol_set(grammar const &g){
term_set s;
for(auto iter = g.begin(); iter != g.end(); ++iter){
for(auto jter = iter->second.begin(); jter != iter->second.end(); ++jter){
for(auto kter = jter->begin(); kter != jter->end(); ++kter){
if(is_terminal_symbol_functor()(*kter, g)){
s.insert(*kter);
}
}
}
}
return s;
}
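// FIRST-set computation, memoized in first_set_cache. first_set_impl walks the
// rules for a nonterminal, collecting FIRST of each alternative and keeping the
// epsilon marker only when every symbol of an alternative can derive epsilon.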
mutable std::map<term, term_set> first_set_cache;
term_set const &first_set(grammar const &g, term const &x){
std::set<term> s;
auto p = first_set_cache.insert(std::make_pair(x, term_set()));
if(p.second){
p.first->second = first_set_impl(g, x, s);
}
return p.first->second;
}
static term_set first_set_impl(grammar const &g, term const &x, std::set<term> &scanned){
term_set r;
if(is_terminal_symbol_functor()(x, g)){
r.insert(x);
return r;
}
if(scanned.find(x) != scanned.end()){
return r;
}
scanned.insert(x);
rule_rhs const &rhs(g.find(x)->second);
auto iter = rhs.begin();
for(; iter != rhs.end(); ++iter){
if(iter->empty()){
r.insert(epsilon_functor()());
continue;
}
auto jter = iter->begin();
bool epsilon = true;
for(; epsilon && jter != iter->end(); ++jter){
term_set s = first_set_impl(g, *jter, scanned);
epsilon = s.find(epsilon_functor()()) != s.end();
if(epsilon){
s.erase(epsilon_functor()());
}
r = union_set(r, s);
}
if(epsilon && jter == iter->end()){
r.insert(epsilon_functor()());
}
}
return r;
}
std::map<term, term_set> follow_set;
void make_follow_set(grammar const &g, term const &s){
follow_set[s].insert(eos_functor()());
std::size_t size = 0, psize;
do{
psize = size;
for(std::pair<term, rule_rhs> const &p : g){
for(term_sequence const &seq : p.second){
if(seq.size() < 2){ continue; }
for(auto iter = seq.rbegin() + 1; iter != seq.rend() - 1; ++iter){
term const &b = *iter, &beta = *(iter - 1);
if(is_terminal_symbol_functor()(b, g)){ continue; }
term_set &b_set = follow_set[b];
std::size_t b_size = b_set.size();
for(auto jter = iter; ; ){
if(jter == seq.rbegin()){ break; }
--jter;
term_set const &first_beta(first_set(g, *jter));
b_set.insert(first_beta.begin(), first_beta.end());
if(b_set.find(epsilon_functor()()) != b_set.end()){
b_set.erase(epsilon_functor()());
}
size += b_set.size() - b_size;
b_size = b_set.size();
}
}
}
}
for(std::pair<term, rule_rhs> const &p : g){
for(term_sequence const &seq : p.second){
if(seq.empty()){ continue; }
term_set const &a_set = follow_set[p.first];
if(!is_terminal_symbol_functor()(seq.back(), g)){
term const &b = seq.back();
term_set &b_set = follow_set[b];
std::size_t b_size = b_set.size();
b_set.insert(a_set.begin(), a_set.end());
size += b_set.size() - b_size;
}
if(seq.size() == 1){ continue; }
for(auto iter = seq.rbegin() + 1; iter != seq.rend() - 1; ++iter){
term const &b = *iter, &beta = *(iter - 1);
if(is_terminal_symbol_functor()(b, g)){ continue; }
term_set &b_set = follow_set[b];
std::size_t b_size = b_set.size();
for(auto jter = iter; ; ){
if(jter == seq.rbegin()){ break; }
--jter;
term_set const &first_beta(first_set(g, *jter));
bool epsilon = first_beta.find(epsilon_functor()()) != first_beta.end();
if(epsilon){
b_set.insert(a_set.begin(), a_set.end());
size += b_set.size() - b_size;
b_size = b_set.size();
}
}
}
}
}
}while(size != psize);
}
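// LR(0) machinery: lr0_closure expands items whose dot precedes a nonterminal,
// lr0_goto advances the dot over a symbol and re-closes, and lr0_items builds
// the canonical collection of LR(0) item sets from the start item.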
static items lr0_closure(grammar const &g, items j){
std::size_t s;
do{
s = j.size();
for(auto iter = j.begin(); iter != j.end(); ++iter){
if(iter->is_over() || is_terminal_symbol_functor()(iter->curr(), g)){
continue;
}
term const &b = iter->curr();
rule_rhs const &b_beta = g.find(b)->second;
for(auto bter = b_beta.begin(); bter != b_beta.end(); ++bter){
item t;
t.lhs = b;
t.rhs = *bter;
t.pos = 0;
if(j.find(t) == j.end()){
j.insert(t);
}
}
}
}while(s != j.size());
return j;
}
static items lr0_goto(grammar const &g, items const &i, term const &x){
items r;
for(auto iter = i.begin(); iter != i.end(); ++iter){
item const &current_item(*iter);
if(current_item.is_over()){
continue;
}
if(current_item.curr() != x){
continue;
}
item next_item = current_item;
++next_item.pos;
r.insert(next_item);
}
return lr0_closure(g, r);
}
static states lr0_items(grammar const &g, term_set const &terminal_symbol_set, item const &start){
states c;
{
items init;
init.insert(start);
c.insert(lr0_closure(g, init));
}
std::size_t size;
do{
size = c.size();
for(auto iter = c.begin(); iter != c.end(); ++iter){
items const &i(*iter);
auto lambda = [&](term const &x){
items n_items = lr0_goto(g, i, x);
if(!n_items.empty() && c.find(n_items) == c.end()){
c.insert(n_items);
}
};
for(auto gter = g.begin(); gter != g.end(); ++gter){
lambda(gter->first);
}
for(auto xter = terminal_symbol_set.begin(); xter != terminal_symbol_set.end(); ++xter){
lambda(*xter);
}
}
}while(size != c.size());
return c;
}
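// LR(1) closure: for an item [A -> alpha . B beta, a], every rule B -> gamma is
// added with lookahead FIRST(beta a); when beta can derive epsilon, the parent
// item's lookaheads are merged in.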
items lr1_closure(grammar const &g, items i){
std::size_t s;
do{
s = i.size();
for(auto iter = i.begin(); iter != i.end(); ++iter){
item const &i_item(*iter);
if(i_item.is_over()){
continue;
}
if(is_terminal_symbol_functor()(i_item.curr(), g)){
continue;
}
rule_rhs const &rule(g.find(i_item.curr())->second);
for(auto jter = rule.begin(); jter != rule.end(); ++jter){
term_set first_beta_a;
if(i_item.pos + 1 < i_item.rhs.size()){
for(std::size_t n = i_item.pos + 1; n < i_item.rhs.size(); ++n){
term_set const &f = first_set(g, i_item.rhs[n]);
bool epsilon = f.find(epsilon_functor()()) != f.end();
for(term const &fe : f){
first_beta_a.insert(fe);
}
if(!epsilon){
break;
}
}
}
if(first_beta_a.empty()){
first_beta_a = i_item.lookahead;
}else if(first_beta_a.find(epsilon_functor()()) != first_beta_a.end()){
first_beta_a.erase(epsilon_functor()());
first_beta_a.insert(i_item.lookahead.begin(), i_item.lookahead.end());
}
item t;
t.lhs = i_item.curr();
t.rhs = *jter;
t.pos = 0;
t.lookahead = first_beta_a;
i.insert(t);
}
}
}while(s != i.size());
return i;
}
items lr1_goto(grammar const &g, items const &i, term const &x){
items j;
for(auto iter = i.begin(); iter != i.end(); ++iter){
if(iter->is_over()){
continue;
}
if(iter->curr() == x){
item t = *iter;
++t.pos;
j.insert(t);
}
}
return lr1_closure(g, j);
}
static states lr1_items(grammar const &g, term_set const &terminal_symbol_set, item const &start){
states c;
{
items init;
init.insert(start);
c.insert(lr1_closure(g, init));
}
std::size_t s;
do{
s = c.size();
for(auto iter = c.begin(); iter != c.end(); ++iter){
for(auto jter = g.begin(); jter != g.end(); ++jter){
items n = lr1_goto(g, *iter, jter->first);
if(!n.empty() && c.find(n) == c.end()){
c.insert(n);
}
}
for(auto jter = terminal_symbol_set.begin(); jter != terminal_symbol_set.end(); ++jter){
items n = lr1_goto(g, *iter, *jter);
if(!n.empty() && c.find(n) == c.end()){
c.insert(n);
}
}
}
}while(s != c.size());
return c;
}
static items kernel_filter(items i, item const &start){
for(auto iter = i.begin(); iter != i.end(); ){
if(iter->equal(start)){
++iter;
continue;
}
if(iter->pos == 0){
iter = i.erase(iter);
}else{
++iter;
}
}
return i;
}
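// Builds the LR(0) states (c_prime) and their kernels (c): kernel_filter keeps
// only items whose dot has advanced past position 0, plus the start item, and
// the two collections are cross-linked through the mirror pointers.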
static void lr0_kernel_items(grammar const &g, states &c_prime, states &c, term_set const &terminal_symbol_set, item const &start){
c_prime = lr0_items(g, terminal_symbol_set, start);
for(items const &i : c_prime){
auto p = c.insert(kernel_filter(i, start));
p.first->mirror = &i;
i.mirror = &*p.first;
}
}
static void make_goto_map(grammar const &g, term_set const &h, states const &c_prime, states const &c, item const &start){
term_set k;
for(auto const &r : g){
k.insert(r.first);
}
for(items const &s : c_prime){
auto f = [&](term_set const &ts){
for(term const &t : ts){
items n_goto = kernel_filter(lr0_goto(g, s, t), start);
if(n_goto.empty()){
continue;
}
s.mirror->goto_map[t] = &*c.find(n_goto);
}
};
f(h);
f(k);
}
}
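// LALR(1) lookahead completion over the LR(0) kernels: each kernel item is
// closed under LR(1) with a dummy lookahead symbol; lookaheads other than the
// dummy are generated spontaneously, while the dummy marks items whose
// lookaheads must be propagated, which the final loop repeats to a fixed point.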
void completion_lookahead(grammar const &g, states &s, item const &start){
std::map<item const*, std::set<item const*>> prop_map;
lr0_kernel_states s_prime;
for(auto const &k : s){
s_prime.insert(&k);
}
for(items const &k : s){
for(item const &i : k){
std::set<item const*> &prop_set(prop_map[&i]);
item i_prime = i;
i_prime.lookahead.clear();
i_prime.lookahead.insert(dummy_functor()());
items j;
j.insert(i_prime);
j = lr1_closure(g, j);
for(item const &b : j){
if(b.is_over()){
continue;
}
items n_goto = kernel_filter(lr1_goto(g, k, b.curr()), start);
item next = b;
++next.pos;
item const *next_b = &*k.goto_map[b.curr()]->find(next);
for(term const &t : b.lookahead){
if(t != dummy_functor()()){
next_b->lookahead.insert(t);
}else{
prop_set.insert(next_b);
}
}
}
}
}
bool z;
do{
z = false;
for(std::pair<item const*, std::set<item const*>> const &p : prop_map){
for(item const *dest : p.second){
std::size_t prev_size = dest->lookahead.size();
dest->lookahead.insert(p.first->lookahead.begin(), p.first->lookahead.end());
z = z || dest->lookahead.size() != prev_size;
}
}
}while(z);
}
states c_closure(grammar const &g, states const &c){
states c_prime;
for(items const &s : c){
c_prime.insert(lr1_closure(g, s));
}
return c_prime;
}
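// Builds the parsing tables from the completed item sets: rules and states are
// numbered, shift/reduce/accept actions are inserted, and conflicts are either
// resolved through the terminals' priority and left-associativity data in
// symbol_data_map or recorded in conflict_set.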
make_result make2(
grammar const &g,
states const &c,
symbol_data_map const &symbol_data_map,
item const &start
){
start.lookahead.insert(eos_functor()());
make_result result;
lr0_kernel_states s_prime;
rule_to_num &r2n(result.r2n);
num_to_rule &n2r(result.n2r);
{
std::size_t i = 0;
for(auto const &r : g){
for(auto const &rr : r.second){
n2r[i] = std::make_pair(r.first, &rr);
r2n[&rr] = i;
++i;
}
}
}
state_to_num &s2n(result.s2n);
num_to_state &n2s(result.n2s);
{
std::size_t i = 0;
for(auto const &s : c){
s_prime.insert(&s);
s2n[&s] = i;
n2s[i] = &s;
++i;
}
}
lr_parsing_table &parsing_table(result.parsing_table);
lr_conflict_set &conflict_set(result.conflict_set);
auto priority_sup = [&](term_sequence const &seq){
symbol_data_type p;
for(auto &t : seq){
if(!is_terminal_symbol_functor()(t, g)){ continue; }
auto iter = symbol_data_map.find(t);
if(iter == symbol_data_map.end()){ continue; }
symbol_data_type const &data = iter->second;
if(data.priority > p.priority){
p = data;
}
}
return p;
};
auto insert_conflict = [&](lr_parsing_table_item const &x, lr_parsing_table_item const &y){
conflict_set.insert(lr_conflict({ x, y }));
};
for(items const &state : c){
for(item const &i : state){
if(i.is_over()){
if(i.lhs == start.lhs && i.rhs == start.rhs && i.lookahead == start.lookahead){
// accept
lr_parsing_table_item a;
a.action = lr_parsing_table_item::enum_action::accept;
a.num = 0;
a.item_ptr = &i;
auto p = parsing_table[s2n[&state]].insert(std::make_pair(eos_functor()(), a));
if(!p.second){
insert_conflict(p.first->second, a);
}
}else{
// reduce
lr_parsing_table_item a;
a.action = lr_parsing_table_item::enum_action::reduce;
a.num = r2n[&*g.find(i.lhs)->second.find(i.rhs)];
a.item_ptr = &i;
for(term const &t : i.lookahead){
auto &parsing_table_item = parsing_table[s2n[&state]];
auto p = parsing_table_item.insert(std::make_pair(t, a));
if(!p.second){
lr_parsing_table_item const &other = p.first->second;
if(other.action == lr_parsing_table_item::enum_action::reduce){
insert_conflict(other, a);
}else{
term const &tag = i.rhs.tag, &other_tag = other.item_ptr->rhs.tag;
symbol_data_type symbol_data, other_symbol_data;
if(tag == epsilon_functor()()){
symbol_data = priority_sup(i.rhs);
}
if(other_tag == epsilon_functor()()){
other_symbol_data = priority_sup(other.item_ptr->rhs);
}
if(symbol_data.priority > other_symbol_data.priority){
p.first->second = a;
}else if(symbol_data.priority == other_symbol_data.priority){
if(symbol_data.dir == linkdir::left && other_symbol_data.dir != linkdir::left){
p.first->second = a;
}
}
}
}
}
}
}else{
if(is_terminal_symbol_functor()(i.curr(), g)){
// shift
lr_parsing_table_item a;
items n_goto = lr1_goto(g, state, i.curr());
a.action = lr_parsing_table_item::enum_action::shift;
a.num = s2n[&n_goto];
a.item_ptr = &i;
auto &parsing_table_item = parsing_table[s2n[&state]];
auto p = parsing_table_item.insert(std::make_pair(i.curr(), a));
if(!p.second){
lr_parsing_table_item const &other = p.first->second;
if(other.action == lr_parsing_table_item::enum_action::reduce){
symbol_data_type symbol_data, other_symbol_data;
term const &tag = a.item_ptr->rhs.tag;
auto iter = symbol_data_map.find(tag), other_iter = symbol_data_map.find(i.curr());
if(iter != symbol_data_map.end()){
symbol_data = iter->second;
}else{
symbol_data = priority_sup(i.rhs);
}
if(other_iter != symbol_data_map.end()){
other_symbol_data = other_iter->second;
}
if(symbol_data.priority > other_symbol_data.priority){
p.first->second = a;
}else if(symbol_data.priority == other_symbol_data.priority){
if(symbol_data.dir == linkdir::left && other_symbol_data.dir != linkdir::left){
p.first->second = a;
}
}
}
}
}else if(i.pos == 0 && i.lhs == start.lhs && i.rhs == start.rhs && i.lookahead == start.lookahead){
result.first = s2n[&state];
}
}
}
}
lr_goto_table &goto_table(result.goto_table);
for(auto const &p : s2n){
auto &goto_map(goto_table[p.second]);
for(auto const &r : g){
items n_goto = lr1_goto(g, *p.first, r.first);
if(s2n.find(&n_goto) != s2n.end()){
goto_map.insert(std::make_pair(r.first, s2n[&n_goto]));
}
}
}
return result;
}
static std::ostream &out_term_set(std::ostream &os, term_set const &s){
for(auto iter = s.begin(); iter != s.end(); ++iter){
os << *iter;
auto jter = iter;
++jter;
if(jter != s.end()){
os << ", ";
}
}
return os;
}
static std::ostream &out_item(std::ostream &os, item const &i){
os << i.lhs;
os << " -> ";
for(std::size_t count = 0; count < i.pos; ++count){
os << i.rhs[count] << " ";
}
os << ".";
for(std::size_t count = i.pos; count < i.rhs.size(); ++count){
os << i.rhs[count] << " ";
}
os << " [";
for(auto iter = i.lookahead.begin(); iter != i.lookahead.end(); ++iter){
os << *iter << " ";
}
os << "]";
return os;
}
static std::ostream &out_items(std::ostream &os, items const &is){
for(auto iter = is.begin(); iter != is.end(); ++iter){
out_item(os, *iter) << "\n";
}
return os;
}
static std::ostream &out_states(std::ostream &os, states const &c){
std::size_t count = 0;
for(auto iter = c.begin(); iter != c.end(); ++iter, ++count){
os << "-------- " << count << "\n";
out_items(os, *iter);
os << "\n";
}
return os;
}
};
#include <set>
#include <string>
#include <iostream>
#include <limits>
#include <iterator>
#include <regex>
#include <memory>
#include <fstream>
#include <algorithm> // std::lexicographical_compare (used by string_iter_pair)
namespace scanner{
template<class ValueType, class TermType>
struct basic_token{
using value_type = ValueType;
using term_type = TermType;
value_type value;
term_type term = term_type();
std::size_t char_num = 0, word_size = 0, line_num = 0;
basic_token() = default;
basic_token &operator =(basic_token const &other){
value = other.value;
term = other.term;
char_num = other.char_num;
word_size = other.word_size;
line_num = other.line_num;
return *this;
}
basic_token(basic_token const &other) :
value(other.value), term(other.term),
char_num(other.char_num), word_size(other.word_size), line_num(other.line_num)
{}
basic_token(basic_token &&other) :
value(std::move(other.value)), term(std::move(other.term)),
char_num(other.char_num), word_size(other.word_size), line_num(other.line_num)
{}
~basic_token() = default;
static basic_token const &dummy_storage(){
static basic_token storage;
return storage;
}
};
template<class String>
class string_iter_pair{
private:
typename String::const_iterator begin_, end_;
public:
using value_type = typename String::const_iterator::value_type;
using iterator_type = typename String::const_iterator;
string_iter_pair() = default;
string_iter_pair(iterator_type begin_, iterator_type end) : begin_(begin_), end_(end){}
iterator_type begin(){
return begin_;
}
~string_iter_pair() = default;
string_iter_pair &operator =(string_iter_pair const &other){
begin_ = other.begin_;
end_ = other.end_;
return *this;
}
bool operator <(string_iter_pair const &other) const{
return std::lexicographical_compare(begin(), end(), other.begin(), other.end());
}
bool operator ==(char const *str) const{
iterator_type iter = begin_;
for(; iter != end_ && *str != '\0'; ++iter, ++str){
if(*iter != *str){ return false; }
}
return iter == end_ && *str == '\0';
}
iterator_type end(){
return end_;
}
iterator_type begin() const{
return begin_;
}
iterator_type end() const{
return end_;
}
iterator_type cbegin() const{
return begin_;
}
iterator_type cend() const{
return end_;
}
bool empty() const{
return begin_ == end_;
}
std::string to_str() const{
return std::string(begin_, end_);
}
};
struct scanning_exception{
std::string message;
std::size_t char_num = 0, word_num = 0, line_num = 0;
scanning_exception() = default;
scanning_exception(const std::string &message, std::size_t char_num, std::size_t word_num, std::size_t line_num)
: message(message), char_num(char_num), word_num(word_num), line_num(line_num){}
~scanning_exception() = default;
};
using scanning_exception_seq = std::vector<scanning_exception>;
using term_type = int;
class{
private:
int terminal_counter = 1, nonterminal_counter = -1;
std::map<std::string, term_type> str_to_term;
std::map<term_type, std::string> term_to_str;
public:
term_type set_terminal(std::string const &str){
auto iter = str_to_term.find(str);
if(iter != str_to_term.end()){
return iter->second;
}else{
str_to_term.insert(std::make_pair(str, terminal_counter));
term_to_str.insert(std::make_pair(terminal_counter, str));
return terminal_counter++;
}
}
term_type set_nonterminal(std::string const &str){
auto iter = str_to_term.find(str);
if(iter != str_to_term.end()){
return iter->second;
}else{
str_to_term.insert(std::make_pair(str, nonterminal_counter));
term_to_str.insert(std::make_pair(nonterminal_counter, str));
return nonterminal_counter--;
}
}
std::string const &to_str(term_type t) const{
return term_to_str.find(t)->second;
}
} symbol_manager;
struct eos_functor{
term_type operator ()() const{
return std::numeric_limits<term_type>::max();
}
};
struct dummy_functor{
term_type operator ()() const{
return std::numeric_limits<term_type>::max() - 1;
}
};
struct whitespace_functor{
term_type operator ()() const{
return std::numeric_limits<term_type>::max() - 2;
}
};
struct epsilon_functor{
term_type operator ()() const{
return 0;
}
};
struct is_terminal_symbol{
template<class G>
bool operator ()(term_type a, G const &) const{
return a > 0;
}
};
term_type const
value = symbol_manager.set_terminal("value"),
comma = symbol_manager.set_terminal("comma"),
dot = symbol_manager.set_terminal("dot"),
question = symbol_manager.set_terminal("question"),
exclamation = symbol_manager.set_terminal("exclamation"),
plus = symbol_manager.set_terminal("plus"),
hyphen = symbol_manager.set_terminal("hyphen"),
asterisk = symbol_manager.set_terminal("asterisk"),
slash = symbol_manager.set_terminal("slash"),
ampersand = symbol_manager.set_terminal("ampersand"),
double_colon = symbol_manager.set_terminal("double_colon"),
colon = symbol_manager.set_terminal("colon"),
semicolon = symbol_manager.set_terminal("semicolon"),
l_square_bracket = symbol_manager.set_terminal("l_square_bracket"),
r_square_bracket = symbol_manager.set_terminal("r_square_bracket"),
l_curly_bracket = symbol_manager.set_terminal("l_curly_bracket"),
r_curly_bracket = symbol_manager.set_terminal("r_curly_bracket"),
l_bracket = symbol_manager.set_terminal("l_bracket"),
r_bracket = symbol_manager.set_terminal("r_bracket"),
l_round_paren = symbol_manager.set_terminal("l_round_paren"),
r_round_paren = symbol_manager.set_terminal("r_round_paren"),
vertical_bar = symbol_manager.set_terminal("vertical_bar"),
equal = symbol_manager.set_terminal("equal"),
string = symbol_manager.set_terminal("string"),
identifier = symbol_manager.set_terminal("identifier");
using vstring = std::vector<char>;
using vstring_range = string_iter_pair<vstring>;
using token_type = basic_token<vstring_range, term_type>;
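// Regex-based tokenizer: new_regex registers an anchored pattern per terminal,
// and tokenize repeatedly picks the longest match at the current position,
// tracking line/column (tab-aware) and dropping whitespace tokens.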
class lexer{
private:
std::map<term_type, std::regex> regex_map_subst;
std::vector<token_type> token_seq_subst;
std::size_t char_count = 0, line_count = 0;
std::size_t tab_width;
term_type t(std::string const &str){
return symbol_manager.set_terminal(str);
}
public:
decltype(regex_map_subst) const &regex_map = regex_map_subst;
decltype(token_seq_subst) const &token_seq = token_seq_subst;
lexer(std::size_t tab_width = 4) : tab_width(tab_width){}
void new_regex(const std::string &r, term_type token_kind){
std::regex regex("^(" + r + ")" , std::regex_constants::optimize);
regex_map_subst.insert(std::make_pair(token_kind, regex));
}
void clear_token_seq(){
token_seq_subst.clear();
char_count = 0;
line_count = 0;
}
void tokenize(vstring::const_iterator first, vstring::const_iterator last){
while(first != last){
bool match = false;
vstring::const_iterator longest_first = first, longest_last = first;
term_type current_term = term_type();
for(auto &e : regex_map){
std::match_results<vstring::const_iterator> match_results;
std::regex_search(first, last, match_results, e.second);
if(!match_results.empty() && match_results[0].first == first){
std::size_t b = longest_last - longest_first, a = match_results[0].str().size();
if(a > b){
longest_last = first + a;
current_term = e.first;
match = true;
}
}
}
if(match){
first = longest_last;
std::size_t char_count_before = char_count, line_count_before = line_count;
for(auto &i : vstring_range(longest_first, longest_last)){
if(i == '\n'){
++line_count;
char_count = 0;
}else if(i == '\t'){
char_count = (char_count / tab_width + 1) * tab_width;
}else{
++char_count;
}
}
if(current_term != whitespace_functor()()){
token_type t;
t.value = vstring_range(longest_first, longest_last);
t.term = current_term;
t.char_num = char_count_before;
t.word_size = char_count - char_count_before;
t.line_num = line_count_before;
token_seq_subst.push_back(t);
}
}else{
throw scanning_exception("lexical error.", char_count, 0, line_count);
}
}
token_type t;
t.term = eos_functor()();
token_seq_subst.push_back(t);
}
};
auto nt = [](std::string const &str){
return symbol_manager.set_nonterminal(str);
};
class ast{
public:
token_type token;
std::vector<ast*> nodes;
ast() = default;
ast(token_type const &token, std::vector<ast*> const &nodes) : token(token), nodes(nodes){}
virtual ~ast(){
for(auto ptr : nodes){
delete ptr;
}
}
static ast const &dummy_storage(){
static ast storage(token_type::dummy_storage(), std::vector<ast*>());
return storage;
}
static ast const *dummy_storage_ptr(){
return &(dummy_storage());
}
};
class scanning_data_type{
public:
enum class linkdir{
nonassoc,
left,
right
};
std::vector<ast*> ast_stack;
ast const
*token_namespace = nullptr,
*token_body = nullptr,
*grammar_namespace = nullptr,
*default_semantic_action = nullptr,
*expr_statements = nullptr;
std::vector<ast const*> token_header_opt;
std::size_t current_token_priority = std::numeric_limits<std::size_t>::max();
struct symbol_data_type{
std::size_t priority;
vstring_range type, str;
linkdir dir;
};
using symbol_data_map_type = std::map<vstring_range, symbol_data_type>;
symbol_data_map_type symbol_data_map;
std::vector<symbol_data_map_type::const_iterator> ordered_token_iters;
scanning_exception_seq identifier_seq_error;
struct rhs_seq_element_type{
token_type identifier, arg;
bool operator <(rhs_seq_element_type const &other) const{
if(identifier.value < other.identifier.value){
return true;
}else if(other.identifier.value < identifier.value){
return false;
}else if(arg.value < other.arg.value){
return true;
}else{
return false;
}
}
};
class rhs_seq : public std::vector<rhs_seq_element_type>{
public:
using base = std::vector<rhs_seq_element_type>;
using base::base;
token_type semantic_action, tag;
};
class lhs : public token_type{
public:
using base = token_type;
using base::base;
token_type type;
bool operator<(lhs const &other) const{
return base::value < other.base::value;
}
};
using rhs_seq_set = std::set<rhs_seq>;
std::map<lhs, rhs_seq_set> rules;
scanning_exception_seq expr_statements_error;
~scanning_data_type(){
for(auto ptr : ast_stack){
delete ptr;
}
}
void collect_info(){
get_top_level_seq_statements(token_body);
if(identifier_seq_error.size() > 0){
throw(identifier_seq_error);
}
// debug.
for(auto iter : ordered_token_iters){
std::cout << iter->first.to_str() << " :\n";
std::cout << "\tpriority : " << std::numeric_limits<std::size_t>::max() - iter->second.priority << "\n";
std::cout << "\tlinkdir : " << (iter->second.dir == linkdir::nonassoc ? "nonassoc" : iter->second.dir == linkdir::left ? "left" : iter->second.dir == linkdir::right ? "right" : "") << "\n";
std::cout << "\ttype : " << iter->second.type.to_str() << "\n";
std::cout << "\tstr : " << iter->second.str.to_str() << std::endl;
}
get_expr_statements(expr_statements);
if(expr_statements_error.size() > 0){
throw(expr_statements_error);
}
// debug.
for(auto &iter : rules){
std::cout << "----\n";
std::cout << iter.first.value.to_str() << "<" << iter.first.type.value.to_str() << "> :\n";
for(auto &jter : iter.second){
std::cout << "\t| ";
std::cout << "[" << jter.semantic_action.value.to_str() << "] ";
if(!jter.tag.value.empty()){
std::cout << "<" << jter.tag.value.to_str() << "> ";
}
for(auto &kter : jter){
std::cout << kter.identifier.value.to_str();
if(!kter.arg.value.empty()){
std::cout << "(";
std::cout << kter.arg.value.to_str();
std::cout << ")";
}
std::cout << " ";
}
std::cout << "\n";
}
}
std::cout << std::endl;
}
private:
ast const *get_arg_opt(ast const *ptr){
if(ptr->token.value.empty()){
return ast::dummy_storage_ptr();
}else{
return ptr->nodes[1];
}
}
ast const *get_semantic_action(ast const *ptr){
if(ptr->nodes[1]->token.value.empty()){
return ast::dummy_storage_ptr();
}else{
return ptr->nodes[1]->nodes[0];
}
}
ast const *get_tag_opt(ast const *ptr){
if(ptr->token.value.empty()){
return ast::dummy_storage_ptr();
}else{
return ptr->nodes[1];
}
}
void get_rhs_seq(ast const *ptr, rhs_seq &seq){
ast const *identifier, *arg;
if(ptr->nodes.size() == 3){
get_rhs_seq(ptr->nodes[0], seq);
identifier = ptr->nodes[1];
arg = get_arg_opt(ptr->nodes[2]);
}else{
identifier = ptr->nodes[0];
arg = get_arg_opt(ptr->nodes[1]);
}
rhs_seq_element_type rhs_seq_element;
rhs_seq_element.identifier = identifier->token;
rhs_seq_element.arg = arg->token;
seq.push_back(rhs_seq_element);
}
rhs_seq get_rhs_seq_opt(ast const *ptr){
rhs_seq s;
if(!ptr->token.value.empty()){
get_rhs_seq(ptr->nodes[0], s);
}
return s;
}
void get_rhs(ast const *ptr, rhs_seq_set &set){
ast const *semantic_action, *tag;
rhs_seq seq;
if(ptr->nodes.size() == 5){
get_rhs(ptr->nodes[0], set);
semantic_action = get_semantic_action(ptr->nodes[2]);
tag = get_tag_opt(ptr->nodes[3]);
seq = get_rhs_seq_opt(ptr->nodes[4]);
}else{
semantic_action = get_semantic_action(ptr->nodes[0]);
tag = get_tag_opt(ptr->nodes[1]);
seq = get_rhs_seq_opt(ptr->nodes[2]);
}
seq.semantic_action = semantic_action->token;
seq.tag = tag->token;
auto p = set.insert(seq);
if(!p.second){
expr_statements_error.push_back(scanning_exception("duplicated rhs.", ptr->token.char_num, ptr->token.word_size, ptr->token.line_num));
}
}
lhs get_lhs(ast const *ptr){
lhs x;
static_cast<token_type&>(x) = ptr->nodes[0]->token;
x.type = get_lhs_type(ptr->nodes[1])->token;
return x;
}
ast const *get_lhs_type(ast const *ptr){
return ptr->nodes[1];
}
void get_expr(ast const *ptr){
rhs_seq_set s;
get_rhs(ptr->nodes[2], s);
auto p = rules.insert(std::make_pair(get_lhs(ptr->nodes[0]), s));
if(!p.second){
expr_statements_error.push_back(scanning_exception("duplicated rule", ptr->nodes[0]->token.char_num, ptr->nodes[0]->token.word_size, ptr->nodes[0]->token.line_num));
}
}
void get_expr_statements(ast const *ptr){
if(ptr->nodes[0]->token.term == nt("ExprStatements")){
get_expr_statements(ptr->nodes[0]);
if(ptr->nodes.size() == 3){
get_expr(ptr->nodes[1]);
}
}else if(ptr->nodes[0]->token.term == nt("Expr")){
get_expr(ptr->nodes[0]);
}
}
void get_top_level_seq_statements(ast const *ptr){
if(ptr->nodes[0]->token.term == semicolon){
return;
}
if(ptr->nodes[0]->token.term == nt("TopLevelSeqStatementsElement")){
get_top_level_seq_statements_element(ptr->nodes[0]);
}else if(ptr->nodes[0]->token.term == nt("TopLevelSeqStatements")){
get_top_level_seq_statements(ptr->nodes[0]);
if(ptr->nodes[1]->token.term != semicolon){
get_top_level_seq_statements_element(ptr->nodes[1]);
}
}
}
void get_top_level_seq_statements_element(ast const *ptr){
if(ptr->nodes.size() == 2){
get_identifier_seq(ptr->nodes[0], linkdir::nonassoc);
}else{
get_block_with_link_dir(ptr->nodes[0]);
}
}
void get_block_with_link_dir(ast const *ptr){
get_seq_statements(ptr->nodes[2], get_link_dir(ptr->nodes[0]));
}
linkdir get_link_dir(ast const *ptr){
if(ptr->nodes[1]->token.value == "nonassoc"){
return linkdir::nonassoc;
}else if(ptr->nodes[1]->token.value == "left"){
return linkdir::left;
}else if(ptr->nodes[1]->token.value == "right"){
return linkdir::right;
}
throw(scanning_exception("link direction.", ptr->nodes[1]->token.char_num, ptr->nodes[1]->token.word_size, ptr->nodes[1]->token.line_num));
}
void get_seq_statements(ast const *ptr, linkdir dir){
if(ptr->nodes.size() < 2){
return;
}
if(ptr->nodes[0]->token.term == nt("SeqStatementsElement")){
get_seq_statements_element(ptr->nodes[0], dir);
}else if(ptr->nodes[0]->token.term == nt("SeqStatements")){
get_seq_statements(ptr->nodes[0], dir);
if(ptr->nodes.size() == 3){
get_seq_statements_element(ptr->nodes[1], dir);
}
}
}
void get_seq_statements_element(ast const *ptr, linkdir dir){
get_identifier_seq(ptr->nodes[0], dir);
--current_token_priority;
}
void get_identifier_seq(ast const *ptr, linkdir dir){
ast const *identifier, *symbol_type_opt, *symbol_str_opt;
if(ptr->nodes.size() == 5){
get_identifier_seq(ptr->nodes[0], dir);
identifier = ptr->nodes[2];
symbol_type_opt = get_symbol_type_opt(ptr->nodes[3]);
symbol_str_opt = get_symbol_str_opt(ptr->nodes[4]);
}else{
identifier = ptr->nodes[0];
symbol_type_opt = get_symbol_type_opt(ptr->nodes[1]);
symbol_str_opt = get_symbol_str_opt(ptr->nodes[2]);
}
symbol_data_type symbol_data;
symbol_data.priority = current_token_priority;
symbol_data.type = symbol_type_opt->token.value;
symbol_data.str = symbol_str_opt->token.value;
symbol_data.dir = dir;
std::pair<symbol_data_map_type::const_iterator, bool>
p = symbol_data_map.insert(std::make_pair(identifier->token.value, symbol_data));
if(!p.second){
identifier_seq_error.push_back(scanning_exception("token.", identifier->token.char_num, identifier->token.word_size, identifier->token.line_num));
}else{
ordered_token_iters.push_back(p.first);
}
}
ast const *get_symbol_type_opt(ast const *ptr){
if(ptr->token.value.empty()){
return ast::dummy_storage_ptr();
}else{
return ptr->nodes[1];
}
}
ast const *get_symbol_str_opt(ast const *ptr){
if(ptr->token.value.empty()){
return ast::dummy_storage_ptr();
}else{
return ptr->nodes[1];
}
}
} scanning_data;
using arg_type = std::vector<token_type>;
using semantic_type = std::function<token_type(term_type, arg_type const&, scanning_data_type&)>;
class scanner : public lalr<
semantic_type,
term_type,
eos_functor,
dummy_functor,
is_terminal_symbol,
epsilon_functor
>{
public:
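// Table-driven LR parse loop: shift pushes the next state and token (and an ast
// leaf), reduce pops |rhs| states, consults the goto table, and runs the rule's
// semantic action; accept runs the final action and stops.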
template<class InputIter>
bool parse(make_result const &table, InputIter first, InputIter last){
std::vector<std::size_t> state_stack;
std::vector<token_type> value_stack;
state_stack.push_back(table.first);
while(true){
token_type const &value = *first;
term const &t = value.term;
std::size_t s = state_stack.back();
auto const &table_second(table.parsing_table.find(s)->second);
auto iter = table_second.find(t);
if(iter == table_second.end()){
throw scanning_exception("parsing error.", value.char_num, value.word_size, value.line_num);
}
lr_parsing_table_item const &i = iter->second;
if(i.action == lr_parsing_table_item::enum_action::shift){
state_stack.push_back(i.num);
value_stack.push_back(value);
ast *a = new ast;
a->token = value;
scanning_data.ast_stack.push_back(a);
++first;
}else if(i.action == lr_parsing_table_item::enum_action::reduce){
auto &p(*table.n2r.find(i.num));
std::size_t norm = p.second.second->size();
state_stack.resize(state_stack.size() - norm);
if(state_stack.empty()){
throw scanning_exception("parsing error.", value.char_num, value.word_size, value.line_num);
}
state_stack.push_back(table.goto_table.find(state_stack.back())->second.find(p.second.first)->second);
decltype(value_stack) arg(value_stack.begin() + (value_stack.size() - norm), value_stack.end());
value_stack.resize(value_stack.size() - norm);
value_stack.push_back(p.second.second->semantic_data(p.second.first, arg, scanning_data));
}else if(i.action == lr_parsing_table_item::enum_action::accept){
auto &p(*table.n2r.find(i.num));
std::size_t norm = p.second.second->size();
state_stack.resize(state_stack.size() - norm);
if(state_stack.empty()){
throw scanning_exception("parsing error.", value.char_num, value.word_size, value.line_num);
}
decltype(value_stack) arg(value_stack.begin() + (value_stack.size() - norm), value_stack.end());
value_stack.resize(value_stack.size() - norm);
value_stack.push_back(p.second.second->semantic_data(p.second.first, arg, scanning_data));
break;
}
}
return first == last;
}
};
void init_lexer(lexer &lex){
lex.new_regex("( |\t|\r|\n|\r\n|(//[^\r\n]*(\r|\n|\r\n)))+", whitespace_functor()());
lex.new_regex("[0-9]+", value);
lex.new_regex(",", comma);
lex.new_regex("\\.", dot);
lex.new_regex("\\?", question);
lex.new_regex("!", exclamation);
lex.new_regex("\\+", plus);
lex.new_regex("-", hyphen);
lex.new_regex("\\*", asterisk);
lex.new_regex("\\/", slash);
lex.new_regex("&", ampersand);
lex.new_regex("::", double_colon);
lex.new_regex(":", colon);
lex.new_regex(";", semicolon);
lex.new_regex("\\[", l_square_bracket);
lex.new_regex("\\]", r_square_bracket);
lex.new_regex("\\{", l_curly_bracket);
lex.new_regex("\\}", r_curly_bracket);
lex.new_regex("<", l_bracket);
lex.new_regex(">", r_bracket);
lex.new_regex("\\(", l_round_paren);
lex.new_regex("\\)", r_round_paren);
lex.new_regex("\\|", vertical_bar);
lex.new_regex("=", equal);
lex.new_regex("\"([^\"]|\\\\\")+\"", string);
lex.new_regex("[a-zA-Z_][a-zA-Z0-9_]*", identifier);
}
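// Hand-encoded bootstrap grammar for the DSL shown at the top of the file. The
// shared "eat" action builds an ast node from the reduced symbols; a few rules
// use specialized actions that additionally record pointers to interesting
// subtrees (token body, default semantic action, namespaces) in scanning_data.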
void init_grammar(scanner::grammar &grammar){
using seq = scanner::term_sequence;
auto decl_g = [&](std::string const &str) -> scanner::rule_rhs&{
return grammar[symbol_manager.set_nonterminal(str)];
};
semantic_type eat = [](term_type term, arg_type const &arg, scanning_data_type &data){
token_type token;
token.term = term;
if(arg.size() > 0){
bool exists = false;
auto arg_head_iter = arg.begin();
for(; arg_head_iter != arg.end(); ++arg_head_iter){
if(!arg_head_iter->value.empty()){
exists = true;
break;
}
}
auto arg_tail_iter = arg.rbegin();
for(; arg_tail_iter != arg.rend(); ++arg_tail_iter){
if(!arg_tail_iter->value.empty()){
break;
}
}
if(exists){
token.value = vstring_range(arg_head_iter->value.begin(), arg_tail_iter->value.end());
}
}
ast *a = new ast;
a->nodes.resize(arg.size());
std::size_t i = 0;
for(auto iter = data.ast_stack.end() - arg.size(); iter != data.ast_stack.end(); ++iter, ++i){
a->nodes[i] = *iter;
}
a->token = token;
data.ast_stack.resize(data.ast_stack.size() - arg.size());
data.ast_stack.push_back(a);
return token; // auto
};
decl_g("IdentifierSeq") = scanner::rule_rhs({
seq({ identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat),
seq({ nt("IdentifierSeq"), comma, identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat)
});
decl_g("NonDelimIdentifierSeq") = scanner::rule_rhs({
seq({ identifier, nt("ReferenceSpecifier_opt") }, eat),
seq({ nt("NonDelimIdentifierSeq"), identifier, nt("ReferenceSpecifier_opt")}, eat),
seq({ nt("ReferenceSpecifier") }, eat)
});
decl_g("Type") = scanner::rule_rhs({
seq({ nt("DoubleColon_opt"), nt("NonDelimIdentifierSeq"), nt("Template_opt"), nt("NestIdentifier_opt") }, eat),
seq({ nt("Type"), nt("NonDelimIdentifierSeq") }, eat)
});
decl_g("ReferenceSpecifier") = scanner::rule_rhs({
seq({ asterisk }, eat),
seq({ ampersand }, eat)
});
decl_g("ReferenceSpecifier_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("ReferenceSpecifier") }, eat)
});
decl_g("DoubleColon_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ double_colon }, eat)
});
decl_g("NestIdentifier_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("NestIdentifier") }, eat)
});
decl_g("NestIdentifier") = scanner::rule_rhs({
seq({ double_colon, identifier, nt("Template_opt") }, eat),
seq({ dot, identifier, nt("Template_opt") }, eat),
seq({ nt("NestIdentifier"), double_colon, identifier, nt("Template_opt") }, eat),
seq({ nt("NestIdentifier"), dot, identifier, nt("Template_opt") }, eat)
});
decl_g("Template") = scanner::rule_rhs({
seq({ l_bracket, nt("TemplateArg_opt"), r_bracket }, eat)
});
decl_g("Template_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("Template") }, eat)
});
decl_g("TemplateArg_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("TypeSeq") }, eat)
});
decl_g("TypeSeq") = scanner::rule_rhs({
seq({ nt("Type") }, eat),
seq({ nt("Type"), nt("TypeSeqRest") }, eat)
});
decl_g("TypeSeqRest") = scanner::rule_rhs({
seq({ comma, nt("Type") }, eat),
seq({ nt("TypeSeqRest"), comma, nt("Type") }, eat)
});
decl_g("SymbolType_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_bracket, nt("Type"), r_bracket }, eat)
});
decl_g("SymbolStr_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ equal, string }, eat)
});
decl_g("LinkDir") = scanner::rule_rhs({
seq({ l_bracket, identifier, r_bracket }, eat)
});
decl_g("BlockWithLinkDir") = scanner::rule_rhs({
seq({ nt("LinkDir"), l_curly_bracket, nt("SeqStatements"), r_curly_bracket }, eat)
});
decl_g("SeqStatements") = scanner::rule_rhs({
seq({ nt("SeqStatementsElement"), semicolon }, eat),
seq({ semicolon }, eat),
seq({ nt("SeqStatements"), nt("SeqStatementsElement"), semicolon }, eat),
seq({ nt("SeqStatements"), semicolon }, eat)
});
decl_g("SeqStatementsElement") = scanner::rule_rhs({
seq({ nt("IdentifierSeq") }, eat)
});
decl_g("TopLevelSeqStatements") = scanner::rule_rhs({
seq({ nt("TopLevelSeqStatementsElement") }, eat),
seq({ semicolon }, eat),
seq({ nt("TopLevelSeqStatements"), nt("TopLevelSeqStatementsElement") }, eat),
seq({ nt("TopLevelSeqStatements"), semicolon }, eat),
});
decl_g("TopLevelSeqStatementsElement") = scanner::rule_rhs({
seq({ nt("IdentifierSeq"), semicolon }, eat),
seq({ nt("BlockWithLinkDir") }, eat)
});
decl_g("Arg_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_round_paren, value, r_round_paren }, eat)
});
decl_g("SemanticAction") = scanner::rule_rhs({
seq({ l_square_bracket, nt("SemanticActionElement_opt"), r_square_bracket }, eat)
});
decl_g("SemanticActionElement_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ identifier }, eat)
});
decl_g("Tag_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_bracket, identifier, r_bracket }, eat)
});
decl_g("RHSSeq") = scanner::rule_rhs({
seq({ identifier, nt("Arg_opt") }, eat),
seq({ nt("RHSSeq"), identifier, nt("Arg_opt") }, eat),
});
decl_g("RHSSeq_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("RHSSeq") }, eat)
});
decl_g("RHS") = scanner::rule_rhs({
seq({ nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat),
seq({ nt("RHS"), vertical_bar, nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat)
});
decl_g("LHS") = scanner::rule_rhs({
seq({ identifier, nt("LHSType") }, eat)
});
decl_g("LHSType") = scanner::rule_rhs({
seq({ l_bracket, nt("Type"), r_bracket }, eat)
});
decl_g("Expr") = scanner::rule_rhs({
seq({ nt("LHS"), colon, nt("RHS") }, eat)
});
decl_g("ExprStatements") = scanner::rule_rhs({
seq({ nt("Expr"), semicolon }, eat),
seq({ semicolon }, eat),
seq({ nt("ExprStatements"), nt("Expr"), semicolon }, eat),
seq({ nt("ExprStatements"), semicolon }, eat)
});
semantic_type token_header_opt = [eat](term_type term, arg_type const &arg, scanning_data_type &data){
token_type t = eat(term, arg, data);
data.token_header_opt.push_back(data.ast_stack.back()->nodes[1]);
return t;
};
decl_g("TokenHeaderRest_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ comma, identifier }, token_header_opt)
});
decl_g("TokenHeader") = scanner::rule_rhs({
seq({ l_bracket, identifier, nt("TokenHeaderRest_opt"), r_bracket }, eat)
});
semantic_type token_body = [eat](term_type term, arg_type const &arg, scanning_data_type &data){
token_type t = eat(term, arg, data);
data.token_body = data.ast_stack.back()->nodes[1];
return t;
};
decl_g("TokenBody") = scanner::rule_rhs({
seq({ l_curly_bracket, nt("TopLevelSeqStatements"), r_curly_bracket }, token_body)
});
decl_g("GrammarHeader") = scanner::rule_rhs({
seq({ l_bracket, identifier, r_bracket }, eat)
});
decl_g("DefaultSemanticAction_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_square_bracket, identifier, r_square_bracket }, eat)
});
semantic_type grammar_body = [eat](term_type term, arg_type const &arg, scanning_data_type &data){
token_type t = eat(term, arg, data);
data.default_semantic_action = data.ast_stack.back()->nodes[1];
data.expr_statements = data.ast_stack.back()->nodes[2];
return t;
};
decl_g("GrammarBody") = scanner::rule_rhs({
seq({ l_curly_bracket, nt("DefaultSemanticAction_opt"), nt("ExprStatements"), r_curly_bracket }, grammar_body)
});
semantic_type token_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data){
token_type t = eat(term, arg, data);
data.token_namespace = data.ast_stack.back();
return t;
};
decl_g("TokenNamespace") = scanner::rule_rhs({
seq({ identifier }, token_namespace)
});
semantic_type grammar_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data){
token_type t = eat(term, arg, data);
data.grammar_namespace = data.ast_stack.back();
return t;
};
decl_g("GrammarNamespace") = scanner::rule_rhs({
seq({ identifier }, grammar_namespace)
});
decl_g("Start") = scanner::rule_rhs({
seq({
nt("TokenHeader"),
nt("TokenNamespace"),
nt("TokenBody"),
nt("GrammarHeader"),
nt("GrammarNamespace"),
nt("GrammarBody")
}, eat)
});
decl_g("S'") = scanner::rule_rhs({
seq({ nt("Start") }, eat)
});
}
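// Driver: build the bootstrap grammar, run the LALR construction (LR(0) kernels,
// goto map, lookahead completion, closure, table generation), then read the
// input file, tokenize it, parse it, and have scanning_data dump the collected
// token and rule information.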
void scan(std::string const &ifile_path){
scanner::grammar grammar;
init_grammar(grammar);
scanner sc;
scanner::symbol_data_map symbol_data_map;
scanner::term_set terminal_symbol_set = scanner::make_terminal_symbol_set(grammar);
scanner::item s;
s.lhs = symbol_manager.set_nonterminal("S'");
s.rhs.push_back(symbol_manager.set_nonterminal("Start"));
s.pos = 0;
s.lookahead.insert(eos_functor()());
sc.make_follow_set(grammar, s.lhs);
scanner::states states_prime, states;
sc.lr0_kernel_items(grammar, states_prime, states, terminal_symbol_set, s);
sc.make_goto_map(grammar, terminal_symbol_set, states_prime, states, s);
sc.completion_lookahead(grammar, states, s);
states_prime.clear();
states = sc.c_closure(grammar, states);
scanner::make_result make_result = sc.make2(grammar, states, symbol_data_map, s);
std::cout << "conflict num: " << make_result.conflict_set.size() << std::endl;
if(!make_result.conflict_set.empty()){
std::cout << "bootstrap parser parsing error." << std::endl;
return;
}
std::ifstream ifile(ifile_path, std::ios::binary);
if(!ifile){
std::cout << "cannot open ifile." << std::endl;
return;
}
std::ifstream::streampos tell_beg, tell_eof;
ifile.seekg(0, std::ifstream::end);
tell_eof = ifile.tellg();
ifile.clear();
ifile.seekg(0, std::ifstream::beg);
tell_beg = ifile.tellg();
std::size_t ifile_size = tell_eof - tell_beg;
vstring string(ifile_size);
ifile.read(string.data(), ifile_size);
lexer lex;
init_lexer(lex);
try{
lex.tokenize(string.begin(), string.end());
sc.parse(make_result, lex.token_seq.begin(), lex.token_seq.end() - 1);
scanning_data.collect_info();
}catch(scanning_exception const &e){
std::cout << e.message << " : line " << e.line_num << ", char " << e.char_num << std::endl;
return;
}catch(scanning_exception_seq const &seq){
for(scanning_exception const &e : seq){
std::cout << e.message << " : line " << e.line_num << ", char " << e.char_num << std::endl;
}
return;
}
std::cout << "success..." << std::endl;
return;
}
} // namespace scanner
int main(){
scanner::scan("grammar.txt");
return 0;
}