Skip to content

Instantly share code, notes, and snippets.

@marionette-of-u
Created August 8, 2015 13:32
Show Gist options
  • Save marionette-of-u/1f92f5d3a9e89e938718 to your computer and use it in GitHub Desktop.
Save marionette-of-u/1f92f5d3a9e89e938718 to your computer and use it in GitHub Desktop.
C++11によるパーサジェネレータのブートストラップの記述例
https://gist.github.com/marionette-of-u/cd23016454f22bfb3383/019f24bfbdf60ba9025cd4ea664b051ebe678575
decl_g関数はBNF規則名を取り規則を保持するscanner::rule_rhsのインスタンスの参照を返す
中身はstd::map<std::string, scanner::rule_rhs>とほぼ同じ
nt関数は非終端記号の名前(文字列)を取り、内部処理可能な記号(=term(=int))に変換する
seqクラスはコンストラクタに記号(=term(=int))の初期化リストとセマンティックアクションを取る
初期化リストはそのままBNF規則の右辺のひとつとして扱える
ASTを構築してあとで解析するので、ほとんどのセマンティックアクションにはeat(食べる)を指定している
scanner::rule_rhsクラスはコンストラクタにseqクラスの初期化リストを取る
seqクラスのインスタンスはそのまま右辺のひとつの表現なので
それらを複数個取るだけでBNF規則として扱える
decl_g関数が返した参照に代入すれば定義完了
C++11になってboost非依存で特定の処理系にも非依存というスマートな記述になりました
// Grammar rules for identifier lists. Each seq({...}, action) is one
// alternative (right-hand side) of the rule named in decl_g(); nt("X")
// refers to the nonterminal X, bare names (identifier, comma, ...) are
// terminals declared elsewhere. Every alternative here uses the `eat` action.
//
// IdentifierSeq:
//     identifier SymbolType_opt SymbolStr_opt
//   | IdentifierSeq comma identifier SymbolType_opt SymbolStr_opt
decl_g("IdentifierSeq") = scanner::rule_rhs({
seq({ identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat),
seq({ nt("IdentifierSeq"), comma, identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat)
});
// NonDelimIdentifierSeq (no delimiter token between items):
//     identifier ReferenceSpecifier_opt
//   | NonDelimIdentifierSeq identifier ReferenceSpecifier_opt
//   | ReferenceSpecifier
decl_g("NonDelimIdentifierSeq") = scanner::rule_rhs({
seq({ identifier, nt("ReferenceSpecifier_opt") }, eat),
seq({ nt("NonDelimIdentifierSeq"), identifier, nt("ReferenceSpecifier_opt")}, eat),
seq({ nt("ReferenceSpecifier") }, eat)
});
// Grammar rules describing a type expression. An alternative written as
// seq({}, eat) is the empty production, which is how the *_opt rules
// express "optional".
//
// Type:
//     DoubleColon_opt NonDelimIdentifierSeq Template_opt NestIdentifier_opt
//   | Type NonDelimIdentifierSeq
decl_g("Type") = scanner::rule_rhs({
seq({ nt("DoubleColon_opt"), nt("NonDelimIdentifierSeq"), nt("Template_opt"), nt("NestIdentifier_opt") }, eat),
seq({ nt("Type"), nt("NonDelimIdentifierSeq") }, eat)
});
// ReferenceSpecifier: asterisk | ampersand
decl_g("ReferenceSpecifier") = scanner::rule_rhs({
seq({ asterisk }, eat),
seq({ ampersand }, eat)
});
// ReferenceSpecifier_opt: (empty) | ReferenceSpecifier
decl_g("ReferenceSpecifier_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("ReferenceSpecifier") }, eat)
});
// DoubleColon_opt: (empty) | double_colon
decl_g("DoubleColon_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ double_colon }, eat)
});
// NestIdentifier_opt: (empty) | NestIdentifier
decl_g("NestIdentifier_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("NestIdentifier") }, eat)
});
// NestIdentifier: one or more `(double_colon | dot) identifier Template_opt`
// segments, built left-recursively.
decl_g("NestIdentifier") = scanner::rule_rhs({
seq({ double_colon, identifier, nt("Template_opt") }, eat),
seq({ dot, identifier, nt("Template_opt") }, eat),
seq({ nt("NestIdentifier"), double_colon, identifier, nt("Template_opt") }, eat),
seq({ nt("NestIdentifier"), dot, identifier, nt("Template_opt") }, eat)
});
// Template: l_bracket TemplateArg_opt r_bracket
decl_g("Template") = scanner::rule_rhs({
seq({ l_bracket, nt("TemplateArg_opt"), r_bracket }, eat)
});
// Template_opt: (empty) | Template
decl_g("Template_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("Template") }, eat)
});
// TemplateArg_opt: (empty) | TypeSeq
decl_g("TemplateArg_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("TypeSeq") }, eat)
});
// TypeSeq: Type | Type TypeSeqRest
decl_g("TypeSeq") = scanner::rule_rhs({
seq({ nt("Type") }, eat),
seq({ nt("Type"), nt("TypeSeqRest") }, eat)
});
// TypeSeqRest: comma Type | TypeSeqRest comma Type
decl_g("TypeSeqRest") = scanner::rule_rhs({
seq({ comma, nt("Type") }, eat),
seq({ nt("TypeSeqRest"), comma, nt("Type") }, eat)
});
// Optional annotations that may follow an identifier in IdentifierSeq.
//
// SymbolType_opt: (empty) | l_bracket Type r_bracket
decl_g("SymbolType_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_bracket, nt("Type"), r_bracket }, eat)
});
// SymbolStr_opt: (empty) | equal string
decl_g("SymbolStr_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ equal, string }, eat)
});
// Grammar rules for the statements that make up the token section
// (TopLevelSeqStatements is what TokenBody wraps in curly brackets).
//
// LinkDir: l_bracket identifier r_bracket
decl_g("LinkDir") = scanner::rule_rhs({
seq({ l_bracket, identifier, r_bracket }, eat)
});
// BlockWithLinkDir: LinkDir l_curly_bracket SeqStatements r_curly_bracket
decl_g("BlockWithLinkDir") = scanner::rule_rhs({
seq({ nt("LinkDir"), l_curly_bracket, nt("SeqStatements"), r_curly_bracket }, eat)
});
// SeqStatements: one or more items, each either `SeqStatementsElement semicolon`
// or a lone `semicolon`; built left-recursively.
decl_g("SeqStatements") = scanner::rule_rhs({
seq({ nt("SeqStatementsElement"), semicolon }, eat),
seq({ semicolon }, eat),
seq({ nt("SeqStatements"), nt("SeqStatementsElement"), semicolon }, eat),
seq({ nt("SeqStatements"), semicolon }, eat)
});
// SeqStatementsElement: IdentifierSeq
decl_g("SeqStatementsElement") = scanner::rule_rhs({
seq({ nt("IdentifierSeq") }, eat)
});
// TopLevelSeqStatements: one or more items, each either a
// TopLevelSeqStatementsElement or a lone `semicolon`. Unlike SeqStatements,
// the terminating semicolon (where required) is part of the element rule.
decl_g("TopLevelSeqStatements") = scanner::rule_rhs({
seq({ nt("TopLevelSeqStatementsElement") }, eat),
seq({ semicolon }, eat),
seq({ nt("TopLevelSeqStatements"), nt("TopLevelSeqStatementsElement") }, eat),
seq({ nt("TopLevelSeqStatements"), semicolon }, eat),
});
// TopLevelSeqStatementsElement: IdentifierSeq semicolon | BlockWithLinkDir
decl_g("TopLevelSeqStatementsElement") = scanner::rule_rhs({
seq({ nt("IdentifierSeq"), semicolon }, eat),
seq({ nt("BlockWithLinkDir") }, eat)
});
// Grammar rules for the production definitions of the grammar section
// (ExprStatements is what GrammarBody wraps in curly brackets).
//
// Arg_opt: (empty) | l_round_paren value r_round_paren
decl_g("Arg_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_round_paren, value, r_round_paren }, eat)
});
// SemanticAction: l_square_bracket SemanticActionElement_opt r_square_bracket
decl_g("SemanticAction") = scanner::rule_rhs({
seq({ l_square_bracket, nt("SemanticActionElement_opt"), r_square_bracket }, eat)
});
// SemanticActionElement_opt: (empty) | identifier
decl_g("SemanticActionElement_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ identifier }, eat)
});
// Tag_opt: (empty) | l_bracket identifier r_bracket
decl_g("Tag_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_bracket, identifier, r_bracket }, eat)
});
// RHSSeq: one or more `identifier Arg_opt` items, built left-recursively.
decl_g("RHSSeq") = scanner::rule_rhs({
seq({ identifier, nt("Arg_opt") }, eat),
seq({ nt("RHSSeq"), identifier, nt("Arg_opt") }, eat),
});
// RHSSeq_opt: (empty) | RHSSeq
decl_g("RHSSeq_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ nt("RHSSeq") }, eat)
});
// RHS: one or more `SemanticAction Tag_opt RHSSeq_opt` alternatives
// separated by vertical_bar.
decl_g("RHS") = scanner::rule_rhs({
seq({ nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat),
seq({ nt("RHS"), vertical_bar, nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat)
});
// LHS: identifier LHSType
decl_g("LHS") = scanner::rule_rhs({
seq({ identifier, nt("LHSType") }, eat)
});
// LHSType: l_bracket Type r_bracket
decl_g("LHSType") = scanner::rule_rhs({
seq({ l_bracket, nt("Type"), r_bracket }, eat)
});
// Expr: LHS colon RHS
decl_g("Expr") = scanner::rule_rhs({
seq({ nt("LHS"), colon, nt("RHS") }, eat)
});
// ExprStatements: one or more items, each either `Expr semicolon` or a lone
// `semicolon`; built left-recursively.
decl_g("ExprStatements") = scanner::rule_rhs({
seq({ nt("Expr"), semicolon }, eat),
seq({ semicolon }, eat),
seq({ nt("ExprStatements"), nt("Expr"), semicolon }, eat),
seq({ nt("ExprStatements"), semicolon }, eat)
});
// Semantic action for the `comma identifier` alternative of TokenHeaderRest_opt.
// Delegates to `eat` first, then appends child node [1] of the node on top of
// data.ast_stack to data.token_header_opt and returns eat's result.
// NOTE(review): presumably the top of ast_stack is the node `eat` just built,
// making nodes[1] the identifier -- confirm against eat's implementation.
semantic_type token_header_opt = [eat](term_type term, arg_type const &arg, scanning_data_type &data) -> token_type {
    token_type result = eat(term, arg, data);
    auto &&top_node = data.ast_stack.back();
    data.token_header_opt.push_back(top_node->nodes[1]);
    return result;
};
// TokenHeaderRest_opt: (empty) | comma identifier
// The non-empty alternative uses the token_header_opt action instead of eat.
decl_g("TokenHeaderRest_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ comma, identifier }, token_header_opt)
});
// TokenHeader: l_bracket identifier TokenHeaderRest_opt r_bracket
decl_g("TokenHeader") = scanner::rule_rhs({
seq({ l_bracket, identifier, nt("TokenHeaderRest_opt"), r_bracket }, eat)
});
// Semantic action for TokenBody. Delegates to `eat` first, then stores child
// node [1] of the node on top of data.ast_stack in data.token_body and
// returns eat's result.
// NOTE(review): presumably nodes[1] is the TopLevelSeqStatements subtree
// (second symbol of the TokenBody production) -- confirm against eat.
semantic_type token_body = [eat](term_type term, arg_type const &arg, scanning_data_type &data) -> token_type {
    token_type result = eat(term, arg, data);
    auto &&top_node = data.ast_stack.back();
    data.token_body = top_node->nodes[1];
    return result;
};
// TokenBody: l_curly_bracket TopLevelSeqStatements r_curly_bracket
// Uses the token_body action instead of eat.
decl_g("TokenBody") = scanner::rule_rhs({
seq({ l_curly_bracket, nt("TopLevelSeqStatements"), r_curly_bracket }, token_body)
});
// GrammarHeader: l_bracket identifier r_bracket
decl_g("GrammarHeader") = scanner::rule_rhs({
seq({ l_bracket, identifier, r_bracket }, eat)
});
// DefaultSemanticAction_opt: (empty) | l_square_bracket identifier r_square_bracket
decl_g("DefaultSemanticAction_opt") = scanner::rule_rhs({
seq({}, eat),
seq({ l_square_bracket, identifier, r_square_bracket }, eat)
});
// Semantic action for GrammarBody. Delegates to `eat` first, then stores two
// children of the node on top of data.ast_stack: nodes[1] in
// data.default_semantic_action and nodes[2] in data.expr_statements.
// Returns eat's result.
// NOTE(review): presumably these are the DefaultSemanticAction_opt and
// ExprStatements subtrees of the GrammarBody production -- confirm against eat.
semantic_type grammar = [eat](term_type term, arg_type const &arg, scanning_data_type &data) -> token_type {
    token_type result = eat(term, arg, data);
    auto &&top_node = data.ast_stack.back();
    data.default_semantic_action = top_node->nodes[1];
    data.expr_statements = top_node->nodes[2];
    return result;
};
// GrammarBody: l_curly_bracket DefaultSemanticAction_opt ExprStatements r_curly_bracket
// Uses the `grammar` action instead of eat.
decl_g("GrammarBody") = scanner::rule_rhs({
seq({ l_curly_bracket, nt("DefaultSemanticAction_opt"), nt("ExprStatements"), r_curly_bracket }, grammar)
});
// Semantic action for TokenNamespace. Delegates to `eat` first, then stores
// the node on top of data.ast_stack in data.token_namespace and returns
// eat's result.
semantic_type token_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data) -> token_type {
    token_type result = eat(term, arg, data);
    data.token_namespace = data.ast_stack.back();
    return result;
};
// TokenNamespace: identifier
// Uses the token_namespace action instead of eat.
decl_g("TokenNamespace") = scanner::rule_rhs({
seq({ identifier }, token_namespace)
});
// Semantic action for GrammarNamespace. Delegates to `eat` first, then stores
// the node on top of data.ast_stack in data.grammar_namespace and returns
// eat's result.
semantic_type grammar_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data) -> token_type {
    token_type result = eat(term, arg, data);
    data.grammar_namespace = data.ast_stack.back();
    return result;
};
// GrammarNamespace: identifier
// Uses the grammar_namespace action instead of eat.
decl_g("GrammarNamespace") = scanner::rule_rhs({
seq({ identifier }, grammar_namespace)
});
// Start: the whole input -- a token section (header, namespace, body)
// followed by a grammar section (header, namespace, body).
decl_g("Start") = scanner::rule_rhs({
seq({
nt("TokenHeader"),
nt("TokenNamespace"),
nt("TokenBody"),
nt("GrammarHeader"),
nt("GrammarNamespace"),
nt("GrammarBody")
}, eat)
});
// S': Start
// NOTE(review): looks like the augmented start rule for the parser
// generator -- confirm where "S'" is consumed.
decl_g("S'") = scanner::rule_rhs({
seq({ nt("Start") }, eat)
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment