Created
August 8, 2015 13:32
-
-
Save marionette-of-u/1f92f5d3a9e89e938718 to your computer and use it in GitHub Desktop.
C++11によるパーサジェネレータのブートストラップの記述例
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| https://gist.github.com/marionette-of-u/cd23016454f22bfb3383/019f24bfbdf60ba9025cd4ea664b051ebe678575 | |
| decl_g関数はBNF規則名を取り規則を保持するscanner::rule_rhsのインスタンスの参照を返す | |
| 中身はstd::map<std::string, scanner::rule_rhs>とほぼ同じ | |
| nt関数は非終端記号を内部処理可能な記号(=term(=int))に変換する | |
| seqクラスはコンストラクタに記号(=term(=int))の初期化リストとセマンティックアクションを取る | |
| 初期化リストはそのままBNF規則の右辺のひとつとして扱える | |
| ASTを構築してあとで解析するのでほとんどのセマンティックアクションはeat(食べる) | |
| scanner::rule_rhsクラスはコンストラクタにseqクラスの初期化リストを取る | |
| seqクラスのインスタンスはそのまま右辺のひとつの表現なので | |
| それらを複数個取るだけでBNF規則として扱える | |
| decl_g関数が返した参照に代入すれば定義完了 | |
| C++11になってboost非依存で特定の処理系にも非依存というスマートな記述になりました | |
// IdentifierSeq ::= identifier SymbolType_opt SymbolStr_opt
//                 | IdentifierSeq comma identifier SymbolType_opt SymbolStr_opt
// decl_g(name) yields a reference to the scanner::rule_rhs stored under that rule name;
// assigning a rule_rhs built from seq alternatives defines the rule. nt(name) turns a
// nonterminal name into the internal symbol value used inside a seq.
| decl_g("IdentifierSeq") = scanner::rule_rhs({ | |
| seq({ identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat), | |
| seq({ nt("IdentifierSeq"), comma, identifier, nt("SymbolType_opt"), nt("SymbolStr_opt") }, eat) | |
| }); | |
// NonDelimIdentifierSeq ::= identifier ReferenceSpecifier_opt
//                         | NonDelimIdentifierSeq identifier ReferenceSpecifier_opt
//                         | ReferenceSpecifier
// Unlike IdentifierSeq, successive identifiers are not separated by a comma token.
| decl_g("NonDelimIdentifierSeq") = scanner::rule_rhs({ | |
| seq({ identifier, nt("ReferenceSpecifier_opt") }, eat), | |
| seq({ nt("NonDelimIdentifierSeq"), identifier, nt("ReferenceSpecifier_opt")}, eat), | |
| seq({ nt("ReferenceSpecifier") }, eat) | |
| }); | |
// Type ::= DoubleColon_opt NonDelimIdentifierSeq Template_opt NestIdentifier_opt
//        | Type NonDelimIdentifierSeq
// The second alternative is left-recursive: a Type may be followed by further identifier runs.
| decl_g("Type") = scanner::rule_rhs({ | |
| seq({ nt("DoubleColon_opt"), nt("NonDelimIdentifierSeq"), nt("Template_opt"), nt("NestIdentifier_opt") }, eat), | |
| seq({ nt("Type"), nt("NonDelimIdentifierSeq") }, eat) | |
| }); | |
// ReferenceSpecifier ::= asterisk | ampersand
| decl_g("ReferenceSpecifier") = scanner::rule_rhs({ | |
| seq({ asterisk }, eat), | |
| seq({ ampersand }, eat) | |
| }); | |
// ReferenceSpecifier_opt ::= (empty) | ReferenceSpecifier
// seq({}, eat) is the empty right-hand side, which makes the specifier optional.
| decl_g("ReferenceSpecifier_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ nt("ReferenceSpecifier") }, eat) | |
| }); | |
// DoubleColon_opt ::= (empty) | double_colon
| decl_g("DoubleColon_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ double_colon }, eat) | |
| }); | |
// NestIdentifier_opt ::= (empty) | NestIdentifier
| decl_g("NestIdentifier_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ nt("NestIdentifier") }, eat) | |
| }); | |
// NestIdentifier ::= double_colon identifier Template_opt
//                  | dot identifier Template_opt
//                  | NestIdentifier double_colon identifier Template_opt
//                  | NestIdentifier dot identifier Template_opt
// A non-empty chain of members, each introduced by double_colon or dot.
| decl_g("NestIdentifier") = scanner::rule_rhs({ | |
| seq({ double_colon, identifier, nt("Template_opt") }, eat), | |
| seq({ dot, identifier, nt("Template_opt") }, eat), | |
| seq({ nt("NestIdentifier"), double_colon, identifier, nt("Template_opt") }, eat), | |
| seq({ nt("NestIdentifier"), dot, identifier, nt("Template_opt") }, eat) | |
| }); | |
// Template ::= l_bracket TemplateArg_opt r_bracket
| decl_g("Template") = scanner::rule_rhs({ | |
| seq({ l_bracket, nt("TemplateArg_opt"), r_bracket }, eat) | |
| }); | |
// Template_opt ::= (empty) | Template
| decl_g("Template_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ nt("Template") }, eat) | |
| }); | |
// TemplateArg_opt ::= (empty) | TypeSeq
| decl_g("TemplateArg_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ nt("TypeSeq") }, eat) | |
| }); | |
// TypeSeq ::= Type | Type TypeSeqRest
| decl_g("TypeSeq") = scanner::rule_rhs({ | |
| seq({ nt("Type") }, eat), | |
| seq({ nt("Type"), nt("TypeSeqRest") }, eat) | |
| }); | |
// TypeSeqRest ::= comma Type | TypeSeqRest comma Type
// One or more further comma-prefixed types following the first Type of a TypeSeq.
| decl_g("TypeSeqRest") = scanner::rule_rhs({ | |
| seq({ comma, nt("Type") }, eat), | |
| seq({ nt("TypeSeqRest"), comma, nt("Type") }, eat) | |
| }); | |
// SymbolType_opt ::= (empty) | l_bracket Type r_bracket
| decl_g("SymbolType_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ l_bracket, nt("Type"), r_bracket }, eat) | |
| }); | |
// SymbolStr_opt ::= (empty) | equal string
| decl_g("SymbolStr_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ equal, string }, eat) | |
| }); | |
// LinkDir ::= l_bracket identifier r_bracket
| decl_g("LinkDir") = scanner::rule_rhs({ | |
| seq({ l_bracket, identifier, r_bracket }, eat) | |
| }); | |
// BlockWithLinkDir ::= LinkDir l_curly_bracket SeqStatements r_curly_bracket
| decl_g("BlockWithLinkDir") = scanner::rule_rhs({ | |
| seq({ nt("LinkDir"), l_curly_bracket, nt("SeqStatements"), r_curly_bracket }, eat) | |
| }); | |
// SeqStatements ::= SeqStatementsElement semicolon
//                 | semicolon
//                 | SeqStatements SeqStatementsElement semicolon
//                 | SeqStatements semicolon
// A non-empty run of semicolon-terminated elements; a bare semicolon is accepted too.
| decl_g("SeqStatements") = scanner::rule_rhs({ | |
| seq({ nt("SeqStatementsElement"), semicolon }, eat), | |
| seq({ semicolon }, eat), | |
| seq({ nt("SeqStatements"), nt("SeqStatementsElement"), semicolon }, eat), | |
| seq({ nt("SeqStatements"), semicolon }, eat) | |
| }); | |
// SeqStatementsElement ::= IdentifierSeq
| decl_g("SeqStatementsElement") = scanner::rule_rhs({ | |
| seq({ nt("IdentifierSeq") }, eat) | |
| }); | |
// TopLevelSeqStatements ::= TopLevelSeqStatementsElement
//                         | semicolon
//                         | TopLevelSeqStatements TopLevelSeqStatementsElement
//                         | TopLevelSeqStatements semicolon
// Here the terminating semicolon belongs to the element rule below, not to this list rule.
| decl_g("TopLevelSeqStatements") = scanner::rule_rhs({ | |
| seq({ nt("TopLevelSeqStatementsElement") }, eat), | |
| seq({ semicolon }, eat), | |
| seq({ nt("TopLevelSeqStatements"), nt("TopLevelSeqStatementsElement") }, eat), | |
| seq({ nt("TopLevelSeqStatements"), semicolon }, eat), | |
| }); | |
// TopLevelSeqStatementsElement ::= IdentifierSeq semicolon | BlockWithLinkDir
// A BlockWithLinkDir needs no trailing semicolon.
| decl_g("TopLevelSeqStatementsElement") = scanner::rule_rhs({ | |
| seq({ nt("IdentifierSeq"), semicolon }, eat), | |
| seq({ nt("BlockWithLinkDir") }, eat) | |
| }); | |
// Arg_opt ::= (empty) | l_round_paren value r_round_paren
| decl_g("Arg_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ l_round_paren, value, r_round_paren }, eat) | |
| }); | |
// SemanticAction ::= l_square_bracket SemanticActionElement_opt r_square_bracket
| decl_g("SemanticAction") = scanner::rule_rhs({ | |
| seq({ l_square_bracket, nt("SemanticActionElement_opt"), r_square_bracket }, eat) | |
| }); | |
// SemanticActionElement_opt ::= (empty) | identifier
| decl_g("SemanticActionElement_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ identifier }, eat) | |
| }); | |
// Tag_opt ::= (empty) | l_bracket identifier r_bracket
| decl_g("Tag_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ l_bracket, identifier, r_bracket }, eat) | |
| }); | |
// RHSSeq ::= identifier Arg_opt | RHSSeq identifier Arg_opt
| decl_g("RHSSeq") = scanner::rule_rhs({ | |
| seq({ identifier, nt("Arg_opt") }, eat), | |
| seq({ nt("RHSSeq"), identifier, nt("Arg_opt") }, eat), | |
| }); | |
// RHSSeq_opt ::= (empty) | RHSSeq
| decl_g("RHSSeq_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ nt("RHSSeq") }, eat) | |
| }); | |
// RHS ::= SemanticAction Tag_opt RHSSeq_opt
//       | RHS vertical_bar SemanticAction Tag_opt RHSSeq_opt
// Alternatives are separated by vertical_bar; each begins with its SemanticAction.
| decl_g("RHS") = scanner::rule_rhs({ | |
| seq({ nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat), | |
| seq({ nt("RHS"), vertical_bar, nt("SemanticAction"), nt("Tag_opt"), nt("RHSSeq_opt") }, eat) | |
| }); | |
// LHS ::= identifier LHSType
| decl_g("LHS") = scanner::rule_rhs({ | |
| seq({ identifier, nt("LHSType") }, eat) | |
| }); | |
// LHSType ::= l_bracket Type r_bracket   (mandatory here, unlike SymbolType_opt)
| decl_g("LHSType") = scanner::rule_rhs({ | |
| seq({ l_bracket, nt("Type"), r_bracket }, eat) | |
| }); | |
// Expr ::= LHS colon RHS
| decl_g("Expr") = scanner::rule_rhs({ | |
| seq({ nt("LHS"), colon, nt("RHS") }, eat) | |
| }); | |
// ExprStatements ::= Expr semicolon
//                  | semicolon
//                  | ExprStatements Expr semicolon
//                  | ExprStatements semicolon
| decl_g("ExprStatements") = scanner::rule_rhs({ | |
| seq({ nt("Expr"), semicolon }, eat), | |
| seq({ semicolon }, eat), | |
| seq({ nt("ExprStatements"), nt("Expr"), semicolon }, eat), | |
| seq({ nt("ExprStatements"), semicolon }, eat) | |
| }); | |
// Semantic action: delegates to eat, then appends child 1 of the node at the back of
// data.ast_stack to data.token_header_opt, and returns eat's result unchanged.
// NOTE(review): presumably eat leaves the freshly reduced node at the back of
// data.ast_stack and nodes[] mirrors the right-hand-side symbols in order - confirm.
| semantic_type token_header_opt = [eat](term_type term, arg_type const &arg, scanning_data_type &data){ | |
| token_type reduced = eat(term, arg, data); | |
| auto const &top = data.ast_stack.back(); | |
| data.token_header_opt.push_back(top->nodes[1]); | |
| return reduced; | |
| }; | |
// TokenHeaderRest_opt ::= (empty) | comma identifier
// The non-empty alternative runs token_header_opt instead of plain eat.
| decl_g("TokenHeaderRest_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ comma, identifier }, token_header_opt) | |
| }); | |
// TokenHeader ::= l_bracket identifier TokenHeaderRest_opt r_bracket
| decl_g("TokenHeader") = scanner::rule_rhs({ | |
| seq({ l_bracket, identifier, nt("TokenHeaderRest_opt"), r_bracket }, eat) | |
| }); | |
// Semantic action: delegates to eat, then stores child 1 of the node at the back of
// data.ast_stack in data.token_body, and returns eat's result unchanged.
// NOTE(review): presumably nodes[] mirrors the right-hand-side symbols in order - confirm.
| semantic_type token_body = [eat](term_type term, arg_type const &arg, scanning_data_type &data){ | |
| token_type reduced = eat(term, arg, data); | |
| auto const &top = data.ast_stack.back(); | |
| data.token_body = top->nodes[1]; | |
| return reduced; | |
| }; | |
// TokenBody ::= l_curly_bracket TopLevelSeqStatements r_curly_bracket
// Reduced with token_body instead of plain eat.
| decl_g("TokenBody") = scanner::rule_rhs({ | |
| seq({ l_curly_bracket, nt("TopLevelSeqStatements"), r_curly_bracket }, token_body) | |
| }); | |
// GrammarHeader ::= l_bracket identifier r_bracket
| decl_g("GrammarHeader") = scanner::rule_rhs({ | |
| seq({ l_bracket, identifier, r_bracket }, eat) | |
| }); | |
// DefaultSemanticAction_opt ::= (empty) | l_square_bracket identifier r_square_bracket
| decl_g("DefaultSemanticAction_opt") = scanner::rule_rhs({ | |
| seq({}, eat), | |
| seq({ l_square_bracket, identifier, r_square_bracket }, eat) | |
| }); | |
// Semantic action: delegates to eat, then copies children 1 and 2 of the node at the
// back of data.ast_stack into data.default_semantic_action and data.expr_statements
// respectively, and returns eat's result unchanged.
// NOTE(review): presumably nodes[] mirrors the right-hand-side symbols in order - confirm.
| semantic_type grammar = [eat](term_type term, arg_type const &arg, scanning_data_type &data){ | |
| token_type reduced = eat(term, arg, data); | |
| auto &children = data.ast_stack.back()->nodes; | |
| data.default_semantic_action = children[1]; | |
| data.expr_statements = children[2]; | |
| return reduced; | |
| }; | |
// GrammarBody ::= l_curly_bracket DefaultSemanticAction_opt ExprStatements r_curly_bracket
// Reduced with the grammar action instead of plain eat.
| decl_g("GrammarBody") = scanner::rule_rhs({ | |
| seq({ l_curly_bracket, nt("DefaultSemanticAction_opt"), nt("ExprStatements"), r_curly_bracket }, grammar) | |
| }); | |
// Semantic action: delegates to eat, then stores the element at the back of
// data.ast_stack in data.token_namespace, and returns eat's result unchanged.
| semantic_type token_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data){ | |
| token_type reduced = eat(term, arg, data); | |
| auto const &top = data.ast_stack.back(); | |
| data.token_namespace = top; | |
| return reduced; | |
| }; | |
// TokenNamespace ::= identifier
// Reduced with token_namespace instead of plain eat.
| decl_g("TokenNamespace") = scanner::rule_rhs({ | |
| seq({ identifier }, token_namespace) | |
| }); | |
// Semantic action: delegates to eat, then stores the element at the back of
// data.ast_stack in data.grammar_namespace, and returns eat's result unchanged.
| semantic_type grammar_namespace = [eat](term_type term, arg_type const &arg, scanning_data_type &data){ | |
| token_type reduced = eat(term, arg, data); | |
| auto const &top = data.ast_stack.back(); | |
| data.grammar_namespace = top; | |
| return reduced; | |
| }; | |
// GrammarNamespace ::= identifier
// Reduced with grammar_namespace instead of plain eat.
| decl_g("GrammarNamespace") = scanner::rule_rhs({ | |
| seq({ identifier }, grammar_namespace) | |
| }); | |
// Start ::= TokenHeader TokenNamespace TokenBody GrammarHeader GrammarNamespace GrammarBody
// A whole input is a token section followed by a grammar section, each made of
// header, namespace and body.
| decl_g("Start") = scanner::rule_rhs({ | |
| seq({ | |
| nt("TokenHeader"), | |
| nt("TokenNamespace"), | |
| nt("TokenBody"), | |
| nt("GrammarHeader"), | |
| nt("GrammarNamespace"), | |
| nt("GrammarBody") | |
| }, eat) | |
| }); | |
// S' ::= Start
// NOTE(review): looks like the augmented start rule used by the table generator - confirm.
| decl_g("S'") = scanner::rule_rhs({ | |
| seq({ nt("Start") }, eat) | |
| }); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment