Created
March 9, 2014 18:52
-
-
Save odashi/9452545 to your computer and use it in GitHub Desktop.
Boost.SpiritによるPascalのパーサ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // parser_pascal.cpp | |
| #include <iostream> | |
| #include <string> | |
| #define BOOST_SPIRIT_USE_OLD_NAMESPACE | |
| //#define BOOST_SPIRIT_DEBUG | |
| #include <boost/spirit/include/classic.hpp> | |
| #include <boost/spirit/include/classic_ast.hpp> | |
| #include "parser.h" | |
| using namespace boost::spirit; | |
| //////////////////////////////////////////////////////////////////////////////// | |
| // | |
| // struct PascalSkipParser | |
| // | |
| // @brief: Pascalの文法定義 | |
| // @update: 2011/10/08 by Y.ODA | |
| // | |
| //////////////////////////////////////////////////////////////////////////////// | |
| struct PascalSkipParser : public grammar<PascalSkipParser> | |
| { | |
| template<typename Scannar> | |
| struct definition | |
| { | |
| rule<Scannar> skip_p; | |
| // 構文定義 | |
| definition(const PascalSkipParser &self) | |
| { | |
| skip_p = +space_p | comment_p('{', '}'); | |
| // デバッグ出力(出力しない) | |
| BOOST_SPIRIT_DEBUG_TRACE_NODE(self, false); | |
| BOOST_SPIRIT_DEBUG_TRACE_NODE(skip_p, false); | |
| } | |
| // 開始記号 | |
| const rule<Scannar> &start() const | |
| { | |
| return skip_p; | |
| } | |
| }; | |
| }; | |
| //////////////////////////////////////////////////////////////////////////////// | |
| // | |
| // struct PascalParser | |
| // | |
| // @brief: Pascalの文法定義 | |
| // @update: 2011/10/08 by Y.ODA | |
| // | |
| //////////////////////////////////////////////////////////////////////////////// | |
| struct PascalParser : public grammar<PascalParser> | |
| { | |
| enum Id | |
| { | |
| ID_BASETYPE = 1, | |
| ID_IDENTIFIER, | |
| ID_LITERAL, | |
| ID_PROGRAM, | |
| ID_DEF_BLOCK, | |
| ID_CODE_BLOCK, | |
| ID_CONST_DEF, | |
| ID_TYPE_DEF, | |
| ID_VARIABLE_DEF, | |
| ID_PROCEDUE_DEF, | |
| ID_FUNCTION_DEF, | |
| ID_ARG_DEF, | |
| ID_TYPE_DESC, | |
| ID_ARRAY_DESC, | |
| ID_RECORD_DESC, | |
| ID_STATEMENT, | |
| ID_IF_BLOCK, | |
| ID_CASE_BLOCK, | |
| ID_FOR_BLOCK, | |
| ID_WHILE_BLOCK, | |
| ID_REPEAT_BLOCK, | |
| ID_PROCEDURE_CALL, | |
| ID_SUBSTITUTION, | |
| ID_VARIABLE, | |
| ID_EXPRESSION, | |
| ID_SIMPLE_EXPRESSION, | |
| ID_TERM, | |
| ID_FACTOR | |
| }; | |
| template<typename Scannar> | |
| struct definition | |
| { | |
| #define RULE(id) rule<Scannar, parser_context<>, parser_tag<id>> | |
| rule<Scannar> keyword; | |
| RULE(ID_BASETYPE) basetype; | |
| RULE(ID_IDENTIFIER) identifier; | |
| RULE(ID_LITERAL) literal_p; | |
| rule<Scannar> string_p; | |
| RULE(ID_PROGRAM) program; | |
| RULE(ID_DEF_BLOCK) def_block; | |
| RULE(ID_CODE_BLOCK) code_block; | |
| RULE(ID_CONST_DEF) const_def; | |
| RULE(ID_TYPE_DEF) type_def; | |
| RULE(ID_VARIABLE_DEF) variable_def; | |
| RULE(ID_PROCEDUE_DEF) procedure_def; | |
| RULE(ID_FUNCTION_DEF) function_def; | |
| RULE(ID_ARG_DEF) arg_def; | |
| RULE(ID_TYPE_DESC) type_desc; | |
| RULE(ID_ARRAY_DESC) array_desc; | |
| RULE(ID_RECORD_DESC) record_desc; | |
| RULE(ID_STATEMENT) statement; | |
| RULE(ID_IF_BLOCK) if_block; | |
| RULE(ID_CASE_BLOCK) case_block; | |
| RULE(ID_FOR_BLOCK) for_block; | |
| RULE(ID_WHILE_BLOCK) while_block; | |
| RULE(ID_REPEAT_BLOCK) repeat_block; | |
| RULE(ID_PROCEDURE_CALL) procedure_call; | |
| RULE(ID_SUBSTITUTION) substitution; | |
| RULE(ID_VARIABLE) variable; | |
| RULE(ID_EXPRESSION) expression; | |
| RULE(ID_SIMPLE_EXPRESSION) simple_expression; | |
| RULE(ID_TERM) term; | |
| RULE(ID_FACTOR) factor; | |
| PascalSkipParser skip_p; | |
| // 構文定義 | |
| definition(const PascalParser &self) | |
| { | |
| keyword | |
| = str_p("begin") | "end" | |
| | "if" | "case" | "then" | "else" | "for" | "to" | "while" | "do" | "repeat" | "until" | |
| | "and" | "or" | "not" | "div" | "mod" | |
| | "array" | "of" | "record" | |
| | "const" | "type" | "var" | "procedure" | "function" | "program"; | |
| basetype | |
| = str_p("int") | "real"; | |
| identifier | |
| //= lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype); | |
| = no_node_d[*skip_p] | |
| >> leaf_node_d[ | |
| lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype) | |
| ]; | |
| literal_p | |
| // 整数と実数は最長一致とする | |
| = longest_d[int_p | real_p] | string_p; | |
| string_p | |
| //= lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')]; | |
| = no_node_d[*skip_p] | |
| >> leaf_node_d[ | |
| lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')] | |
| ]; | |
| program | |
| = no_node_d[str_p("program")] | |
| >> identifier | |
| >> no_node_d[ch_p('(')] | |
| >> !(identifier % no_node_d[ch_p(',')]) | |
| >> no_node_d[ch_p(')')] | |
| >> no_node_d[ch_p(';')] | |
| >> def_block | |
| >> code_block | |
| >> no_node_d[ch_p('.')]; | |
| def_block | |
| = *( const_def | |
| | type_def | |
| | variable_def | |
| | function_def | |
| | procedure_def); | |
| code_block | |
| = no_node_d[str_p("begin")] | |
| >> !(statement % no_node_d[ch_p(';')]) | |
| >> no_node_d[str_p("end")]; | |
| const_def | |
| = no_node_d[str_p("const")] | |
| >> +( identifier | |
| >> no_node_d[ch_p('=')] | |
| >> literal_p | |
| >> no_node_d[ch_p(';')]); | |
| type_def | |
| = no_node_d[str_p("type")] | |
| >> +( identifier | |
| >> no_node_d[ch_p('=')] | |
| >> type_desc | |
| >> no_node_d[ch_p(';')]); | |
| variable_def | |
| = no_node_d[str_p("var")] | |
| >> +( identifier | |
| >> *(no_node_d[ch_p(',')] >> identifier) | |
| >> no_node_d[ch_p(':')] | |
| >> type_desc | |
| >> no_node_d[ch_p(';')]); | |
| procedure_def | |
| = no_node_d[str_p("procedure")] | |
| >> identifier | |
| >> no_node_d[str_p('(')] | |
| >> !(arg_def % no_node_d[str_p(',')]) | |
| >> no_node_d[str_p(')')] | |
| >> no_node_d[str_p(';')] | |
| >> def_block | |
| >> code_block | |
| >> no_node_d[str_p(';')]; | |
| function_def | |
| = no_node_d[str_p("function")] | |
| >> identifier | |
| >> no_node_d[str_p('(')] | |
| >> !(arg_def % no_node_d[str_p(',')]) | |
| >> no_node_d[str_p(')')] | |
| >> no_node_d[str_p(':')] | |
| >> type_desc | |
| >> no_node_d[str_p(';')] | |
| >> def_block | |
| >> code_block | |
| >> no_node_d[str_p(';')]; | |
| arg_def | |
| = !str_p("var") | |
| >> identifier | |
| >> no_node_d[ch_p(':')] | |
| >> type_desc; | |
| type_desc | |
| = identifier | |
| | basetype | |
| | array_desc | |
| | record_desc; | |
| array_desc | |
| = no_node_d[str_p("array")] | |
| >> no_node_d[ch_p('[')] | |
| >> int_p | |
| >> no_node_d[str_p("..")] | |
| >> int_p | |
| >> no_node_d[ch_p(']')] | |
| >> no_node_d[str_p("of")] | |
| >> type_desc; | |
| record_desc | |
| = no_node_d[str_p("record")] | |
| >> +( identifier | |
| >> no_node_d[ch_p(':')] | |
| >> type_desc | |
| >> no_node_d[ch_p(';')]) | |
| >> no_node_d[str_p("end")]; | |
| statement | |
| = if_block | |
| | case_block | |
| | for_block | |
| | while_block | |
| | repeat_block | |
| | code_block | |
| | procedure_call | |
| | substitution | |
| | identifier; // 括弧省略の手続き/関数呼び出しをここで判定 | |
| if_block | |
| = no_node_d[str_p("if")] | |
| >> expression | |
| >> no_node_d[str_p("then")] | |
| >> statement | |
| >> !( no_node_d[str_p("else")] | |
| >> statement); | |
| case_block | |
| = no_node_d[str_p("case")] | |
| >> expression | |
| >> no_node_d[str_p("of")] | |
| >> *( literal_p | |
| >> no_node_d[ch_p(':')] | |
| >> statement | |
| >> no_node_d[ch_p(';')]) | |
| >> !( no_node_d[str_p("else")] | |
| >> statement) | |
| >> no_node_d[str_p("end")]; | |
| for_block | |
| = no_node_d[str_p("for")] | |
| >> identifier | |
| >> no_node_d[str_p(":=")] | |
| >> expression | |
| >> no_node_d[str_p("to")] | |
| >> expression | |
| >> no_node_d[str_p("do")] | |
| >> statement; | |
| while_block | |
| = no_node_d[str_p("while")] | |
| >> expression | |
| >> no_node_d[str_p("do")] | |
| >> statement; | |
| repeat_block | |
| = no_node_d[str_p("repeat")] | |
| >> (statement % no_node_d[ch_p(';')]) | |
| >> no_node_d[str_p("until")] | |
| >> expression; | |
| procedure_call | |
| // 引数なしなら括弧は省略できるが、解析が面倒なので別で判定する | |
| = identifier | |
| >> no_node_d[ch_p('(')] | |
| >> !(expression % no_node_d[ch_p(',')]) | |
| >> no_node_d[ch_p(')')]; | |
| substitution | |
| = variable | |
| >> no_node_d[str_p(":=")] | |
| >> expression; | |
| variable | |
| = identifier | |
| >> !( no_node_d[ch_p('[')] | |
| >> (expression % no_node_d[ch_p(',')]) | |
| >> no_node_d[ch_p(']')]) | |
| >> !( no_node_d[ch_p('.')] | |
| >> variable); | |
| expression | |
| = simple_expression | |
| >> !( (ch_p('=')|"<>"|'<'|'>'|"<="|">=") | |
| >> simple_expression); | |
| simple_expression | |
| = !(ch_p('+')|'-') | |
| >> term | |
| >> *( (ch_p('+')|'-'|"or") | |
| >> term); | |
| term | |
| = factor | |
| >> *( (ch_p('*')|'/'|"div"|"mod"|"and") | |
| >> factor); | |
| factor | |
| // 括弧省略の手続き/関数を変数と見分ける方法がないので意味解析に任せる | |
| = literal_p | |
| | procedure_call | |
| | variable | |
| | (no_node_d[ch_p('(')] >> expression >> no_node_d[ch_p(')')]) | |
| | ("not" >> expression); | |
| // デバッグ出力 | |
| BOOST_SPIRIT_DEBUG_RULE(keyword); | |
| BOOST_SPIRIT_DEBUG_RULE(basetype); | |
| BOOST_SPIRIT_DEBUG_RULE(identifier); | |
| BOOST_SPIRIT_DEBUG_RULE(literal_p); | |
| BOOST_SPIRIT_DEBUG_RULE(string_p); | |
| BOOST_SPIRIT_DEBUG_RULE(program); | |
| BOOST_SPIRIT_DEBUG_RULE(def_block); | |
| BOOST_SPIRIT_DEBUG_RULE(code_block); | |
| BOOST_SPIRIT_DEBUG_RULE(const_def); | |
| BOOST_SPIRIT_DEBUG_RULE(type_def); | |
| BOOST_SPIRIT_DEBUG_RULE(variable_def); | |
| BOOST_SPIRIT_DEBUG_RULE(procedure_def); | |
| BOOST_SPIRIT_DEBUG_RULE(function_def); | |
| BOOST_SPIRIT_DEBUG_RULE(arg_def); | |
| BOOST_SPIRIT_DEBUG_RULE(type_desc); | |
| BOOST_SPIRIT_DEBUG_RULE(array_desc); | |
| BOOST_SPIRIT_DEBUG_RULE(record_desc); | |
| BOOST_SPIRIT_DEBUG_RULE(statement); | |
| BOOST_SPIRIT_DEBUG_RULE(if_block); | |
| BOOST_SPIRIT_DEBUG_RULE(case_block); | |
| BOOST_SPIRIT_DEBUG_RULE(for_block); | |
| BOOST_SPIRIT_DEBUG_RULE(while_block); | |
| BOOST_SPIRIT_DEBUG_RULE(repeat_block); | |
| BOOST_SPIRIT_DEBUG_RULE(procedure_call); | |
| BOOST_SPIRIT_DEBUG_RULE(substitution); | |
| BOOST_SPIRIT_DEBUG_RULE(variable); | |
| BOOST_SPIRIT_DEBUG_RULE(expression); | |
| BOOST_SPIRIT_DEBUG_RULE(simple_expression); | |
| BOOST_SPIRIT_DEBUG_RULE(term); | |
| BOOST_SPIRIT_DEBUG_RULE(factor); | |
| } | |
| // 開始記号 | |
| const RULE(ID_PROGRAM) &start() const | |
| { | |
| return program; | |
| } | |
| #undef RULE | |
| }; | |
| }; | |
| //------------------------------------------------------------------------------ | |
| // | |
| // void DumpTree() | |
| // | |
| // @brief: 構文木をダンプする | |
| // @update: 2011/10/08 by Y.ODA | |
| // @args: | |
| // tree_match<const char *>::tree_iterator &iter: 対象の木のイテレータ | |
| // int nest: 現在の階層 | |
| // @ret: | |
| // (void) | |
| // | |
| //------------------------------------------------------------------------------ | |
| void DumpTree(tree_match<const char *>::tree_iterator &iter, int nest) | |
| { | |
| // インデント | |
| for (int i = 0; i < nest; ++i) | |
| std::cout << "| "; | |
| std::string id | |
| = iter->value.id() == PascalParser::ID_BASETYPE ? "basetype" | |
| : iter->value.id() == PascalParser::ID_IDENTIFIER ? "identifier" | |
| : iter->value.id() == PascalParser::ID_LITERAL ? "literal" | |
| : iter->value.id() == PascalParser::ID_PROGRAM ? "program" | |
| : iter->value.id() == PascalParser::ID_DEF_BLOCK ? "def_block" | |
| : iter->value.id() == PascalParser::ID_CODE_BLOCK ? "code_block" | |
| : iter->value.id() == PascalParser::ID_CONST_DEF ? "const_def" | |
| : iter->value.id() == PascalParser::ID_TYPE_DEF ? "type_def" | |
| : iter->value.id() == PascalParser::ID_VARIABLE_DEF ? "variable_def" | |
| : iter->value.id() == PascalParser::ID_PROCEDUE_DEF ? "procedure_def" | |
| : iter->value.id() == PascalParser::ID_FUNCTION_DEF ? "function_def" | |
| : iter->value.id() == PascalParser::ID_ARG_DEF ? "arg_def" | |
| : iter->value.id() == PascalParser::ID_TYPE_DESC ? "type_desc" | |
| : iter->value.id() == PascalParser::ID_ARRAY_DESC ? "array_desc" | |
| : iter->value.id() == PascalParser::ID_RECORD_DESC ? "record_desc" | |
| : iter->value.id() == PascalParser::ID_STATEMENT ? "statement" | |
| : iter->value.id() == PascalParser::ID_IF_BLOCK ? "if_block" | |
| : iter->value.id() == PascalParser::ID_CASE_BLOCK ? "case_block" | |
| : iter->value.id() == PascalParser::ID_FOR_BLOCK ? "for_block" | |
| : iter->value.id() == PascalParser::ID_WHILE_BLOCK ? "while_block" | |
| : iter->value.id() == PascalParser::ID_REPEAT_BLOCK ? "repeat_block" | |
| : iter->value.id() == PascalParser::ID_PROCEDURE_CALL ? "procedure_call" | |
| : iter->value.id() == PascalParser::ID_SUBSTITUTION ? "substitution" | |
| : iter->value.id() == PascalParser::ID_VARIABLE ? "variable" | |
| : iter->value.id() == PascalParser::ID_EXPRESSION ? "expression" | |
| : iter->value.id() == PascalParser::ID_SIMPLE_EXPRESSION ? "simple_expression" | |
| : iter->value.id() == PascalParser::ID_TERM ? "term" | |
| : iter->value.id() == PascalParser::ID_FACTOR ? "factor" | |
| : "unknown"; | |
| std::string data(iter->value.begin(), iter->value.end()); | |
| // 要素を表示 | |
| if (data.size() > 0) | |
| std::cout << id << " \"" << data << "\"" << std::endl; | |
| else | |
| std::cout << id << std::endl; | |
| // 子ノードを表示 | |
| tree_match<const char *>::tree_iterator child; | |
| for (child = iter->children.begin(); child != iter->children.end(); ++child) | |
| DumpTree(child, nest+1); | |
| } | |
| //------------------------------------------------------------------------------ | |
| // | |
| // bool parse_pascal() | |
| // | |
| // @brief: Pascalの構文解析 | |
| // @update: 2011/10/08 by Y.ODA | |
| // @args: | |
| // const char *data: Pascalソースコードを格納した文字列(終端'\0') | |
| // @ret: | |
| // (bool) 成功したかどうか | |
| // | |
| //------------------------------------------------------------------------------ | |
| bool parse_pascal(const char *data) | |
| { | |
| PascalParser g; | |
| PascalSkipParser skip_p; | |
| tree_parse_info<> r = ast_parse(data, g, skip_p); | |
| //parse_info<> r = parse(data, g, skip_p); | |
| if (r.match) | |
| //if (r.hit) | |
| { | |
| std::cout << "Succeeded." << std::endl; | |
| DumpTree(r.trees.begin(), 0); | |
| } | |
| else | |
| std::cout << "Failed." << std::endl; | |
| return r.match; | |
| //return r.hit; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment