Created
January 16, 2011 04:24
-
-
Save postmodern/781559 to your computer and use it in GitHub Desktop.
An ANSI C Parser using the Ruby Parslet library.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # | |
| # A C Parser using the Parslet library. | |
| # | |
| # ANSI C Grammar: | |
| # | |
| # * http://www.lysator.liu.se/c/ANSI-C-grammar-l.html | |
| # * http://www.lysator.liu.se/c/ANSI-C-grammar-y.html | |
| # | |
| require 'parslet' | |
| class CParser < Parslet::Parser | |
| rule(:new_line) { match('[\n\r]').repeat(1) } | |
| rule(:space) { match('[ \t\v\n\f]') } | |
| rule(:spaces) { space.repeat(1) } | |
| rule(:space?) { space.maybe } | |
| rule(:spaces?) { space.repeat } | |
| rule(:digit) { match('[0-9]') } | |
| rule(:digits) { digit.repeat(1) } | |
| rule(:digits?) { digit.repeat } | |
| rule(:alpha) { match('[a-zA-Z_]') } | |
| rule(:xdigit) { digit | match('[a-fA-F]') } | |
| rule(:e) { match('[eE]') >> match('[+-]').maybe >> digit.repeat(1) } | |
| rule(:float_size) { match('[fFlL]') } | |
| rule(:int_size) { match('[uUlL]').repeat } | |
| rule(:e?) { e.maybe } | |
| rule(:float_size?) { float_size.maybe } | |
| rule(:int_size?) { int_size.maybe } | |
| rule(:comment) { | |
| (str('/*') >> (str('*/').absnt? >> any).repeat >> str('*/')) | | |
| (str('//') >> (new_line.absnt? >> any).repeat >> new_line) | |
| } | |
| def self.keywords(*names) | |
| names.each do |name| | |
| rule("#{name}_keyword") { str(name.to_s).as(:keyword) >> spaces? } | |
| end | |
| end | |
| keywords :auto, :break, :case, :char, :const, :continue, :default, :do, | |
| :double, :else, :enum, :extern, :float, :for, :goto, :if, :int, | |
| :long, :register, :return, :short, :signed, :sizeof, :static, | |
| :struct, :switch, :typedef, :union, :unsigned, :void, :volatile, | |
| :while | |
| rule(:identifier) { | |
| (alpha >> (alpha | digit).repeat) >> spaces? | |
| } | |
| rule(:hex_constant) { | |
| match('0[xX]') >> xdigit.repeat(1) >> int_size? >> spaces? | |
| } | |
| rule(:octal_constant) { | |
| str('0') >> digits >> int_size? >> spaces? | |
| } | |
| rule(:decimal_constant) { digits >> int_size.maybe >> spaces? } | |
| rule(:string_constant) { | |
| alpha.maybe >> str("'") >> | |
| (match("\\.") | match("[^\\']")).repeat(1) >> | |
| str("'") >> spaces? | |
| } | |
| rule(:float_constant) { | |
| ( | |
| (digits >> e >> float_size?) | | |
| (digits? >> str('.') >> digits >> e? >> float_size?) | | |
| (digits >> str('.') >> digits? >> e? >> float_size?) | |
| ) >> spaces? | |
| } | |
| rule(:constant) { | |
| hex_constant | | |
| octal_constant | | |
| decimal_constant | | |
| float_constant | | |
| string_constant | |
| } | |
| rule(:string_literal) { | |
| alpha.maybe >> str('"') >> | |
| (match("\\.") | match('[^\\"]')).repeat >> | |
| str('"') >> spaces? | |
| } | |
| def self.symbols(symbols) | |
| symbols.each do |name,symbol| | |
| rule(name) { str(symbol) >> spaces? } | |
| end | |
| end | |
| symbols :ellipsis => '...', | |
| :right_shift_assign => '>>=', | |
| :left_shift_assign => '<<=', | |
| :add_assign => '+=', | |
| :subtract_assign => '-=', | |
| :multiply_assign => '*=', | |
| :divide_assign => '/=', | |
| :modulus_assign => '%=', | |
| :binary_and_assign => '&=', | |
| :xor_assign => '^=', | |
| :binary_or_assign => '|=', | |
| :right_shift => '>>', | |
| :left_shift => '<<', | |
| :inc => '++', | |
| :dec => '--', | |
| :pointer_access => '->', | |
| :logical_and => '&&', | |
| :logical_or => '||', | |
| :less_equal => '<=', | |
| :greater_equal => '>=', | |
| :equal => '==', | |
| :not_equal => '!=', | |
| :semicolon => ';', | |
| :comma => ',', | |
| :colon => ':', | |
| :assign => '=', | |
| :left_paren => '(', | |
| :right_paren => ')', | |
| :member_access => '.', | |
| :binary_and => '&', | |
| :negate => '!', | |
| :inverse => '~', | |
| :subtract => '-', | |
| :add => '+', | |
| :multiply => '*', | |
| :divide => '/', | |
| :modulus => '%', | |
| :less => '<', | |
| :greater => '>', | |
| :xor => '^', | |
| :binary_or => '|', | |
| :question_mark => '?' | |
| rule(:left_brace) { (str('{') | str('<%')) >> spaces? } | |
| rule(:right_brace) { (str('}') | str('%>')) >> spaces? } | |
| rule(:left_bracket) { (str('[') | str('<:')) >> spaces? } | |
| rule(:right_bracket) { (str(']') | str(':>')) >> spaces? } | |
| rule(:primary_expression) { | |
| ( | |
| identifier.as(:identifier) | | |
| constant.as(:constant) | | |
| string_literal.as(:literal_string) | |
| ) | | |
| (left_paren >> expression >> right_paren) | |
| } | |
| rule(:postfix_expression) { | |
| primary_expression >> ( | |
| (left_bracket >> expression >> right_bracket) | | |
| (left_paren >> argument_expression_list.maybe >> right_paren) | | |
| ((member_access | pointer_access) >> identifier) | | |
| inc | dec | |
| ).repeat | |
| } | |
| rule(:argument_expression_list) { | |
| (assignment_expression >> comma >> argument_expression_list) | | |
| assignment_expression | |
| } | |
| rule(:sizeof_expression) { | |
| sizeof_keyword >> ( | |
| (unary_expression.as(:expr)) | | |
| (left_paren >> type_name.as(:type) >> right_paren) | |
| ) | |
| } | |
| rule(:unary_expression) { | |
| sizeof_expression.as(:sizeof) | | |
| postfix_expression | | |
| (inc >> unary_expression.as(:inc)) | | |
| (dec >> unary_expression.as(:dec)) | | |
| (unary_operator.as(:op) >> cast_expression).as(:unary) | |
| } | |
| rule(:unary_operator) { | |
| (binary_and | multiply | add | subtract | inverse | negate) | |
| } | |
| rule(:cast_expression) { | |
| (left_paren >> type_name.as(:type) >> right_paren >> cast_expression).as(:cast) | | |
| unary_expression | |
| } | |
| rule(:multiplicative_expression) { | |
| ( | |
| cast_expression.as(:left) >> | |
| (multiply | divide | modulus).as(:op) >> | |
| multiplicative_expression.as(:right) | |
| ).as(:multiplicative) | cast_expression | |
| } | |
| rule(:additive_expression) { | |
| ( | |
| multiplicative_expression.as(:left) >> | |
| (add | subtract).as(:op) >> | |
| additive_expression.as(:right) | |
| ).as(:additive) | multiplicative_expression | |
| } | |
| rule(:shift_expression) { | |
| ( | |
| additive_expression.as(:left) >> | |
| (left_shift | right_shift).as(:op) >> | |
| shift_expression.as(:right) | |
| ).as(:shift) | additive_expression | |
| } | |
| rule(:relational_expression) { | |
| ( | |
| shift_expression.as(:left) >> | |
| (less | greater | less_equal | greater_equal).as(:op) >> | |
| relational_expression.as(:right) | |
| ).as(:relational) | shift_expression | |
| } | |
| rule(:equality_expression) { | |
| ( | |
| relational_expression.as(:left) >> | |
| (equal | not_equal).as(:op) >> | |
| equality_expression.as(:right) | |
| ).as(:equality) | relational_expression | |
| } | |
| rule(:and_expression) { | |
| ( | |
| equality_expression.as(:left) >> | |
| binary_and >> | |
| and_expression.as(:right) | |
| ).as(:binary_and) | equality_expression | |
| } | |
| rule(:exclusive_or_expression) { | |
| ( | |
| and_expression.as(:left) >> | |
| xor >> | |
| exclusive_or_expression.as(:right) | |
| ).as(:xor) | and_expression | |
| } | |
| rule(:inclusive_or_expression) { | |
| ( | |
| exclusive_or_expression.as(:left) >> | |
| binary_or >> | |
| inclusive_or_expression.as(:right) | |
| ).as(:binary_or) | exclusive_or_expression | |
| } | |
| rule(:logical_and_expression) { | |
| ( | |
| inclusive_or_expression.as(:left) >> | |
| logical_and >> | |
| logical_and_expression.as(:right) | |
| ).as(:logical_and) | inclusive_or_expression | |
| } | |
| rule(:logical_or_expression) { | |
| ( | |
| logical_and_expression.as(:left) >> | |
| logical_or >> | |
| logical_or_expression.as(:right) | |
| ).as(:logical_or) | logical_and_expression | |
| } | |
| rule(:conditional_expression) { | |
| ( | |
| logical_or_expression.as(:condition) >> question_mark >> | |
| expression.as(:true) >> colon >> | |
| conditional_expression.as(:false) | |
| ).as(:conditional) | logical_or_expression | |
| } | |
| rule(:assignment_expression) { | |
| ( | |
| unary_expression.as(:left) >> | |
| assignment_operator.as(:op) >> | |
| assignment_expression.as(:right) | |
| ).as(:assign) | conditional_expression | |
| } | |
| rule(:assignment_operator) { | |
| assign | | |
| multiply_assign | | |
| divide_assign | | |
| modulus_assign | | |
| add_assign | | |
| subtract_assign | | |
| left_shift_assign | | |
| right_shift_assign | | |
| binary_and_assign | | |
| xor_assign | | |
| binary_or_assign | |
| } | |
| rule(:expression) { | |
| assignment_expression >> (comma >> assignment_expression).repeat | |
| } | |
| rule(:expression?) { expression.maybe } | |
| rule(:constant_expression) { conditional_expression } | |
| rule(:constant_expression?) { constant_expression.maybe } | |
| rule(:declaration) { | |
| declaration_specifiers >> init_declarator_list.maybe >> semicolon | |
| } | |
| rule(:declaration_specifiers) { | |
| ( | |
| storage_class_specifier.as(:specifier) | | |
| type_specifier.as(:type) | | |
| type_qualifier.as(:qualifier) | |
| ).repeat(1) | |
| } | |
| rule(:init_declarator_list) { | |
| init_declarator >> (comma >> init_declarator).repeat | |
| } | |
| rule(:init_declarator) { | |
| declarator >> (assign >> initializer).maybe | |
| } | |
| rule(:storage_class_specifier) { | |
| typedef_keyword | | |
| extern_keyword | | |
| static_keyword | | |
| auto_keyword | | |
| register_keyword | |
| } | |
| rule(:type_specifier) { | |
| void_keyword | | |
| char_keyword | | |
| short_keyword | | |
| int_keyword | | |
| long_keyword | | |
| float_keyword | | |
| double_keyword | | |
| signed_keyword | | |
| unsigned_keyword | | |
| struct_or_union_specifier | | |
| enum_specifier | |
| } | |
| rule(:struct_or_union_specifier) { | |
| struct_or_union >> ( | |
| ( | |
| identifier.maybe >> | |
| (left_brace >> struct_declaration_list >> right_brace) | |
| ) | identifier | |
| ) | |
| } | |
| rule(:struct_or_union) { struct_keyword | union_keyword } | |
| rule(:struct_declaration_list) { struct_declaration.repeat(1) } | |
| rule(:struct_declaration) { | |
| specifier_qualifier_list >> struct_declarator_list >> semicolon | |
| } | |
| rule(:specifier_qualifier_list) { | |
| (type_specifier | type_qualifier).repeat(1) | |
| } | |
| rule(:struct_declarator_list) { | |
| struct_declarator >> (comma >> struct_declarator).repeat | |
| } | |
| rule(:struct_declarator) { | |
| (declarator.maybe >> (colon >> constant_expression)) | | |
| declarator | |
| } | |
| rule(:enum_specifier) { | |
| enum_keyword >> ( | |
| (identifier.maybe >> (left_brace >> enumerator_list >> right_brace)) | | |
| identifier | |
| ) | |
| } | |
| rule(:enumerator_list) { | |
| enumerator >> (comma >> enumerator).repeat | |
| } | |
| rule(:enumerator) { | |
| identifier >> (assign >> constant_expression).maybe | |
| } | |
| rule(:type_qualifier) { const_keyword | volatile_keyword } | |
| rule(:declarator) { pointer? >> direct_declarator } | |
| rule(:direct_declarator) { | |
| (identifier | (left_paren >> declarator >> right_paren)) >> | |
| ( | |
| ( | |
| left_bracket >> | |
| constant_expression.maybe.as(:size) >> | |
| right_bracket | |
| ).as(:array) | ( | |
| left_paren >> | |
| (parameter_type_list | identifier_list).maybe >> | |
| right_paren | |
| ) | |
| ).repeat | |
| } | |
| rule(:pointer) { | |
| multiply >> (multiply | type_qualifier_list).repeat | |
| } | |
| rule(:pointer?) { pointer.maybe } | |
| rule(:type_qualifier_list) { type_qualifier.repeat(1) } | |
| rule(:parameter_type_list) { | |
| parameter_list >> (comma >> ellipsis).maybe | |
| } | |
| rule(:parameter_type_list?) { parameter_type_list.maybe } | |
| rule(:parameter_list) { | |
| parameter_declaration >> (comma >> parameter_declaration).repeat | |
| } | |
| rule(:parameter_declaration) { | |
| declaration_specifiers >> (declarator | abstract_declarator).maybe | |
| } | |
| rule(:identifier_list) { | |
| identifier >> (comma >> identifier).repeat | |
| } | |
| rule(:type_name) { | |
| specifier_qualifier_list >> abstract_declarator.maybe | |
| } | |
| rule(:abstract_declarator) { | |
| (pointer? >> direct_abstract_declarator) | pointer | |
| } | |
| rule(:direct_abstract_declarator) { | |
| ( | |
| (left_paren >> abstract_declarator >> right_paren) | | |
| (left_bracket >> constant_expression? >> right_bracket) | | |
| (left_paren >> parameter_type_list? >> right_paren) | |
| ) >> ( | |
| (left_bracket >> constant_expression? >> right_bracket) | | |
| (left_paren >> parameter_type_list? >> right_paren) | |
| ).repeat | |
| } | |
| rule(:initializer) { | |
| assignment_expression | | |
| (left_brace >> initializer_list >> comma.maybe >> right_brace) | |
| } | |
| rule(:initializer_list) { | |
| initializer >> (comma >> initializer).repeat | |
| } | |
| rule(:statement) { | |
| labeled_statement | | |
| compound_statement | | |
| expression_statement | | |
| selection_statement | | |
| iteration_statement | | |
| jump_statement | |
| } | |
| rule(:label_statement) { | |
| (identifier | default_keyword).as(:name) >> colon >> | |
| statement.as(:body) | |
| } | |
| rule(:case_statement) { | |
| case_keyword >> constant_expression.as(:key) >> colon >> | |
| statement.as(:body) | |
| } | |
| rule(:labeled_statement) { | |
| label_statement.as(:label) | case_statement.as(:case) | |
| } | |
| rule(:compound_statement) { | |
| left_brace >> | |
| declaration_list.maybe.as(:declarations) >> statement_list.maybe >> | |
| right_brace | |
| } | |
| rule(:declaration_list) { declaration.repeat(1) } | |
| rule(:statement_list) { statement.repeat(1) } | |
| rule(:expression_statement) { expression? >> semicolon } | |
| rule(:if_statement) { | |
| if_keyword >> | |
| left_paren >> expression.as(:condition) >> right_paren >> | |
| statement.as(:body) >> | |
| (else_keyword >> statement.as(:else)).maybe | |
| } | |
| rule(:switch_statement) { | |
| switch_keyword >> | |
| left_paren >> expression.as(:expression) >> right_paren >> | |
| statement.as(:body) | |
| } | |
| rule(:selection_statement) { | |
| if_statement.as(:if) | switch_statement.as(:switch) | |
| } | |
| rule(:while_statement) { | |
| while_keyword >> | |
| left_paren >> expression.as(:condition) >> right_paren >> | |
| statement.as(:body) | |
| } | |
| rule(:do_while_statement) { | |
| do_keyword >> statement.as(:body) >> while_keyword >> | |
| left_paren >> expression.as(:condition) >> right_paren >> semicolon | |
| } | |
| rule(:for_statement) { | |
| for_keyword >> left_paren >> | |
| expression_statement.as(:initializer) >> | |
| expression_statement.as(:condition) >> | |
| expression.maybe.as(:update) >> | |
| right_paren >> | |
| statement.as(:body) | |
| } | |
| rule(:iteration_statement) { | |
| while_statement.as(:while) | | |
| do_while_statement.as(:do_while) | | |
| for_statement.as(:for) | |
| } | |
| rule(:jump_statement) { | |
| ( | |
| (goto_keyword >> identifier.as(:goto)) | | |
| continue_keyword.as(:continue) | | |
| break_keyword.as(:break) | | |
| (return_keyword >> expression.maybe.as(:value)).as(:return) | |
| ) >> semicolon | |
| } | |
| rule(:translation_unit) { external_declaration.repeat(1) } | |
| rule(:external_declaration) { function_definition.as(:function) | declaration } | |
| rule(:function_definition) { | |
| declaration_specifiers.maybe >> | |
| declarator >> | |
| declaration_list.maybe >> | |
| compound_statement.as(:body) | |
| } | |
| root :translation_unit | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Current example usage and output: