Created
April 1, 2013 16:34
-
-
Save mqu/5286033 to your computer and use it in GitHub Desktop.
A mathematical expression parser using Treetop and Ruby. It parses mathematical expressions (but does not evaluate them yet), for example: (12*(123+4)/(54-12))/3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# coding: utf-8 | |
require 'pp' | |
require 'treetop' | |
# Common base class for the custom syntax node classes defined in this file.
# It adds nothing to Treetop::Runtime::SyntaxNode; it only gives the custom
# nodes a shared ancestor that differs from Treetop's plain node class.
class ParserNode < Treetop::Runtime::SyntaxNode; end
# Syntax node for an integer literal (attached to the grammar's `integer`
# rule through the <IntegerNode> declaration).
class IntegerNode < ParserNode
  # Forwards every argument unchanged to the superclass constructor.
  #
  # A trace line is written to stderr only when Ruby runs in debug mode
  # (`ruby -d`, i.e. $DEBUG is set), so normal runs keep stdout clean.
  def initialize(*args)
    warn 'IntegerNode.new' if $DEBUG
    super
  end

  # Converts the matched text to an Integer.
  #
  # @return [Integer] the value of the node's matched text, via String#to_i
  def to_array
    # Dump the node for inspection in debug mode only, and on stderr so the
    # dump never mixes with the program's real output.
    PP.pp(self, $stderr) if $DEBUG
    text_value.to_i
  end
end
# Marker node class for numbers. No rule in the Calc grammar declares
# <NumberNode>; the `number` rule carries no node class of its own.
class NumberNode < ParserNode; end
# Marker node class for a parenthesised group (declared on the grammar's
# `group` rule as <GroupNode>).
class GroupNode < ParserNode; end
# Marker node class for operands. No rule in the Calc grammar declares
# <OperandNode>; the `operand` rule carries no node class of its own.
class OperandNode < ParserNode; end
# Marker node class for an expression (declared on the grammar's
# `expression` rule as <ExpressionNode>).
class ExpressionNode < ParserNode; end
# Marker node class for a floating-point literal (declared on the grammar's
# `float` rule as <FloatNode>).
class FloatNode < ParserNode; end
# Marker node class for an operator character (declared on each alternative
# of the grammar's `operator` rule as <OperatorNode>).
class OperatorNode < ParserNode; end
# Node class declared on the grammar's `body` rule as <BodyNode>.
class BodyNode < ParserNode
  # Builds an array by calling #to_array on every child element, in order.
  #
  # @return [Array] one entry per child element
  def to_array
    elements.map(&:to_array)
  end
end
# Facade around the Treetop-generated CalcParser: loads the grammar once and
# exposes Parser.parse, which turns an input string into a pruned syntax tree.
class Parser
  # Raised by Parser.parse when the input does not match the grammar.
  # It derives from StandardError so a plain `rescue` catches it; code that
  # rescues Exception keeps working too.
  class ParseError < StandardError; end

  # Load the Treetop grammar from 'calc.treetop', resolved next to this file
  # so the current working directory does not matter, and create a single
  # parser instance as a class variable so it is not re-created for every
  # string we parse.
  Treetop.load(File.expand_path('calc.treetop', File.dirname(__FILE__)))
  puts '#Parser loaded'
  @@parser = CalcParser.new
  puts '#Parser created'

  # Parse an input string and return the root of its pruned syntax tree.
  #
  # @param data [String] the text to parse
  # @return [Treetop::Runtime::SyntaxNode] the root node, pruned in place by
  #   clean_tree so that only custom node classes remain below it
  # @raise [ParseError] if the parser returns nil for the input
  def self.parse(data)
    # Pass the data over to the parser instance.
    tree = @@parser.parse(data)

    # A nil tree means parsing failed; report a short message to the user.
    if tree.nil?
      # failure_reason is not guaranteed to be a String, so it is appended
      # only when present instead of being concatenated blindly.
      reason = @@parser.failure_reason
      detail = reason ? " - #{reason}" : ''
      raise ParseError, "Parse error at offset: #{@@parser.index}#{detail}"
    end

    # Drop all syntax nodes that are not one of our custom classes; without
    # this the tree is full of essentially useless wrapper nodes.
    clean_tree(tree)

    # Converting to an array representation (tree.to_array) is not enabled
    # yet, so the pruned tree itself is returned.
    tree
  end

  # Prune the tree in place so that only nodes of custom classes remain.
  #
  # Children are cleaned first. Every child that is a plain
  # Treetop::Runtime::SyntaxNode is then replaced by its own (already
  # cleaned) children, so custom nodes nested inside anonymous wrapper nodes
  # are hoisted into the nearest kept ancestor instead of being thrown away
  # together with the wrapper.
  #
  # @param root_node [Treetop::Runtime::SyntaxNode] node whose subtree is pruned
  # @return [void]
  def self.clean_tree(root_node)
    children = root_node.elements
    return if children.nil?

    children.each { |child| clean_tree(child) }

    kept = children.flat_map do |child|
      # Exact class match on purpose: subclasses are the nodes we keep.
      child.instance_of?(Treetop::Runtime::SyntaxNode) ? Array(child.elements) : [child]
    end
    children.replace(kept)
  end
  # A bare `private` does not affect methods defined with `def self.`, so
  # the helper is hidden explicitly.
  private_class_method :clean_tree
end
# Command-line entry point: parse the expression given as the first argument
# and pretty-print the resulting tree.
if __FILE__ == $PROGRAM_NAME
  # ARGV[0] is nil when no argument is given; stop with a usage message
  # instead of handing nil to the parser.
  abort "usage: #{$PROGRAM_NAME} EXPRESSION" if ARGV.empty?

  pp Parser.parse(ARGV[0])
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file contains the PEG grammar definition that
# Treetop uses to generate our parser.
# For more information, check out the Treetop site: http://treetop.rubyforge.org/
#
# This grammar supports a simple calculator expression syntax.
#
# Supported expressions:
# - '' (empty)
# - number
# - number operator number
# - group operator group
# - operator is one of: [*/+-]
# - group is: '(' expression ')'
grammar Calc

  # Root rule: zero or more expressions and whitespace runs, in any order.
  # The empty string is accepted.
  rule body
    ( expression / space )* <BodyNode>
  end

  # One or more operand chains. A chain is an operand followed by any number
  # of "operator operand" pairs; whitespace may appear on either side of the
  # operator, so "1 + 2" is a single expression.
  rule expression
    ( operand ( space* operator space* operand )* )+ <ExpressionNode>
  end

  rule operand
    group / number
  end

  # A parenthesised expression. Empty groups "()" are not allowed;
  # whitespace just inside the parentheses is.
  rule group
    '(' space* expression space* ')' <GroupNode>
  end

  # A single binary operator character. The order of the alternatives only
  # decides which one is tried first; it does NOT give the operators
  # different precedence. An expression is a flat operand/operator sequence.
  rule operator
    ( '*' <OperatorNode>
    / '/' <OperatorNode>
    / '+' <OperatorNode>
    / '-' <OperatorNode>
    )
  end

  # float must be tried first: integer would otherwise match only the
  # leading digits of an input such as "1.5".
  rule number
    float / integer
  end

  rule integer
    sign? digits <IntegerNode>
  end

  # Floating-point literal. A fraction or an exponent is required so that a
  # bare run of digits falls through to the integer rule.
  rule float
    sign? digits ( '.' digits exponent? / exponent ) <FloatNode>
  end

  rule exponent
    ('e' / 'E') sign? digits
  end

  rule sign
    ('+' / '-')
  end

  rule digits
    [0-9]+
  end

  # \s already covers the tab character.
  rule space
    [\s]+
  end

end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment