Skip to content

Instantly share code, notes, and snippets.

@vjt
Created January 28, 2009 10:38
Show Gist options
  • Save vjt/53893 to your computer and use it in GitHub Desktop.
Save vjt/53893 to your computer and use it in GitHub Desktop.
# Originally posted on http://pastie.org/pastes/36861 (31 January 2007)
Boolean/Mathematical Expressions Parser, built with the *excellent* Dhaka language
generation library, available on http://dhaka.rubyforge.org/.
Have a look at the extensive test suite for insights on how to use this software,
especially the protected methods in `test/boolean_expression_parser_test.rb`.
The software is licensed under the same terms as Ruby.
Have fun!
- [email protected]
# Driver.
# - [email protected]
#
require 'boolean_expression_tokenizer'
require 'boolean_expression_parser'
require 'boolean_expression_evaluator'
class BooleanExpressionDriver
def initialize(variables = {})
@evaluator = BooleanExpressionEvaluator.new(variables)
end
def variables
@evaluator.variables
end
def variables=(variables)
@evaluator.variables = variables
end
def evaluate(expression, variables = {})
tokenize_result = BooleanExpressionTokenizer.tokenize(expression)
if tokenize_result.has_error?
raise BooleanExpressionError, tokenize_error_message(tokenize_result.unexpected_char_index, expression)
end
parse_result = BooleanExpressionParser.parse(tokenize_result)
if parse_result.has_error?
raise BooleanExpressionError, parse_error_message(parse_result.unexpected_token, expression)
end
@evaluator.variables.update(variables)
@evaluator.evaluate(parse_result.parse_tree)
end
protected
def tokenize_error_message unexpected_char_index, expression
raise BooleanExpressionError, "Unexpected character #{expression[unexpected_char_index].chr}:\n#{marked_expression(expression, unexpected_char_index)}"
end
def parse_error_message unexpected_token, expression
if unexpected_token.symbol_name == Dhaka::END_SYMBOL_NAME
"Unexpected end of file."
else
"Unexpected token #{unexpected_token.symbol_name}:\n#{marked_expression(expression, unexpected_token.input_position)}"
end
end
def marked_expression(expression, error_position)
if error_position > 0
expression[0..(error_position-1)] + ">>>" + expression[error_position..-1]
else
">>>"+expression
end
end
end
# Evaluator.
# - [email protected]
#
require 'dhaka'
require 'boolean_expression_grammar'
class BooleanExpressionEvaluator < Dhaka::Evaluator
self.grammar = BooleanExpressionGrammar
define_evaluation_rules do
# Start symbol
#
for_simple_expression do
evaluate(child_nodes[0])
end
for_math_expression do
evaluate(child_nodes[0])
end
# Bool expression
#
for_bool_value do
evaluate(child_nodes[0])
end
for_or_expression do
evaluate(child_nodes[0]) or evaluate(child_nodes[2])
end
for_and_expression do
evaluate(child_nodes[0]) and evaluate(child_nodes[2])
end
# Bool value
#
for_bool_literal do
child_nodes[0].token.value == 'true'
end
for_less_than do
evaluate(child_nodes[0]) < evaluate(child_nodes[2])
end
for_less_or_equal_than do
evaluate(child_nodes[0]) <= evaluate(child_nodes[2])
end
for_greater_than do
evaluate(child_nodes[0]) > evaluate(child_nodes[2])
end
for_greater_or_equal_than do
evaluate(child_nodes[0]) >= evaluate(child_nodes[2])
end
for_equal_to do
evaluate(child_nodes[0]) == evaluate(child_nodes[2])
end
for_different_to do
evaluate(child_nodes[0]) != evaluate(child_nodes[2])
end
for_parenthesized_bool_expression do
evaluate(child_nodes[1])
end
for_negated_bool_expression do
not evaluate(child_nodes[1])
end
# Math expression
#
for_subtraction do
evaluate(child_nodes[0]) - evaluate(child_nodes[2])
end
for_addition do
evaluate(child_nodes[0]) + evaluate(child_nodes[2])
end
for_division do
evaluate(child_nodes[0]) / evaluate(child_nodes[2])
end
for_multiplication do
evaluate(child_nodes[0]) * evaluate(child_nodes[2])
end
for_int_literal do
child_nodes[0].token.value.to_i
end
for_float_literal do
child_nodes[0].token.value.to_f
end
for_parenthesized_math_expression do
evaluate(child_nodes[1])
end
for_negated_math_expression do
-evaluate(child_nodes[1])
end
for_power do
evaluate(child_nodes[0])**evaluate(child_nodes[2])
end
for_variable_name do
child_nodes[0].token.value
end
for_function_name do
child_nodes[0].token.value
end
for_bool_name do
child_nodes[0].token.value
end
for_variable_reference do
variable_name = evaluate(child_nodes[0])
raise BooleanExpressionError, "Undefined variable #{variable_name}" unless @variables.has_key? variable_name
@variables[variable_name]
end
for_function_call do
function_name = evaluate(child_nodes[0])
raise BooleanExpressionError, "Invalid function #{function_name}" unless Math.respond_to? function_name
argument = evaluate(child_nodes[2])
if /^(sin|cos|tan)/.match function_name
argument = argument * Math::PI / 180
end
Math.send function_name, argument
end
end
attr_accessor :variables
def initialize(variables = {})
@variables = variables
end
end
class BooleanExpressionError < StandardError
end
# Grammar.
# - [email protected]
#
require 'dhaka'
class BooleanExpressionGrammar < Dhaka::Grammar
precedences do
left ['less_than', 'less_or_equal_than', 'greater_than', 'greater_or_equal_than', 'equal_to', 'different_to']
left ['+', '-']
left ['*', '/']
nonassoc ['^']
left ['or']
left ['and']
nonassoc ['not']
end
for_symbol(Dhaka::START_SYMBOL_NAME) do
simple_expression ['bool_expression']
math_expression ['math_expression']
end
for_symbol('bool_expression') do
bool_value ['bool_value']
or_expression ['bool_expression', 'or', 'bool_value'] # this is a cargo cult
and_expression ['bool_expression', 'and', 'bool_expression']
end
for_symbol('bool_value') do
bool_literal ['bool_literal']
less_than ['math_expression', 'less_than', 'math_expression']
less_or_equal_than ['math_expression', 'less_or_equal_than', 'math_expression']
greater_than ['math_expression', 'greater_than', 'math_expression']
greater_or_equal_than ['math_expression', 'greater_or_equal_than', 'math_expression']
equal_to ['math_expression', 'equal_to', 'math_expression']
different_to ['math_expression', 'different_to', 'math_expression']
parenthesized_bool_expression ['(', 'bool_expression', ')']
negated_bool_expression ['not', 'bool_expression']
end
for_symbol('math_expression') do
addition ['math_expression', '+', 'math_expression']
subtraction ['math_expression', '-', 'math_expression']
multiplication ['math_expression', '*', 'math_expression']
division ['math_expression', '/', 'math_expression']
power ['math_expression', '^', 'math_expression']
int_literal ['int_literal']
float_literal ['float_literal']
variable_reference ['var_name']
function_call ['func_name', '(', 'math_expression', ')']
parenthesized_math_expression ['(', 'math_expression', ')']
negated_math_expression ['-', 'math_expression'], :prec => '*'
end
for_symbol('var_name') do
variable_name ['word_literal']
end
for_symbol('func_name') do
function_name ['word_literal']
end
end
require 'test/unit'
require 'boolean_expression_grammar'
require 'boolean_expression_tokenizer'
require 'boolean_expression_evaluator'
require 'fake_logger'
class Numeric
def radians
self * Math::PI / 180
end
end
include Math
class TestBooleanExpressionParser < Test::Unit::TestCase
def test_compilation_of_parser
logger = FakeLogger.new
parser = Dhaka::Parser.new(BooleanExpressionGrammar, logger)
eval(parser.compile_to_ruby_source_as(:BooleanExpressionParser))
assert_equal(34, logger.warnings.size)
assert_equal(0, logger.errors.size)
end
def test_integer_expressions # !> private attribute?
assert_correct_evaluation '5 * -14 / (2 * 7 - 7) + 2'
assert_correct_evaluation '-2 ** 2'
assert_correct_evaluation '2 + 2 ** 3'
assert_correct_evaluation '(2 + 2) ** 3'
assert_correct_evaluation '(2 + 2) ** 3 * 2'
end
def test_variables_reference
assert_correct_evaluation '(2 + foo) * bar / 2', 'foo' => 4, 'bar' => 420
assert_correct_evaluation 'foo + 2 + sqrt(bar) * log(baz)', 'foo' => Math::PI, 'bar' => 31, 'baz' => 3.7
end
def test_float_expressions
assert_correct_evaluation '-5.0 + 1.0'
assert_correct_evaluation '4.1 * 2 + 0.3'
assert_correct_evaluation '0.3 + 3 * 2 ** 3 + 0.4'
end
def test_function_calls
[2, 3, 7, 9, 12, 420, 4935].each do |num|
%w(sqrt log).each do |func|
assert_correct_evaluation "#{func}(#{num})"
end
%w(sin cos tan).each do |func|
assert_equal(Math.send(func, num.radians), evaluate("#{func}(#{num})"))
end
end
[0.2, 0.4, 0.83, 0.92, 0.11, 0.753].each do |num|
%w(asin acos atan).each do |func|
assert_correct_evaluation "#{func}(#{num})"
end
end
end
def test_some_boolean_expressions
assert_correct_evaluation 'a * 45 + sin(14) >= 3 - log(45) or b == 2', 'a' => 5, 'b' => 2, 'c' => 34, 'd' => 1
assert_correct_evaluation 'a * d + cos(c) >= 3 - log(45) and b == 2', 'a' => 5, 'b' => 2, 'c' => 34, 'd' => 1
assert_correct_evaluation '2 > 2'
assert_correct_evaluation '2 == 2'
assert_correct_evaluation '2 > 1 or 2 < 1'
assert_correct_evaluation '2 > 1 and 2 < 1'
assert_correct_evaluation '10 * 2 >= 4 + 0 * 2'
assert_correct_evaluation 'true and false or true'
assert_correct_evaluation 'true or false or true and false'
assert_correct_evaluation 'true and false'
assert_correct_evaluation 'true or false'
assert_correct_evaluation 'false and true or false'
assert_correct_evaluation 'true and false or true and true'
assert_correct_evaluation '(true and false) or true'
assert_correct_evaluation '(false or (true and false)) and (true or false)'
end
def test_parse_errors
assert(tokenize("1.3.4 * 2").has_error?)
assert(parse("(2+2)^3^2").has_error?)
assert_raise(BooleanExpressionError) { evaluate("suxalot(4)") }
end
protected
def tokenize(input)
BooleanExpressionTokenizer.tokenize(input)
end
def parse(input)
input = tokenize(input) if input.is_a? String
BooleanExpressionParser.parse input
end
def evaluate(input, variables = {})
input = parse(input) if input.is_a? String
@_eval ||= BooleanExpressionEvaluator.new
@_eval.variables = variables unless variables.empty?
@_eval.evaluate input.parse_tree
end
# let's ask ruby itself if our computation is correct.. :)
#
def assert_correct_evaluation(*expressions)
expr_variables = expressions.pop if expressions.last.is_a? Hash
expr_variables ||= {}
ruby_variables = expr_variables.map { |var, value| "#{var}=#{value};" }.join
expressions.each do |expr|
assert_equal eval(ruby_variables + expr).to_s, evaluate(expr, expr_variables).to_s
end
end
end
# Tokenizer.
# - [email protected]
#
require 'dhaka'
class BooleanExpressionTokenizer < Dhaka::Tokenizer
CONNECTORS = [ 'and', 'or', 'not' ]
BOOLEANS = [ 'true', 'false' ]
digits = ('0'..'9').to_a
letters = ('a'..'z').to_a + ('A'..'Z').to_a
parens = ['(', ')']
dollar = ['$']
dot = ['.']
operators = ['-', '+', '/', '*', '^']
cmp_operators = ['<', '>', '=', '!']
whitespace = [' ', "\n", "\r", "\t"]
all_characters = digits + dollar + letters + parens + operators + cmp_operators + whitespace
for_state Dhaka::TOKENIZER_IDLE_STATE do
for_characters(all_characters - (digits + dot + dollar + letters + cmp_operators + whitespace)) do
create_token(curr_char, nil)
advance
# ** alias for ^
if curr_char == '*' and curr_token.symbol_name == '*'
curr_token.symbol_name = '^'
advance
end
end
for_characters digits do
create_token('int_literal', '')
switch_to :get_number_literal
end
for_characters letters do
create_token(nil, '')
switch_to :get_word_literal
end
for_character dollar do
advance # skip the dollar
create_token(nil, '$')
switch_to :get_word_literal
end
for_characters cmp_operators do
create_token('', '')
switch_to :get_comparison_operator
end
for_character whitespace do
advance
end
end
for_state :get_number_literal do
for_characters all_characters - digits do
switch_to Dhaka::TOKENIZER_IDLE_STATE
end
for_character dot do
if curr_token.symbol_name == 'float_literal'
switch_to :error
else
curr_token.symbol_name = 'float_literal'
curr_token.value += curr_char
advance
end
end
for_characters digits do
curr_token.value += curr_char
advance
end
end
for_state :get_word_literal do
for_characters all_characters - (letters + digits) do
curr_token.symbol_name = word_literal_symbol(curr_token.value)
switch_to Dhaka::TOKENIZER_IDLE_STATE
end
for_characters letters + digits do
curr_token.value += curr_char
advance
curr_token.symbol_name = word_literal_symbol(curr_token.value) unless curr_char
end
end
for_state :get_comparison_operator do
for_characters all_characters - cmp_operators do
switch_to Dhaka::TOKENIZER_IDLE_STATE
end
for_characters cmp_operators do
first_char = curr_char
curr_token.symbol_name = cmp_lookup_level1[curr_char]
advance
if lookup = cmp_lookup_level2[first_char]
if name = lookup[curr_char]
curr_token.symbol_name = name
advance
else
switch_to :error
end
end
switch_to Dhaka::TOKENIZER_IDLE_STATE
end
end
protected
def word_literal_symbol(word)
word = word.downcase
if CONNECTORS.include? word
word
elsif BOOLEANS.include? word
'bool_literal'
else
'word_literal'
end
end
def cmp_lookup_level1
{ '<' => 'less_than',
'>' => 'greater_than',
'=' => 'equal_to',
'!' => 'not' }
end
def cmp_lookup_level2
{ '=' => {
'=' => 'equal_to',
},
'<' => {
'=' => 'less_or_equal_than',
'>' => 'different_to',
},
'>' => {
'=' => 'greater_or_equal_than',
},
'!' => {
'=' => 'different_to',
}
}
end
end
require 'test/unit'
require 'dhaka'
require 'boolean_expression_tokenizer'
class TestBooleanExpressionTokenizer < Test::Unit::TestCase
def test_tokenizes_boolean_expressions
tokenizer = BooleanExpressionTokenizer
assert_kind_of(Class, tokenizer.tokenize("2 * 3 + 2 >= sqrt(4) and sux == 3").class)
assert_equal("Dhaka::TokenizerSuccessResult", tokenizer.tokenize("2 * 3 + 2 >= sqrt(4) and sux == 3").class.inspect)
assert_kind_of(Class, tokenizer.tokenize("| % ^").class)
assert_equal("Dhaka::TokenizerErrorResult", tokenizer.tokenize("| % ^").class.inspect)
end
end
require 'dhaka'
require 'boolean_expression_grammar'
requires = "require 'boolean_expression_grammar'\n\n"
parser = Dhaka::Parser.new(BooleanExpressionGrammar)
File.open('boolean_expression_parser.rb', 'w+') do |file|
file.write requires + parser.compile_to_ruby_source_as(:BooleanExpressionParser)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment