Created
January 28, 2009 10:38
-
-
Save vjt/53893 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Originally posted on http://pastie.org/pastes/36861 (31 January 2007) | |
Boolean/Mathematical Expressions Parser, built with the *excellent* Dhaka language | |
generation library, available on http://dhaka.rubyforge.org/. | |
Have a look at the extensive test suite for insights on how to use this software, | |
especially the protected methods in `test/boolean_expression_parser_test.rb`. | |
The software is licensed under the same terms as Ruby. | |
Have fun! | |
- [email protected] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Driver. | |
# - [email protected] | |
# | |
require 'boolean_expression_tokenizer' | |
require 'boolean_expression_parser' | |
require 'boolean_expression_evaluator' | |
class BooleanExpressionDriver | |
def initialize(variables = {}) | |
@evaluator = BooleanExpressionEvaluator.new(variables) | |
end | |
def variables | |
@evaluator.variables | |
end | |
def variables=(variables) | |
@evaluator.variables = variables | |
end | |
def evaluate(expression, variables = {}) | |
tokenize_result = BooleanExpressionTokenizer.tokenize(expression) | |
if tokenize_result.has_error? | |
raise BooleanExpressionError, tokenize_error_message(tokenize_result.unexpected_char_index, expression) | |
end | |
parse_result = BooleanExpressionParser.parse(tokenize_result) | |
if parse_result.has_error? | |
raise BooleanExpressionError, parse_error_message(parse_result.unexpected_token, expression) | |
end | |
@evaluator.variables.update(variables) | |
@evaluator.evaluate(parse_result.parse_tree) | |
end | |
protected | |
def tokenize_error_message unexpected_char_index, expression | |
raise BooleanExpressionError, "Unexpected character #{expression[unexpected_char_index].chr}:\n#{marked_expression(expression, unexpected_char_index)}" | |
end | |
def parse_error_message unexpected_token, expression | |
if unexpected_token.symbol_name == Dhaka::END_SYMBOL_NAME | |
"Unexpected end of file." | |
else | |
"Unexpected token #{unexpected_token.symbol_name}:\n#{marked_expression(expression, unexpected_token.input_position)}" | |
end | |
end | |
def marked_expression(expression, error_position) | |
if error_position > 0 | |
expression[0..(error_position-1)] + ">>>" + expression[error_position..-1] | |
else | |
">>>"+expression | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evaluator. | |
# - [email protected] | |
# | |
require 'dhaka' | |
require 'boolean_expression_grammar' | |
class BooleanExpressionEvaluator < Dhaka::Evaluator | |
self.grammar = BooleanExpressionGrammar | |
define_evaluation_rules do | |
# Start symbol | |
# | |
for_simple_expression do | |
evaluate(child_nodes[0]) | |
end | |
for_math_expression do | |
evaluate(child_nodes[0]) | |
end | |
# Bool expression | |
# | |
for_bool_value do | |
evaluate(child_nodes[0]) | |
end | |
for_or_expression do | |
evaluate(child_nodes[0]) or evaluate(child_nodes[2]) | |
end | |
for_and_expression do | |
evaluate(child_nodes[0]) and evaluate(child_nodes[2]) | |
end | |
# Bool value | |
# | |
for_bool_literal do | |
child_nodes[0].token.value == 'true' | |
end | |
for_less_than do | |
evaluate(child_nodes[0]) < evaluate(child_nodes[2]) | |
end | |
for_less_or_equal_than do | |
evaluate(child_nodes[0]) <= evaluate(child_nodes[2]) | |
end | |
for_greater_than do | |
evaluate(child_nodes[0]) > evaluate(child_nodes[2]) | |
end | |
for_greater_or_equal_than do | |
evaluate(child_nodes[0]) >= evaluate(child_nodes[2]) | |
end | |
for_equal_to do | |
evaluate(child_nodes[0]) == evaluate(child_nodes[2]) | |
end | |
for_different_to do | |
evaluate(child_nodes[0]) != evaluate(child_nodes[2]) | |
end | |
for_parenthesized_bool_expression do | |
evaluate(child_nodes[1]) | |
end | |
for_negated_bool_expression do | |
not evaluate(child_nodes[1]) | |
end | |
# Math expression | |
# | |
for_subtraction do | |
evaluate(child_nodes[0]) - evaluate(child_nodes[2]) | |
end | |
for_addition do | |
evaluate(child_nodes[0]) + evaluate(child_nodes[2]) | |
end | |
for_division do | |
evaluate(child_nodes[0]) / evaluate(child_nodes[2]) | |
end | |
for_multiplication do | |
evaluate(child_nodes[0]) * evaluate(child_nodes[2]) | |
end | |
for_int_literal do | |
child_nodes[0].token.value.to_i | |
end | |
for_float_literal do | |
child_nodes[0].token.value.to_f | |
end | |
for_parenthesized_math_expression do | |
evaluate(child_nodes[1]) | |
end | |
for_negated_math_expression do | |
-evaluate(child_nodes[1]) | |
end | |
for_power do | |
evaluate(child_nodes[0])**evaluate(child_nodes[2]) | |
end | |
for_variable_name do | |
child_nodes[0].token.value | |
end | |
for_function_name do | |
child_nodes[0].token.value | |
end | |
for_bool_name do | |
child_nodes[0].token.value | |
end | |
for_variable_reference do | |
variable_name = evaluate(child_nodes[0]) | |
raise BooleanExpressionError, "Undefined variable #{variable_name}" unless @variables.has_key? variable_name | |
@variables[variable_name] | |
end | |
for_function_call do | |
function_name = evaluate(child_nodes[0]) | |
raise BooleanExpressionError, "Invalid function #{function_name}" unless Math.respond_to? function_name | |
argument = evaluate(child_nodes[2]) | |
if /^(sin|cos|tan)/.match function_name | |
argument = argument * Math::PI / 180 | |
end | |
Math.send function_name, argument | |
end | |
end | |
attr_accessor :variables | |
def initialize(variables = {}) | |
@variables = variables | |
end | |
end | |
class BooleanExpressionError < StandardError | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grammar. | |
# - [email protected] | |
# | |
require 'dhaka' | |
class BooleanExpressionGrammar < Dhaka::Grammar | |
precedences do | |
left ['less_than', 'less_or_equal_than', 'greater_than', 'greater_or_equal_than', 'equal_to', 'different_to'] | |
left ['+', '-'] | |
left ['*', '/'] | |
nonassoc ['^'] | |
left ['or'] | |
left ['and'] | |
nonassoc ['not'] | |
end | |
for_symbol(Dhaka::START_SYMBOL_NAME) do | |
simple_expression ['bool_expression'] | |
math_expression ['math_expression'] | |
end | |
for_symbol('bool_expression') do | |
bool_value ['bool_value'] | |
or_expression ['bool_expression', 'or', 'bool_value'] # this is a cargo cult | |
and_expression ['bool_expression', 'and', 'bool_expression'] | |
end | |
for_symbol('bool_value') do | |
bool_literal ['bool_literal'] | |
less_than ['math_expression', 'less_than', 'math_expression'] | |
less_or_equal_than ['math_expression', 'less_or_equal_than', 'math_expression'] | |
greater_than ['math_expression', 'greater_than', 'math_expression'] | |
greater_or_equal_than ['math_expression', 'greater_or_equal_than', 'math_expression'] | |
equal_to ['math_expression', 'equal_to', 'math_expression'] | |
different_to ['math_expression', 'different_to', 'math_expression'] | |
parenthesized_bool_expression ['(', 'bool_expression', ')'] | |
negated_bool_expression ['not', 'bool_expression'] | |
end | |
for_symbol('math_expression') do | |
addition ['math_expression', '+', 'math_expression'] | |
subtraction ['math_expression', '-', 'math_expression'] | |
multiplication ['math_expression', '*', 'math_expression'] | |
division ['math_expression', '/', 'math_expression'] | |
power ['math_expression', '^', 'math_expression'] | |
int_literal ['int_literal'] | |
float_literal ['float_literal'] | |
variable_reference ['var_name'] | |
function_call ['func_name', '(', 'math_expression', ')'] | |
parenthesized_math_expression ['(', 'math_expression', ')'] | |
negated_math_expression ['-', 'math_expression'], :prec => '*' | |
end | |
for_symbol('var_name') do | |
variable_name ['word_literal'] | |
end | |
for_symbol('func_name') do | |
function_name ['word_literal'] | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'test/unit' | |
require 'boolean_expression_grammar' | |
require 'boolean_expression_tokenizer' | |
require 'boolean_expression_evaluator' | |
require 'fake_logger' | |
class Numeric | |
def radians | |
self * Math::PI / 180 | |
end | |
end | |
include Math | |
class TestBooleanExpressionParser < Test::Unit::TestCase | |
def test_compilation_of_parser | |
logger = FakeLogger.new | |
parser = Dhaka::Parser.new(BooleanExpressionGrammar, logger) | |
eval(parser.compile_to_ruby_source_as(:BooleanExpressionParser)) | |
assert_equal(34, logger.warnings.size) | |
assert_equal(0, logger.errors.size) | |
end | |
def test_integer_expressions # !> private attribute? | |
assert_correct_evaluation '5 * -14 / (2 * 7 - 7) + 2' | |
assert_correct_evaluation '-2 ** 2' | |
assert_correct_evaluation '2 + 2 ** 3' | |
assert_correct_evaluation '(2 + 2) ** 3' | |
assert_correct_evaluation '(2 + 2) ** 3 * 2' | |
end | |
def test_variables_reference | |
assert_correct_evaluation '(2 + foo) * bar / 2', 'foo' => 4, 'bar' => 420 | |
assert_correct_evaluation 'foo + 2 + sqrt(bar) * log(baz)', 'foo' => Math::PI, 'bar' => 31, 'baz' => 3.7 | |
end | |
def test_float_expressions | |
assert_correct_evaluation '-5.0 + 1.0' | |
assert_correct_evaluation '4.1 * 2 + 0.3' | |
assert_correct_evaluation '0.3 + 3 * 2 ** 3 + 0.4' | |
end | |
def test_function_calls | |
[2, 3, 7, 9, 12, 420, 4935].each do |num| | |
%w(sqrt log).each do |func| | |
assert_correct_evaluation "#{func}(#{num})" | |
end | |
%w(sin cos tan).each do |func| | |
assert_equal(Math.send(func, num.radians), evaluate("#{func}(#{num})")) | |
end | |
end | |
[0.2, 0.4, 0.83, 0.92, 0.11, 0.753].each do |num| | |
%w(asin acos atan).each do |func| | |
assert_correct_evaluation "#{func}(#{num})" | |
end | |
end | |
end | |
def test_some_boolean_expressions | |
assert_correct_evaluation 'a * 45 + sin(14) >= 3 - log(45) or b == 2', 'a' => 5, 'b' => 2, 'c' => 34, 'd' => 1 | |
assert_correct_evaluation 'a * d + cos(c) >= 3 - log(45) and b == 2', 'a' => 5, 'b' => 2, 'c' => 34, 'd' => 1 | |
assert_correct_evaluation '2 > 2' | |
assert_correct_evaluation '2 == 2' | |
assert_correct_evaluation '2 > 1 or 2 < 1' | |
assert_correct_evaluation '2 > 1 and 2 < 1' | |
assert_correct_evaluation '10 * 2 >= 4 + 0 * 2' | |
assert_correct_evaluation 'true and false or true' | |
assert_correct_evaluation 'true or false or true and false' | |
assert_correct_evaluation 'true and false' | |
assert_correct_evaluation 'true or false' | |
assert_correct_evaluation 'false and true or false' | |
assert_correct_evaluation 'true and false or true and true' | |
assert_correct_evaluation '(true and false) or true' | |
assert_correct_evaluation '(false or (true and false)) and (true or false)' | |
end | |
def test_parse_errors | |
assert(tokenize("1.3.4 * 2").has_error?) | |
assert(parse("(2+2)^3^2").has_error?) | |
assert_raise(BooleanExpressionError) { evaluate("suxalot(4)") } | |
end | |
protected | |
def tokenize(input) | |
BooleanExpressionTokenizer.tokenize(input) | |
end | |
def parse(input) | |
input = tokenize(input) if input.is_a? String | |
BooleanExpressionParser.parse input | |
end | |
def evaluate(input, variables = {}) | |
input = parse(input) if input.is_a? String | |
@_eval ||= BooleanExpressionEvaluator.new | |
@_eval.variables = variables unless variables.empty? | |
@_eval.evaluate input.parse_tree | |
end | |
# let's ask ruby itself if our computation is correct.. :) | |
# | |
def assert_correct_evaluation(*expressions) | |
expr_variables = expressions.pop if expressions.last.is_a? Hash | |
expr_variables ||= {} | |
ruby_variables = expr_variables.map { |var, value| "#{var}=#{value};" }.join | |
expressions.each do |expr| | |
assert_equal eval(ruby_variables + expr).to_s, evaluate(expr, expr_variables).to_s | |
end | |
end | |
end | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenizer. | |
# - [email protected] | |
# | |
require 'dhaka' | |
class BooleanExpressionTokenizer < Dhaka::Tokenizer | |
CONNECTORS = [ 'and', 'or', 'not' ] | |
BOOLEANS = [ 'true', 'false' ] | |
digits = ('0'..'9').to_a | |
letters = ('a'..'z').to_a + ('A'..'Z').to_a | |
parens = ['(', ')'] | |
dollar = ['$'] | |
dot = ['.'] | |
operators = ['-', '+', '/', '*', '^'] | |
cmp_operators = ['<', '>', '=', '!'] | |
whitespace = [' ', "\n", "\r", "\t"] | |
all_characters = digits + dollar + letters + parens + operators + cmp_operators + whitespace | |
for_state Dhaka::TOKENIZER_IDLE_STATE do | |
for_characters(all_characters - (digits + dot + dollar + letters + cmp_operators + whitespace)) do | |
create_token(curr_char, nil) | |
advance | |
# ** alias for ^ | |
if curr_char == '*' and curr_token.symbol_name == '*' | |
curr_token.symbol_name = '^' | |
advance | |
end | |
end | |
for_characters digits do | |
create_token('int_literal', '') | |
switch_to :get_number_literal | |
end | |
for_characters letters do | |
create_token(nil, '') | |
switch_to :get_word_literal | |
end | |
for_character dollar do | |
advance # skip the dollar | |
create_token(nil, '$') | |
switch_to :get_word_literal | |
end | |
for_characters cmp_operators do | |
create_token('', '') | |
switch_to :get_comparison_operator | |
end | |
for_character whitespace do | |
advance | |
end | |
end | |
for_state :get_number_literal do | |
for_characters all_characters - digits do | |
switch_to Dhaka::TOKENIZER_IDLE_STATE | |
end | |
for_character dot do | |
if curr_token.symbol_name == 'float_literal' | |
switch_to :error | |
else | |
curr_token.symbol_name = 'float_literal' | |
curr_token.value += curr_char | |
advance | |
end | |
end | |
for_characters digits do | |
curr_token.value += curr_char | |
advance | |
end | |
end | |
for_state :get_word_literal do | |
for_characters all_characters - (letters + digits) do | |
curr_token.symbol_name = word_literal_symbol(curr_token.value) | |
switch_to Dhaka::TOKENIZER_IDLE_STATE | |
end | |
for_characters letters + digits do | |
curr_token.value += curr_char | |
advance | |
curr_token.symbol_name = word_literal_symbol(curr_token.value) unless curr_char | |
end | |
end | |
for_state :get_comparison_operator do | |
for_characters all_characters - cmp_operators do | |
switch_to Dhaka::TOKENIZER_IDLE_STATE | |
end | |
for_characters cmp_operators do | |
first_char = curr_char | |
curr_token.symbol_name = cmp_lookup_level1[curr_char] | |
advance | |
if lookup = cmp_lookup_level2[first_char] | |
if name = lookup[curr_char] | |
curr_token.symbol_name = name | |
advance | |
else | |
switch_to :error | |
end | |
end | |
switch_to Dhaka::TOKENIZER_IDLE_STATE | |
end | |
end | |
protected | |
def word_literal_symbol(word) | |
word = word.downcase | |
if CONNECTORS.include? word | |
word | |
elsif BOOLEANS.include? word | |
'bool_literal' | |
else | |
'word_literal' | |
end | |
end | |
def cmp_lookup_level1 | |
{ '<' => 'less_than', | |
'>' => 'greater_than', | |
'=' => 'equal_to', | |
'!' => 'not' } | |
end | |
def cmp_lookup_level2 | |
{ '=' => { | |
'=' => 'equal_to', | |
}, | |
'<' => { | |
'=' => 'less_or_equal_than', | |
'>' => 'different_to', | |
}, | |
'>' => { | |
'=' => 'greater_or_equal_than', | |
}, | |
'!' => { | |
'=' => 'different_to', | |
} | |
} | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'test/unit' | |
require 'dhaka' | |
require 'boolean_expression_tokenizer' | |
class TestBooleanExpressionTokenizer < Test::Unit::TestCase | |
def test_tokenizes_boolean_expressions | |
tokenizer = BooleanExpressionTokenizer | |
assert_kind_of(Class, tokenizer.tokenize("2 * 3 + 2 >= sqrt(4) and sux == 3").class) | |
assert_equal("Dhaka::TokenizerSuccessResult", tokenizer.tokenize("2 * 3 + 2 >= sqrt(4) and sux == 3").class.inspect) | |
assert_kind_of(Class, tokenizer.tokenize("| % ^").class) | |
assert_equal("Dhaka::TokenizerErrorResult", tokenizer.tokenize("| % ^").class.inspect) | |
end | |
end | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'dhaka' | |
require 'boolean_expression_grammar' | |
requires = "require 'boolean_expression_grammar'\n\n" | |
parser = Dhaka::Parser.new(BooleanExpressionGrammar) | |
File.open('boolean_expression_parser.rb', 'w+') do |file| | |
file.write requires + parser.compile_to_ruby_source_as(:BooleanExpressionParser) | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment