py-in-the-sky · January 3, 2018 07:00
diff --git a/very_simple_arithmetic_evaluator.py b/very_simple_arithmetic_evaluator.py
 """
 Very Simple Integer-arithmetic Evaluator


 Grammar:

    ARITHMETIC  =>  TERM | TERM [+-] ARITHMETIC
    TERM        =>  FACTOR | FACTOR [*/] TERM
    FACTOR      =>  INT | '(' ARITHMETIC ')' | [+-] FACTOR
    INT         =>  [1-9][0-9]*


 This code is an exercise in parsing arithmetic into a binary expression tree
 (https://en.wikipedia.org/wiki/Binary_expression_tree) and then recursively
 evaluating that tree. In the evaluation of the tree, this code hands off each
 binary arithmetic operation to the built-in operators (+, -, *, and /) in Python.
 If you're looking for how to implement the binary operators, you won't find the
 answer in this code. If you're curious about binary expression trees and parsing
 context-free languages, then hopefully this will be useful.

 Also see a very simple JSON parser: https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9


 Two simplifications:

    1. Only integers are handled, not floats.

    2. Exponents are not handled.


 Big-O performance characteristics: O(N) time and O(N) extra space,
 where N = len(arithmetic_expression).


 Note that the parsing functions below take a `left_tree` argumet; recursive
 calls to the parsing functions pass into the `left_tree` argument the binary
 expression tree that's been constructed so far. This supports the creation of
 left-associative binary expression trees, which in turn support the correct
 evaluation of the arithmetic. See how addition/subtraction are left associative:
 '1 - 2 + 3' is properly evaluated with left association ('(1 - 2) + 3') and
 not with right association ('1 - (2 + 3)'). Similarly, see how multiplication/division
 is left association: '1 / 2 * 3' is properly evaluated as '(1 / 2) * 3', not as
 '1 / (2 * 3)'. When a left-associative binary expression tree is evaluated via
 a post-order traversal, then the encoded arithmetic expression is evaluated
 properly, in a left-associative manner.


 Exercises for the reader:
 1. Change the code to handle floats, in addition to integers.
 2. Change the code to handle the exponent '**' binary operator.
 3. Read the code and convince yourself that it operates on binary expressions
 from left to right while obeying PEMDAS (https://en.wikipedia.org/wiki/Order_of_operations#Mnemonics).
    E.g.: 1 + 2 * 3 + 4 => 1 + 6 + 4 => 7 + 4 => 11
    E.g.: 2 * 3 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 + 5 * (9 + 2 * 5)
            => 7 + 5 * (9 + 2 * 5) => 7 + 5 * (9 + 10) => 7 + 5 * 19 => 7 + 95 => 102
 4. Develop nice validation/error messages, like those in
 https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9
 5. Read the code and convince yourself of the Big-O performance characteristics declared above. (It
 may help to keep in mind that the parser is a recursive-descent parser.)
 6. There are several ways to traverse a binary tree, three of which are in-order traversal, pre-order traversal,
 and post-order traversal. In the case of evaluating a binary expression tree in order to get the final number
 value of the expression, only post-order traversal makes sense. Why?
 7. Debugging tool: develop a `pretty_print` function to print binary expression trees. E.g.:
    pretty_print(parse('(1 + 2) * 3 + 4'))
                        +
                    /       \
                    *       4
                /       \
                +       3
            /       \
            1       2
 """


 from __future__ import division  # Tests pass both with and without this line uncommented.


 ### Top-level Function

 def eval_arithmetic(arithmetic_expression):
    """Given a string encoded with an arithmetic expression,
    returns the number to which the arithmetic expression evaluates.

    E.g.:
    >>> eval_arithmetic('1 + 2 * (3 + 4) - 5')
    10
    """
    tokens = tokenize(arithmetic_expression)
    binary_expression_tree = parse(tokens)
    number = evaluate(binary_expression_tree)
    return number


 ### Tokenize

 is_token = lambda char: char in '+-*/()'
 is_digit = lambda char: '0' <= char <= '9'
 is_leading_digit = lambda char: '1' <= char <= '9'
 is_whitespace = lambda char: char == ' '


 def tokenize(arithmetic_expression):
    def _tokens():
        i = 0

        while i < len(arithmetic_expression):
            if is_token(arithmetic_expression[i]):
                yield arithmetic_expression[i]
                i += 1
            elif is_leading_digit(arithmetic_expression[i]):
                i, integer = _parse_integer(i)
                yield integer
            else:
                assert is_whitespace(arithmetic_expression[i])
                i += 1

    def _parse_integer(i):
        j = i + 1

        while j < len(arithmetic_expression) and is_digit(arithmetic_expression[j]):
            j += 1

        return j, int(arithmetic_expression[i:j])

    return tuple(_tokens())


 ### Parse

 is_expression_opening_token = lambda token: token == '('
 is_expression_closing_token = lambda token: token == ')'
 is_term_closing_token = lambda token: token in tuple('+-)')


 def parse(tokens):
    return parse_arithmetic_expression(tokens, 0)[1]


 def parse_arithmetic_expression(tokens, i, left_tree=None):
    if left_tree is None:  # very beginning of expression
        i, term = parse_term(tokens, i)
        return parse_arithmetic_expression(tokens, i, term)
    elif i == len(tokens) or is_expression_closing_token(tokens[i]):  # very end of expression
        return i, left_tree
    else:  # in the middle of expression
        assert tokens[i] in '+-'
        operator = tokens[i]
        i, term = parse_term(tokens, i+1)
        return parse_arithmetic_expression(tokens, i, (left_tree, operator, term))


 def parse_term(tokens, i, left_tree=None):
    if left_tree is None:  # very beginning of term
        i, factor = parse_factor(tokens, i)
        return parse_term(tokens, i, factor)
    elif i == len(tokens) or is_term_closing_token(tokens[i]):  # very end of term
        return i, left_tree
    else:  # in the middle of term
        assert tokens[i] in '*/'
        operator = tokens[i]
        i, factor = parse_factor(tokens, i+1)
        return parse_term(tokens, i, (left_tree, operator, factor))


 def parse_factor(tokens, i, positive=True):
    if isinstance(tokens[i], int):
        return i+1, tokens[i] if positive else -tokens[i]
    elif is_expression_opening_token(tokens[i]):
        i, expression = parse_arithmetic_expression(tokens, i+1)
        assert is_expression_closing_token(tokens[i])
        return i+1, expression if positive else (-1, '*', expression)
    else:
        assert tokens[i] in '+-'  # Unary operators
        return parse_factor(tokens, i+1, positive if tokens[i] == '+' else not positive)


 ### Evaluate

 import operator as op


 OPERATIONS = {
    '+': op.add,
    '-': op.sub,
    '*': op.mul,
    '/': lambda a,b: a / b  # `op.div` is not influenced by `from __future__ import division`.
 }


 def evaluate(binary_expression_tree):
    if isinstance(binary_expression_tree, int):  # Leaf node
        return binary_expression_tree
    elif len(binary_expression_tree) == 1:
        return evaluate(binary_expression_tree[0])  # E.g., '(2)'
    else:
        assert len(binary_expression_tree) == 3
        left_value = evaluate(binary_expression_tree[0])
        operation = OPERATIONS[binary_expression_tree[1]]
        right_value = evaluate(binary_expression_tree[2])
        return operation(left_value, right_value)


 ### Test

 import random


 COIN = HEADS, TAILS = True, False
 coin_flip = lambda: random.choice(COIN)

 FACTOR_OPTIONS = INT, PARENTHETIC_EXPRESSION, UNARY_OPERATOR = 'int paren unary'.split()
 random_factor_option = lambda: random.choice(FACTOR_OPTIONS)

 RECURSION_DEPTH = 4
 max_recursion_depth_reached = lambda n: n >= RECURSION_DEPTH

 random_leading_digit = lambda: str(random.randint(1, 9))
 random_trailing_digit = lambda: str(random.randint(0, 9))


 def generate_arithmetic_expression(depth=0):
    if max_recursion_depth_reached(depth) or coin_flip() is HEADS:
        return generate_term(depth)
    else:
        operator = random.choice('+-')
        return ' '.join([generate_term(depth), operator, generate_arithmetic_expression(depth + 1)])


 def generate_term(depth):
    if max_recursion_depth_reached(depth) or coin_flip() is HEADS:
        return generate_factor(depth)
    else:
        operator = random.choice('*/')
        return ' '.join([generate_factor(depth), operator, generate_term(depth + 1)])


 def generate_factor(depth):
    factor_option = random_factor_option()

    if factor_option is INT:
        leading_digit = random_leading_digit()
        trailing_digits = ''.join(random_trailing_digit() for _ in xrange(random.randint(0, 5)))
        return leading_digit + trailing_digits
    elif factor_option is PARENTHETIC_EXPRESSION:
        return '(' + generate_arithmetic_expression(depth + 1) + ')'
    else:
        unary_operator = random.choice('+-')
        return unary_operator + generate_factor(depth + 1)


 def tests():
    divide_by_zero_count = 0
    test_case_count = 1000
    random_test_cases = [generate_arithmetic_expression() for _ in xrange(test_case_count)]
    hard_coded_test_cases = [
        '4 / 2 * 3 + 4 - 6',
        '2 * -2',
        '2 - 2',
        '4 / 2 * (3 + 4) - 6',
        '-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / 6 / -7 - 908',
        '-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / -(-(3 + 3)) / -(3 + 4 + -10/-10) - 908',
        '(2)',
        '1/2/2',
        '-2 + -2',
        '(-2 + -2)',
        '-(-2 + -2)',
        '-(1 + 1) + -(1 + 1)',
        '1 -1',
        '1-1',
        '4*3-1*5*5/5',
        '4032 / 7040083',
        '1 + 2 * (3 + 4) - 5',
    ]

    for arithmetic_expression in random_test_cases+hard_coded_test_cases:
        try:
            actual = eval_arithmetic(arithmetic_expression)
            expected = eval(arithmetic_expression)  # Comparing against Python's built-in `eval`.
            print '{} = {} (actual) = {} (expected)\n'.format(arithmetic_expression, actual, expected)
            assert actual == expected
        except ZeroDivisionError:
            print 'Division by zero: {}\n'.format(arithmetic_expression)
            divide_by_zero_count += 1

    print 'Tests pass! ({} test cases out of {} threw ZeroDivisionError.)'.format(divide_by_zero_count, test_case_count)


 if __name__ == '__main__':
    tests()
	"""
	Very Simple Integer-arithmetic Evaluator


	Grammar:

	ARITHMETIC => TERM \| TERM [+-] ARITHMETIC
	TERM => FACTOR \| FACTOR [*/] TERM
	FACTOR => INT \| '(' ARITHMETIC ')' \| [+-] FACTOR
	INT => [1-9][0-9]*


	This code is an exercise in parsing arithmetic into a binary expression tree
	(https://en.wikipedia.org/wiki/Binary_expression_tree) and then recursively
	evaluating that tree. In the evaluation of the tree, this code hands off each
	binary arithmetic operation to the built-in operators (+, -, *, and /) in Python.
	If you're looking for how to implement the binary operators, you won't find the
	answer in this code. If you're curious about binary expression trees and parsing
	context-free languages, then hopefully this will be useful.

	Also see a very simple JSON parser: https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9


	Two simplifications:

	1. Only integers are handled, not floats.

	2. Exponents are not handled.


	Big-O performance characteristics: O(N) time and O(N) extra space,
	where N = len(arithmetic_expression).


	Note that the parsing functions below take a `left_tree` argumet; recursive
	calls to the parsing functions pass into the `left_tree` argument the binary
	expression tree that's been constructed so far. This supports the creation of
	left-associative binary expression trees, which in turn support the correct
	evaluation of the arithmetic. See how addition/subtraction are left associative:
	'1 - 2 + 3' is properly evaluated with left association ('(1 - 2) + 3') and
	not with right association ('1 - (2 + 3)'). Similarly, see how multiplication/division
	is left association: '1 / 2 * 3' is properly evaluated as '(1 / 2) * 3', not as
	'1 / (2 * 3)'. When a left-associative binary expression tree is evaluated via
	a post-order traversal, then the encoded arithmetic expression is evaluated
	properly, in a left-associative manner.


	Exercises for the reader:
	1. Change the code to handle floats, in addition to integers.
	2. Change the code to handle the exponent '**' binary operator.
	3. Read the code and convince yourself that it operates on binary expressions
	from left to right while obeying PEMDAS (https://en.wikipedia.org/wiki/Order_of_operations#Mnemonics).
	E.g.: 1 + 2 * 3 + 4 => 1 + 6 + 4 => 7 + 4 => 11
	E.g.: 2 * 3 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 + 5 * (9 + 2 * 5)
	=> 7 + 5 * (9 + 2 * 5) => 7 + 5 * (9 + 10) => 7 + 5 * 19 => 7 + 95 => 102
	4. Develop nice validation/error messages, like those in
	https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9
	5. Read the code and convince yourself of the Big-O performance characteristics declared above. (It
	may help to keep in mind that the parser is a recursive-descent parser.)
	6. There are several ways to traverse a binary tree, three of which are in-order traversal, pre-order traversal,
	and post-order traversal. In the case of evaluating a binary expression tree in order to get the final number
	value of the expression, only post-order traversal makes sense. Why?
	7. Debugging tool: develop a `pretty_print` function to print binary expression trees. E.g.:
	pretty_print(parse('(1 + 2) * 3 + 4'))
	+
	/ \
	* 4
	/ \
	+ 3
	/ \
	1 2
	"""


	from __future__ import division # Tests pass both with and without this line uncommented.


	### Top-level Function

	def eval_arithmetic(arithmetic_expression):
	"""Given a string encoded with an arithmetic expression,
	returns the number to which the arithmetic expression evaluates.

	E.g.:
	>>> eval_arithmetic('1 + 2 * (3 + 4) - 5')
	10
	"""
	tokens = tokenize(arithmetic_expression)
	binary_expression_tree = parse(tokens)
	number = evaluate(binary_expression_tree)
	return number


	### Tokenize

	is_token = lambda char: char in '+-*/()'
	is_digit = lambda char: '0' <= char <= '9'
	is_leading_digit = lambda char: '1' <= char <= '9'
	is_whitespace = lambda char: char == ' '


	def tokenize(arithmetic_expression):
	def _tokens():
	i = 0

	while i < len(arithmetic_expression):
	if is_token(arithmetic_expression[i]):
	yield arithmetic_expression[i]
	i += 1
	elif is_leading_digit(arithmetic_expression[i]):
	i, integer = _parse_integer(i)
	yield integer
	else:
	assert is_whitespace(arithmetic_expression[i])
	i += 1

	def _parse_integer(i):
	j = i + 1

	while j < len(arithmetic_expression) and is_digit(arithmetic_expression[j]):
	j += 1

	return j, int(arithmetic_expression[i:j])

	return tuple(_tokens())


	### Parse

	is_expression_opening_token = lambda token: token == '('
	is_expression_closing_token = lambda token: token == ')'
	is_term_closing_token = lambda token: token in tuple('+-)')


	def parse(tokens):
	return parse_arithmetic_expression(tokens, 0)[1]


	def parse_arithmetic_expression(tokens, i, left_tree=None):
	if left_tree is None: # very beginning of expression
	i, term = parse_term(tokens, i)
	return parse_arithmetic_expression(tokens, i, term)
	elif i == len(tokens) or is_expression_closing_token(tokens[i]): # very end of expression
	return i, left_tree
	else: # in the middle of expression
	assert tokens[i] in '+-'
	operator = tokens[i]
	i, term = parse_term(tokens, i+1)
	return parse_arithmetic_expression(tokens, i, (left_tree, operator, term))


	def parse_term(tokens, i, left_tree=None):
	if left_tree is None: # very beginning of term
	i, factor = parse_factor(tokens, i)
	return parse_term(tokens, i, factor)
	elif i == len(tokens) or is_term_closing_token(tokens[i]): # very end of term
	return i, left_tree
	else: # in the middle of term
	assert tokens[i] in '*/'
	operator = tokens[i]
	i, factor = parse_factor(tokens, i+1)
	return parse_term(tokens, i, (left_tree, operator, factor))


	def parse_factor(tokens, i, positive=True):
	if isinstance(tokens[i], int):
	return i+1, tokens[i] if positive else -tokens[i]
	elif is_expression_opening_token(tokens[i]):
	i, expression = parse_arithmetic_expression(tokens, i+1)
	assert is_expression_closing_token(tokens[i])
	return i+1, expression if positive else (-1, '*', expression)
	else:
	assert tokens[i] in '+-' # Unary operators
	return parse_factor(tokens, i+1, positive if tokens[i] == '+' else not positive)


	### Evaluate

	import operator as op


	OPERATIONS = {
	'+': op.add,
	'-': op.sub,
	'*': op.mul,
	'/': lambda a,b: a / b # `op.div` is not influenced by `from __future__ import division`.
	}


	def evaluate(binary_expression_tree):
	if isinstance(binary_expression_tree, int): # Leaf node
	return binary_expression_tree
	elif len(binary_expression_tree) == 1:
	return evaluate(binary_expression_tree[0]) # E.g., '(2)'
	else:
	assert len(binary_expression_tree) == 3
	left_value = evaluate(binary_expression_tree[0])
	operation = OPERATIONS[binary_expression_tree[1]]
	right_value = evaluate(binary_expression_tree[2])
	return operation(left_value, right_value)


	### Test

	import random


	COIN = HEADS, TAILS = True, False
	coin_flip = lambda: random.choice(COIN)

	FACTOR_OPTIONS = INT, PARENTHETIC_EXPRESSION, UNARY_OPERATOR = 'int paren unary'.split()
	random_factor_option = lambda: random.choice(FACTOR_OPTIONS)

	RECURSION_DEPTH = 4
	max_recursion_depth_reached = lambda n: n >= RECURSION_DEPTH

	random_leading_digit = lambda: str(random.randint(1, 9))
	random_trailing_digit = lambda: str(random.randint(0, 9))


	def generate_arithmetic_expression(depth=0):
	if max_recursion_depth_reached(depth) or coin_flip() is HEADS:
	return generate_term(depth)
	else:
	operator = random.choice('+-')
	return ' '.join([generate_term(depth), operator, generate_arithmetic_expression(depth + 1)])


	def generate_term(depth):
	if max_recursion_depth_reached(depth) or coin_flip() is HEADS:
	return generate_factor(depth)
	else:
	operator = random.choice('*/')
	return ' '.join([generate_factor(depth), operator, generate_term(depth + 1)])


	def generate_factor(depth):
	factor_option = random_factor_option()

	if factor_option is INT:
	leading_digit = random_leading_digit()
	trailing_digits = ''.join(random_trailing_digit() for _ in xrange(random.randint(0, 5)))
	return leading_digit + trailing_digits
	elif factor_option is PARENTHETIC_EXPRESSION:
	return '(' + generate_arithmetic_expression(depth + 1) + ')'
	else:
	unary_operator = random.choice('+-')
	return unary_operator + generate_factor(depth + 1)


	def tests():
	divide_by_zero_count = 0
	test_case_count = 1000
	random_test_cases = [generate_arithmetic_expression() for _ in xrange(test_case_count)]
	hard_coded_test_cases = [
	'4 / 2 * 3 + 4 - 6',
	'2 * -2',
	'2 - 2',
	'4 / 2 * (3 + 4) - 6',
	'-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / 6 / -7 - 908',
	'-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / -(-(3 + 3)) / -(3 + 4 + -10/-10) - 908',
	'(2)',
	'1/2/2',
	'-2 + -2',
	'(-2 + -2)',
	'-(-2 + -2)',
	'-(1 + 1) + -(1 + 1)',
	'1 -1',
	'1-1',
	'43-15*5/5',
	'4032 / 7040083',
	'1 + 2 * (3 + 4) - 5',
	]

	for arithmetic_expression in random_test_cases+hard_coded_test_cases:
	try:
	actual = eval_arithmetic(arithmetic_expression)
	expected = eval(arithmetic_expression) # Comparing against Python's built-in `eval`.
	print '{} = {} (actual) = {} (expected)\n'.format(arithmetic_expression, actual, expected)
	assert actual == expected
	except ZeroDivisionError:
	print 'Division by zero: {}\n'.format(arithmetic_expression)
	divide_by_zero_count += 1

	print 'Tests pass! ({} test cases out of {} threw ZeroDivisionError.)'.format(divide_by_zero_count, test_case_count)


	if __name__ == '__main__':
	tests()