Last active
January 3, 2018 07:00
-
-
Save py-in-the-sky/a2c00053b8a9ae9c70ceb37b871cebae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Very Simple Integer-arithmetic Evaluator | |
Grammar: | |
ARITHMETIC => TERM | TERM [+-] ARITHMETIC | |
TERM => FACTOR | FACTOR [*/] TERM | |
FACTOR => INT | '(' ARITHMETIC ')' | [+-] FACTOR | |
INT => [1-9][0-9]* | |
This code is an exercise in parsing arithmetic into a binary expression tree | |
(https://en.wikipedia.org/wiki/Binary_expression_tree) and then recursively | |
evaluating that tree. In the evaluation of the tree, this code hands off each | |
binary arithmetic operation to the built-in operators (+, -, *, and /) in Python. | |
If you're looking for how to implement the binary operators, you won't find the | |
answer in this code. If you're curious about binary expression trees and parsing | |
context-free languages, then hopefully this will be useful. | |
Also see a very simple JSON parser: https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9 | |
Two simplifications: | |
1. Only integers are handled, not floats. | |
2. Exponents are not handled. | |
Big-O performance characteristics: O(N) time and O(N) extra space, | |
where N = len(arithmetic_expression). | |
Note that the parsing functions below take a `left_tree` argumet; recursive | |
calls to the parsing functions pass into the `left_tree` argument the binary | |
expression tree that's been constructed so far. This supports the creation of | |
left-associative binary expression trees, which in turn support the correct | |
evaluation of the arithmetic. See how addition/subtraction are left associative: | |
'1 - 2 + 3' is properly evaluated with left association ('(1 - 2) + 3') and | |
not with right association ('1 - (2 + 3)'). Similarly, see how multiplication/division | |
is left association: '1 / 2 * 3' is properly evaluated as '(1 / 2) * 3', not as | |
'1 / (2 * 3)'. When a left-associative binary expression tree is evaluated via | |
a post-order traversal, then the encoded arithmetic expression is evaluated | |
properly, in a left-associative manner. | |
Exercises for the reader: | |
1. Change the code to handle floats, in addition to integers. | |
2. Change the code to handle the exponent '**' binary operator. | |
3. Read the code and convince yourself that it operates on binary expressions | |
from left to right while obeying PEMDAS (https://en.wikipedia.org/wiki/Order_of_operations#Mnemonics). | |
E.g.: 1 + 2 * 3 + 4 => 1 + 6 + 4 => 7 + 4 => 11 | |
E.g.: 2 * 3 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 * 1 + 5 * (9 + 2 * 5) => 6 + 1 + 5 * (9 + 2 * 5) | |
=> 7 + 5 * (9 + 2 * 5) => 7 + 5 * (9 + 10) => 7 + 5 * 19 => 7 + 95 => 102 | |
4. Develop nice validation/error messages, like those in | |
https://gist.github.com/py-in-the-sky/02d18c427c07658adf0261a572e442d9 | |
5. Read the code and convince yourself of the Big-O performance characteristics declared above. (It | |
may help to keep in mind that the parser is a recursive-descent parser.) | |
6. There are several ways to traverse a binary tree, three of which are in-order traversal, pre-order traversal, | |
and post-order traversal. In the case of evaluating a binary expression tree in order to get the final number | |
value of the expression, only post-order traversal makes sense. Why? | |
7. Debugging tool: develop a `pretty_print` function to print binary expression trees. E.g.: | |
pretty_print(parse('(1 + 2) * 3 + 4')) | |
+ | |
/ \ | |
* 4 | |
/ \ | |
+ 3 | |
/ \ | |
1 2 | |
""" | |
from __future__ import division # Tests pass both with and without this line uncommented. | |
### Top-level Function | |
def eval_arithmetic(arithmetic_expression): | |
"""Given a string encoded with an arithmetic expression, | |
returns the number to which the arithmetic expression evaluates. | |
E.g.: | |
>>> eval_arithmetic('1 + 2 * (3 + 4) - 5') | |
10 | |
""" | |
tokens = tokenize(arithmetic_expression) | |
binary_expression_tree = parse(tokens) | |
number = evaluate(binary_expression_tree) | |
return number | |
### Tokenize | |
is_token = lambda char: char in '+-*/()' | |
is_digit = lambda char: '0' <= char <= '9' | |
is_leading_digit = lambda char: '1' <= char <= '9' | |
is_whitespace = lambda char: char == ' ' | |
def tokenize(arithmetic_expression): | |
def _tokens(): | |
i = 0 | |
while i < len(arithmetic_expression): | |
if is_token(arithmetic_expression[i]): | |
yield arithmetic_expression[i] | |
i += 1 | |
elif is_leading_digit(arithmetic_expression[i]): | |
i, integer = _parse_integer(i) | |
yield integer | |
else: | |
assert is_whitespace(arithmetic_expression[i]) | |
i += 1 | |
def _parse_integer(i): | |
j = i + 1 | |
while j < len(arithmetic_expression) and is_digit(arithmetic_expression[j]): | |
j += 1 | |
return j, int(arithmetic_expression[i:j]) | |
return tuple(_tokens()) | |
### Parse | |
is_expression_opening_token = lambda token: token == '(' | |
is_expression_closing_token = lambda token: token == ')' | |
is_term_closing_token = lambda token: token in tuple('+-)') | |
def parse(tokens): | |
return parse_arithmetic_expression(tokens, 0)[1] | |
def parse_arithmetic_expression(tokens, i, left_tree=None): | |
if left_tree is None: # very beginning of expression | |
i, term = parse_term(tokens, i) | |
return parse_arithmetic_expression(tokens, i, term) | |
elif i == len(tokens) or is_expression_closing_token(tokens[i]): # very end of expression | |
return i, left_tree | |
else: # in the middle of expression | |
assert tokens[i] in '+-' | |
operator = tokens[i] | |
i, term = parse_term(tokens, i+1) | |
return parse_arithmetic_expression(tokens, i, (left_tree, operator, term)) | |
def parse_term(tokens, i, left_tree=None): | |
if left_tree is None: # very beginning of term | |
i, factor = parse_factor(tokens, i) | |
return parse_term(tokens, i, factor) | |
elif i == len(tokens) or is_term_closing_token(tokens[i]): # very end of term | |
return i, left_tree | |
else: # in the middle of term | |
assert tokens[i] in '*/' | |
operator = tokens[i] | |
i, factor = parse_factor(tokens, i+1) | |
return parse_term(tokens, i, (left_tree, operator, factor)) | |
def parse_factor(tokens, i, positive=True): | |
if isinstance(tokens[i], int): | |
return i+1, tokens[i] if positive else -tokens[i] | |
elif is_expression_opening_token(tokens[i]): | |
i, expression = parse_arithmetic_expression(tokens, i+1) | |
assert is_expression_closing_token(tokens[i]) | |
return i+1, expression if positive else (-1, '*', expression) | |
else: | |
assert tokens[i] in '+-' # Unary operators | |
return parse_factor(tokens, i+1, positive if tokens[i] == '+' else not positive) | |
### Evaluate | |
import operator as op | |
OPERATIONS = { | |
'+': op.add, | |
'-': op.sub, | |
'*': op.mul, | |
'/': lambda a,b: a / b # `op.div` is not influenced by `from __future__ import division`. | |
} | |
def evaluate(binary_expression_tree): | |
if isinstance(binary_expression_tree, int): # Leaf node | |
return binary_expression_tree | |
elif len(binary_expression_tree) == 1: | |
return evaluate(binary_expression_tree[0]) # E.g., '(2)' | |
else: | |
assert len(binary_expression_tree) == 3 | |
left_value = evaluate(binary_expression_tree[0]) | |
operation = OPERATIONS[binary_expression_tree[1]] | |
right_value = evaluate(binary_expression_tree[2]) | |
return operation(left_value, right_value) | |
### Test | |
import random | |
COIN = HEADS, TAILS = True, False | |
coin_flip = lambda: random.choice(COIN) | |
FACTOR_OPTIONS = INT, PARENTHETIC_EXPRESSION, UNARY_OPERATOR = 'int paren unary'.split() | |
random_factor_option = lambda: random.choice(FACTOR_OPTIONS) | |
RECURSION_DEPTH = 4 | |
max_recursion_depth_reached = lambda n: n >= RECURSION_DEPTH | |
random_leading_digit = lambda: str(random.randint(1, 9)) | |
random_trailing_digit = lambda: str(random.randint(0, 9)) | |
def generate_arithmetic_expression(depth=0): | |
if max_recursion_depth_reached(depth) or coin_flip() is HEADS: | |
return generate_term(depth) | |
else: | |
operator = random.choice('+-') | |
return ' '.join([generate_term(depth), operator, generate_arithmetic_expression(depth + 1)]) | |
def generate_term(depth): | |
if max_recursion_depth_reached(depth) or coin_flip() is HEADS: | |
return generate_factor(depth) | |
else: | |
operator = random.choice('*/') | |
return ' '.join([generate_factor(depth), operator, generate_term(depth + 1)]) | |
def generate_factor(depth): | |
factor_option = random_factor_option() | |
if factor_option is INT: | |
leading_digit = random_leading_digit() | |
trailing_digits = ''.join(random_trailing_digit() for _ in xrange(random.randint(0, 5))) | |
return leading_digit + trailing_digits | |
elif factor_option is PARENTHETIC_EXPRESSION: | |
return '(' + generate_arithmetic_expression(depth + 1) + ')' | |
else: | |
unary_operator = random.choice('+-') | |
return unary_operator + generate_factor(depth + 1) | |
def tests(): | |
divide_by_zero_count = 0 | |
test_case_count = 1000 | |
random_test_cases = [generate_arithmetic_expression() for _ in xrange(test_case_count)] | |
hard_coded_test_cases = [ | |
'4 / 2 * 3 + 4 - 6', | |
'2 * -2', | |
'2 - 2', | |
'4 / 2 * (3 + 4) - 6', | |
'-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / 6 / -7 - 908', | |
'-23 / ((4 - 6) * (20 + 202 - 8) / 4) / 3 * 3405 * 3 / 5 / -(-(3 + 3)) / -(3 + 4 + -10/-10) - 908', | |
'(2)', | |
'1/2/2', | |
'-2 + -2', | |
'(-2 + -2)', | |
'-(-2 + -2)', | |
'-(1 + 1) + -(1 + 1)', | |
'1 -1', | |
'1-1', | |
'4*3-1*5*5/5', | |
'4032 / 7040083', | |
'1 + 2 * (3 + 4) - 5', | |
] | |
for arithmetic_expression in random_test_cases+hard_coded_test_cases: | |
try: | |
actual = eval_arithmetic(arithmetic_expression) | |
expected = eval(arithmetic_expression) # Comparing against Python's built-in `eval`. | |
print '{} = {} (actual) = {} (expected)\n'.format(arithmetic_expression, actual, expected) | |
assert actual == expected | |
except ZeroDivisionError: | |
print 'Division by zero: {}\n'.format(arithmetic_expression) | |
divide_by_zero_count += 1 | |
print 'Tests pass! ({} test cases out of {} threw ZeroDivisionError.)'.format(divide_by_zero_count, test_case_count) | |
if __name__ == '__main__': | |
tests() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment