Created
January 4, 2022 17:32
-
-
Save jg-rp/00e213368782711e1720274693db3512 to your computer and use it in GitHub Desktop.
Arithmetic expression evaluation using a stack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Evaluate arithmetic expressions using a stack by fist converting | |
an expression from infix notation to postfix notation. | |
Expressions are tokenized using regular expressions, as described at | |
https://docs.python.org/3/library/re.html#writing-a-tokenizer | |
""" | |
import operator | |
import re | |
from collections import deque | |
from typing import Deque | |
from typing import Iterable | |
from typing import Iterator | |
from typing import List | |
from typing import NamedTuple | |
from typing import Union | |
TOKEN_MUL = "MUL" | |
TOKEN_ADD = "ADD" | |
TOKEN_SUB = "SUB" | |
TOKEN_DIV = "DIV" | |
TOKEN_POW = "POW" | |
TOKEN_LP = "LP" | |
TOKEN_RP = "RP" | |
TOKEN_NEG = "NEG" | |
TOKEN_POS = "POS" | |
TOKEN_INT = "INT" | |
TOKEN_FLOAT = "FLOAT" | |
TOKEN_SKIP = "SKIP" | |
TOKEN_ILLEGAL = "ILLEGAL" | |
TOKEN_EMPTY = "EMPTY" | |
class Token(NamedTuple): | |
type: str | |
value: str | |
Operand = Union[int, float] | |
OPERANDS = { | |
TOKEN_INT: int, | |
TOKEN_FLOAT: float, | |
} | |
BINARY_OPERATORS = { | |
TOKEN_ADD: operator.add, | |
TOKEN_SUB: operator.sub, | |
TOKEN_MUL: operator.mul, | |
TOKEN_DIV: operator.truediv, | |
TOKEN_POW: operator.pow, | |
} | |
UNARY_OPERATORS = { | |
TOKEN_NEG: operator.neg, | |
TOKEN_POS: operator.pos, | |
} | |
# A map of binary operators to unary operators that use the same | |
# symbol. If a token with an overloaded operator appears after a | |
# token with a type in UNARY_LOOK_BEHIND, it will be considered a | |
# unary operator. | |
OVERLOADED_OPERATORS = { | |
TOKEN_SUB: TOKEN_NEG, | |
TOKEN_ADD: TOKEN_POS, | |
} | |
EMPTY = Token(TOKEN_EMPTY, "") | |
UNARY_LOOK_BEHIND = frozenset( | |
[ | |
*BINARY_OPERATORS, | |
TOKEN_EMPTY, | |
TOKEN_LP, | |
] | |
) | |
PRECEDENCE = { | |
**{op: 1 for op in BINARY_OPERATORS}, | |
TOKEN_MUL: 2, | |
TOKEN_DIV: 2, | |
TOKEN_POW: 5, | |
**{op: 3 for op in UNARY_OPERATORS}, | |
} | |
RIGHT_ASSOCIATIVE = frozenset( | |
[ | |
TOKEN_POW, | |
] | |
) | |
TOKEN_RULES = ( | |
(TOKEN_LP, r"\("), | |
(TOKEN_RP, r"\)"), | |
(TOKEN_ADD, r"\+"), | |
(TOKEN_SUB, r"\-"), | |
(TOKEN_POW, r"\*\*"), | |
(TOKEN_MUL, r"\*"), | |
(TOKEN_DIV, r"\/"), | |
(TOKEN_FLOAT, r"\d+\.\d*"), | |
(TOKEN_INT, r"\d+"), | |
(TOKEN_SKIP, r"\s+"), | |
(TOKEN_ILLEGAL, r"."), | |
) | |
RE_TOKENS = re.compile( | |
"|".join( | |
f"(?P<{name}>{pattern})" for name, pattern in TOKEN_RULES | |
), | |
re.DOTALL, | |
) | |
def tokenize(expression: str) -> Iterator[Token]:
    """Yield tokens from the given arithmetic expression.

    Whitespace between tokens is skipped.

    Raises:
        ValueError: If a character matches no known token pattern.
    """
    for match in RE_TOKENS.finditer(expression):
        kind = match.lastgroup
        value = match.group()
        if kind == TOKEN_SKIP:
            # Ignore whitespace between tokens.
            continue
        if kind == TOKEN_ILLEGAL:
            # Raise a specific, conventional exception type instead of
            # a bare Exception. ValueError subclasses Exception, so any
            # caller catching the old type still works.
            raise ValueError(f"unexpected {value!r}")
        assert kind is not None
        yield Token(type=kind, value=value)
def postfix(tokens: Iterable[Token]) -> List[Token]:
    """Convert a stream of tokens in infix notation into a list of
    tokens in postfix notation.

    Implements the shunting-yard algorithm. Overloaded +/- tokens are
    disambiguated into unary POS/NEG by looking at the previous
    token's type.
    """
    post: List[Token] = []
    stack: Deque[Token] = deque()
    # Previous token. Used for determining if an overloaded operator
    # is binary or unary. A hyphen, for example, could be infix
    # subtraction or prefix negation.
    prev = EMPTY
    for token in tokens:
        if token.type in OPERANDS:
            post.append(token)
        elif token.type == TOKEN_LP:
            stack.append(token)
        elif token.type == TOKEN_RP:
            # Pop operators down to, and discard, the matching "(".
            while stack and stack[-1].type != TOKEN_LP:
                post.append(stack.pop())
            assert (
                stack and stack.pop().type == TOKEN_LP
            ), "unbalanced parentheses"
        elif token.type in BINARY_OPERATORS:
            if (
                token.type in OVERLOADED_OPERATORS
                and prev.type in UNARY_LOOK_BEHIND
            ):
                # Unary operator. A prefix operator must never pop
                # anything: operators below it on the stack are still
                # waiting for the operand that follows it. Routing it
                # through the binary pop loop (the previous behavior)
                # corrupted output for input like "2 ** -3" or "- -2".
                token = token._replace(
                    type=OVERLOADED_OPERATORS[token.type]
                )
                stack.append(token)
            elif not stack or stack[-1].type == TOKEN_LP:
                stack.append(token)
            else:
                # Pop operators that bind at least as tightly, unless
                # this operator is right-associative.
                while (
                    stack
                    and stack[-1].type != TOKEN_LP
                    and token.type not in RIGHT_ASSOCIATIVE
                    and PRECEDENCE[token.type]
                    <= PRECEDENCE[stack[-1].type]
                ):
                    post.append(stack.pop())
                stack.append(token)
        prev = token
    # Flush remaining operators; any leftover "(" is unmatched.
    while stack:
        token = stack.pop()
        assert token.type != TOKEN_LP, "unbalanced parentheses"
        post.append(token)
    return post
def eval_postfix(tokens: Iterable[Token]) -> Operand:
    """Evaluate an expression in postfix notation given as a
    sequence of tokens."""
    operands: List[Operand] = []
    push = operands.append
    for token in tokens:
        kind = token.type
        if kind in OPERANDS:
            # Parse the literal text with int() or float() as appropriate.
            push(OPERANDS[kind](token.value))
        elif kind in BINARY_OPERATORS:
            # Operands were pushed left-first, so pop right, then left.
            right, left = operands.pop(), operands.pop()
            push(BINARY_OPERATORS[kind](left, right))
        elif kind in UNARY_OPERATORS:
            push(UNARY_OPERATORS[kind](operands.pop()))
    # A well-formed expression reduces to exactly one value.
    assert len(operands) == 1
    return operands.pop()
def eval_infix(expression: str) -> Operand:
    """Evaluate an expression string given in infix notation."""
    tokens = tokenize(expression)
    return eval_postfix(postfix(tokens))
if __name__ == "__main__": | |
# Example usage | |
result = eval_infix("6 * (2.5 + 42) / (7 - 5.0)") | |
print(result) # 133.5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment