Last active
July 24, 2023 09:31
-
-
Save yamnikov-oleg/fcf39fa8b22bf84b5f2931dac713d607 to your computer and use it in GitHub Desktop.
An example implementation of a formula evaluator written on top of Python's AST package. See https://blog.oyam.dev/python-formulas/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
An example implementation of a formula evaluator written on top of Python's AST package. | |
See https://blog.oyam.dev/python-formulas/ | |
Usage: | |
>>> evaluate_formula("a + b", {"a": 1, "b": 2}) | |
3 | |
""" | |
import ast | |
import operator | |
from typing import Any, Dict | |
def byte_offset_to_char_offset(source: str, byte_offset: int) -> int: | |
while True: | |
try: | |
pre_source = source.encode()[:byte_offset].decode() | |
break | |
except UnicodeDecodeError: | |
byte_offset -= 1 | |
continue | |
return len(pre_source) | |
class FormulaError(Exception): | |
pass | |
class FormulaSyntaxError(FormulaError): | |
def __init__(self, msg: str, lineno: int, offset: int): | |
self.msg = msg | |
self.lineno = lineno | |
self.offset = offset | |
@classmethod | |
def from_ast_node(cls, source: str, node: ast.AST, msg: str) -> "FormulaSyntaxError": | |
lineno = node.lineno | |
col_offset = node.col_offset | |
offset = byte_offset_to_char_offset(source, col_offset) | |
return cls(msg=msg, lineno=lineno, offset=offset + 1) | |
@classmethod | |
def from_syntax_error(cls, error: SyntaxError, msg: str) -> "FormulaSyntaxError": | |
return cls(msg=f"{msg}: {error.msg}", lineno=error.lineno, offset=error.offset) | |
def __str__(self): | |
return f"{self.lineno}:{self.offset}: {self.msg}" | |
class FormulaRuntimeError(FormulaError): | |
pass | |
MAX_FORMULA_LENGTH = 255 | |
def evaluate_formula(formula: str, vars: Dict[str, Any]) -> float: | |
if len(formula) > MAX_FORMULA_LENGTH: | |
raise FormulaSyntaxError("The formula is too long", 1, 1) | |
try: | |
node = ast.parse(formula, "<string>", mode="eval") | |
except SyntaxError as e: | |
raise FormulaSyntaxError.from_syntax_error(e, "Could not parse") | |
try: | |
return eval_node(formula, node, vars) | |
except FormulaSyntaxError: | |
raise | |
except Exception as e: | |
raise FormulaRuntimeError(f"Evaluation failed: {e}") | |
def eval_node(source: str, node: ast.AST, vars: Dict[str, Any]) -> float: | |
EVALUATORS = { | |
ast.Expression: eval_expression, | |
ast.Constant: eval_constant, | |
ast.Name: eval_name, | |
ast.BinOp: eval_binop, | |
ast.UnaryOp: eval_unaryop, | |
} | |
for ast_type, evaluator in EVALUATORS.items(): | |
if isinstance(node, ast_type): | |
return evaluator(source, node, vars) | |
raise FormulaSyntaxError.from_ast_node(source, node, "This syntax is not supported") | |
def eval_expression(source: str, node: ast.Expression, vars: Dict[str, Any]) -> float: | |
return eval_node(source, node.body, vars) | |
def eval_constant(source: str, node: ast.Constant, vars: Dict[str, Any]) -> float: | |
if isinstance(node.value, int) or isinstance(node.value, float): | |
return float(node.value) | |
else: | |
raise FormulaSyntaxError.from_ast_node(source, node, "Literals of this type are not supported") | |
def eval_name(source: str, node: ast.Name, vars: Dict[str, Any]) -> float: | |
try: | |
return float(vars[node.id]) | |
except KeyError: | |
raise FormulaSyntaxError.from_ast_node(source, node, f"Undefined variable: {node.id}") | |
def eval_binop(source: str, node: ast.BinOp, vars: Dict[str, Any]) -> float: | |
OPERATIONS = { | |
ast.Add: operator.add, | |
ast.Sub: operator.sub, | |
ast.Mult: operator.mul, | |
ast.Div: operator.truediv, | |
} | |
left_value = eval_node(source, node.left, vars) | |
right_value = eval_node(source, node.right, vars) | |
try: | |
apply = OPERATIONS[type(node.op)] | |
except KeyError: | |
raise FormulaSyntaxError.from_ast_node(source, node, "Operations of this type are not supported") | |
return apply(left_value, right_value) | |
def eval_unaryop(source: str, node: ast.UnaryOp, vars: Dict[str, Any]) -> float: | |
OPERATIONS = { | |
ast.USub: operator.neg, | |
} | |
operand_value = eval_node(source, node.operand, vars) | |
try: | |
apply = OPERATIONS[type(node.op)] | |
except KeyError: | |
raise FormulaSyntaxError.from_ast_node(source, node, "Operations of this type are not supported") | |
return apply(operand_value) | |
TEST_CASES = [ | |
("2", {}), | |
("2.0", {}), | |
("2e-1", {}), | |
("1 + 2 * (3.0 / 4.0)", {}), | |
("a * b / c", {"a": 1.0, "b": 3, "c": 91}), | |
("''", {}), | |
("1 ** 2", {}), | |
("1 // 2", {}), | |
("not 2", {}), | |
("und", {}), | |
("and and", {}), | |
("c(1)", {}), | |
("1/0", {}), | |
("0" * 256, {}), | |
("lambda a:" * 28, {}), | |
("(points - 100 * bans) / gamesPlayed", {"points": 1200, "bans": 3, "gamesPlayed": 23}), | |
] | |
for formula, vars in TEST_CASES: | |
try: | |
result = evaluate_formula(formula, vars) | |
print(formula, "=", result) | |
except FormulaError as e: | |
print(formula, "raises", type(e), str(e)) |
Nice work! How would you implement multiple formulas which are dependent from each other (but no cycles)? You mentioned it already in the Ideas section.
Sorry for the delayed reply.
You could give each formula a defined name (as we do it in Pythondef foo
), and then extend your parser to make it parse function calling node. The node is called Call
.
To prevent cycles you could either build a dependency tree during parsing and fail if one of the functions depends on a node that is already in the tree.
Or you could collect the stack of called functions during runtime and fail if one the functions appears in the stack twice.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice work!
How would you implement multiple formulas which are dependent from each other (but no cycles)? You mentioned it already in the Ideas section.