Created
January 28, 2021 18:58
-
-
Save costrouc/4350deef7d0eb686d9b22a11535176a9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import dataclasses | |
@dataclasses.dataclass | |
class Token: | |
type: str | |
value: str | |
def tokenize(tokens, text): | |
regex = '|'.join([f'(?P<{name}>{pattern})' for name, pattern in tokens.items()]) | |
for match in re.finditer(regex, text): | |
yield Token(type=match.lastgroup, value=match.group(0)) | |
yield Token(type=None, value=None) | |
import dataclasses | |
@dataclasses.dataclass | |
class Context: | |
grammar: dict | |
token_iterator: iter | |
right_token: str = None | |
def advance(context, token_type): | |
if context.right_token.type == token_type: | |
left_token = context.right_token | |
context.right_token = next(context.token_iterator) | |
return left_token | |
def parse(context, right_binding_power=0): | |
left_token = context.right_token or next(context.token_iterator) | |
context.right_token = next(context.token_iterator) | |
left_ast = context.grammar[left_token.type]['null_denotation'](context, left_token) | |
while right_binding_power < context.grammar[context.right_token.type].get('left_binding_power', 0): | |
left_token = context.right_token | |
context.right_token = next(context.token_iterator) | |
left_ast = context.grammar[left_token.type]['left_denotation'](left_token, context, left_ast) | |
return left_ast | |
def grammar_update(grammar, token_type, left_binding_power=0, null_denotation=None, left_denotation=None): | |
if token_type in grammar: | |
grammar[token_type]['left_binding_power'] = max( | |
grammar[token_type].get('left_binding_power', 0), | |
left_binding_power) | |
if null_denotation is not None: | |
if 'null_denotation' in grammar: | |
raise RuntimeError(f'null denotation already set for {token_type}') | |
else: | |
grammar[token_type]['null_denotation'] = null_denotation | |
if left_denotation is not None: | |
if 'left_denotation' in grammar: | |
raise RuntimeError(f'left denotation already set for {token_type}') | |
else: | |
grammar[token_type]['left_denotation'] = left_denotation | |
else: | |
grammar[token_type] = { | |
'left_binding_power': left_binding_power, | |
'null_denotation': null_denotation, | |
'left_denotation': left_denotation | |
} | |
def grammar_end(grammar): | |
grammar_update(grammar, token_type=None) | |
def grammar_symbol(grammar, token_type): | |
grammar_update(grammar, token_type) | |
def grammar_literal(grammar, token_type, function): | |
def nud_function(context, token): | |
return function(token) | |
grammar_update(grammar, token_type, | |
left_binding_power=0, | |
null_denotation=nud_function) | |
def grammar_prefix(grammar, token_type, left_binding_power, function): | |
def nud_function(context, token): | |
operand = parse(context, right_binding_power=left_binding_power) | |
return function(token, operand) | |
grammar_update(grammar, token_type, | |
left_binding_power=left_binding_power, | |
null_denotation=nud_function) | |
def grammar_infix(grammar, token_type, left_binding_power, function): | |
def led_function(token, context, left_ast): | |
right_ast = parse(context, right_binding_power=left_binding_power) | |
return function(token, left_ast, right_ast) | |
grammar_update(grammar, token_type, | |
left_binding_power=left_binding_power, | |
left_denotation=led_function) | |
def grammar_infix_r(grammar, token_type, left_binding_power, function): | |
def led_function(token, context, left_ast): | |
right_ast = parse(context, right_binding_power=left_binding_power - 1) | |
return function(token, left_ast, right_ast) | |
grammar_update(grammar, token_type, | |
left_binding_power=left_binding_power, | |
left_denotation=led_function) | |
tokens = { | |
'float': '\d+\.\d+', | |
'integer': '\d+', | |
'subtraction': '\-', | |
'addition': '\+', | |
'multiplication': '\*', | |
'power': '\^', | |
} | |
grammar = { } | |
def integer_literal(token): | |
return int(token.value) | |
def float_literal(token): | |
return float(token.value) | |
def subtraction_infix(token, left_ast, right_ast): | |
return left_ast - right_ast | |
def subtraction_prefix(token, right_ast): | |
return -right_ast | |
def addition_infix(token, left_ast, right_ast): | |
return left_ast + right_ast | |
def multiplication_infix(token, left_ast, right_ast): | |
return left_ast * right_ast | |
def power_infix_r(token, left_ast, right_ast): | |
return pow(left_ast, right_ast) | |
grammar_end(grammar) | |
grammar_literal(grammar, 'integer', integer_literal) | |
grammar_literal(grammar, 'float', float_literal) | |
grammar_infix(grammar, 'subtraction', 10, subtraction_infix) | |
grammar_prefix(grammar, 'subtraction', 10, subtraction_prefix) | |
grammar_infix(grammar, 'addition', 10, addition_infix) | |
grammar_infix(grammar, 'multiplication', 20, multiplication_infix) | |
grammar_infix_r(grammar, 'power', 30, power_infix_r) | |
def grammar_enclosed(grammar, begin_token_type, end_token_type, left_binding_power, function): | |
def nud_function(context, left_token): | |
body = parse(context) | |
right_token = advance(context, end_token_type) | |
return function(left_token, right_token, body) | |
grammar_update(grammar, begin_token_type, | |
left_binding_power=left_binding_power, | |
null_denotation=nud_function) | |
grammar_update(grammar, end_token_type) | |
tokens.update({ | |
'left_paren': '\(', | |
'right_paren': '\)', | |
'bar': '\|' | |
}) | |
def parenthesis_enclosed(left_token, right_token, body): | |
return body | |
def absolute_enclosed(left_token, right_token, body): | |
return abs(body) | |
grammar_enclosed(grammar, 'left_paren', 'right_paren', 0, parenthesis_enclosed) | |
grammar_enclosed(grammar, 'bar', 'bar', 0, absolute_enclosed) | |
text = '1 + 1 * (2 - -----4) + |-7| + (2 ^ 3) ^ 2' | |
context = Context(grammar, tokenize(tokens, text)) | |
print(parse(context)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment