Last active
August 10, 2021 02:07
-
-
Save wataash/dc9f8352dc147b223382def86c3a592b to your computer and use it in GitHub Desktop.
A Python implementation of "Packrat Parsers Can Support Left Recursion" by Alessandro Warth, James R. Douglass, and Todd Millstein. http://www.vpri.org/pdf/tr2007002_packrat.pdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
2. An Overview of Packrat Parsing | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function == 'EVAL']) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=True) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: AST | |
pos: POS | |
def __repr__(self): | |
return f'M[{self.pos} {self.ans}]' | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 1. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m = MemoEntry(ans, Pos) | |
MEMO_set(R, P, m) # MEMO(R, P) <- m | |
print_memo_if_changed(R, P, indent) | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_expr() -> AST: | |
""" | |
expr ::= <num> "+" <num> | |
/ <num> "-" <num> | |
""" | |
def rule1() -> AST: | |
"""<num> "+" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_plus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<num> "-" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('1234-5') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
# AST(string='expr', children=[AST(string='1234', children=[]), AST(string='-', children=[]), AST(string='5', children=[])]) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3. Adding Support for Left Recursion | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function == 'EVAL']) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=True) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: AST | |
pos: POS | |
def __repr__(self): | |
return f'M[{self.pos} {self.ans}]' | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 1. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m = MemoEntry(ans, Pos) | |
MEMO_set(R, P, m) # MEMO(R, P) <- m | |
print_memo_if_changed(R, P, indent) | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_expr() -> AST: | |
""" | |
expr ::= <expr> "-" <num> / <num> | |
""" | |
def rule1() -> AST: | |
"""<expr> "-" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('1-2-3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
# > The parser is doomed to repeat the same steps forever, or more | |
# > precisely, until the computer eventually runs out of the stack space. | |
# RecursionError | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3.1 Avoiding Infinite Recursion in Left-Recursive Rules | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function == 'EVAL']) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=False) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: AST | |
pos: POS | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 2. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
m = MemoEntry(FAIL, P) | |
MEMO_set(R, P, m) | |
print_memo_if_changed(R, P, indent) | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m.ans = ans | |
m.pos = Pos | |
print_memo_if_changed(R, P, indent) | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_expr() -> AST: | |
""" | |
expr ::= <expr> "-" <num> / <num> | |
""" | |
def rule1() -> AST: | |
"""<expr> "-" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('1-2-3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
# > The parser will then move on to the second choice, <num>, which will | |
# > succeed after consuming the "1", and leave the rest of the input, | |
# > "-2-3", unprocessed. | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3.2 Supporting Direct Left Recursion | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function in ['EVAL', 'GROW_LR']]) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=False) | |
class LR: | |
"""LR""" | |
detected: bool | |
@dataclasses.dataclass(frozen=False) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: t.Union[AST, LR] | |
pos: POS | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 4. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
lr = LR(False) | |
m = MemoEntry(lr, P) | |
MEMO_set(R, P, m) | |
print_memo_if_changed(R, P, indent) | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m.ans = ans | |
m.pos = Pos | |
print_memo_if_changed(R, P, indent) | |
if lr.detected and ans is not FAIL: | |
print(f'\x1b[34m{indent} GROW-LR\x1b[0m') | |
return GROW_LR(R, P, m, None) | |
else: | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
if isinstance(m.ans, LR): | |
m.ans.detected = True | |
print_memo_if_changed(R, P, indent) | |
return FAIL | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# noinspection PyPep8Naming | |
def GROW_LR(R: 'RULE', P: POS, M: MemoEntry, H: 'unknown') -> AST: | |
""" | |
Figure 3. | |
GROW-LR | |
""" | |
global Pos | |
# ... # line A | |
while True: | |
Pos = P | |
# ... # line B | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
if ans is FAIL or Pos <= M.pos: | |
break | |
M.ans = ans | |
M.pos = Pos | |
# ... # line C | |
Pos = M.pos | |
return M.ans | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_expr() -> AST: | |
""" | |
expr ::= <expr> "-" <num> / <num> | |
""" | |
def rule1() -> AST: | |
"""<expr> "-" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('1-2-3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3.2 Supporting Direct Left Recursion | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function in ['EVAL', 'GROW_LR']]) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=False) | |
class LR: | |
"""LR""" | |
detected: bool | |
@dataclasses.dataclass(frozen=False) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: t.Union[AST, LR] | |
pos: POS | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 4. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
lr = LR(False) | |
m = MemoEntry(lr, P) | |
MEMO_set(R, P, m) | |
print_memo_if_changed(R, P, indent) | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m.ans = ans | |
m.pos = Pos | |
print_memo_if_changed(R, P, indent) | |
if lr.detected and ans is not FAIL: | |
print(f'\x1b[34m{indent} GROW-LR\x1b[0m') | |
return GROW_LR(R, P, m, None) | |
else: | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
if isinstance(m.ans, LR): | |
m.ans.detected = True | |
print_memo_if_changed(R, P, indent) | |
return FAIL | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# noinspection PyPep8Naming | |
def GROW_LR(R: 'RULE', P: POS, M: MemoEntry, H: 'unknown') -> AST: | |
""" | |
Figure 3. | |
GROW-LR | |
""" | |
global Pos | |
# ... # line A | |
while True: | |
Pos = P | |
# ... # line B | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
if ans is FAIL or Pos <= M.pos: | |
break | |
M.ans = ans | |
M.pos = Pos | |
# ... # line C | |
Pos = M.pos | |
return M.ans | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_term() -> AST: | |
""" | |
term ::= <term> "+" <fact> | |
/ <term> "-" <fact> | |
/ <fact> | |
""" | |
def rule1() -> AST: | |
"""<term> "+" <fact>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_term, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_plus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_fact, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('term', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<term> "-" <fact>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_term, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_fact, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('term', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule3() -> AST: | |
"""<num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_fact, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('term', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
ast = rule3() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_fact() -> AST: | |
""" | |
fact ::= <fact> "*" <num> | |
/ <fact> "/" <num> | |
/ <num> | |
""" | |
def rule1() -> AST: | |
"""<fact> "*" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_fact, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_mul, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('fact', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<fact> "/" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_fact, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_div, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('fact', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule3() -> AST: | |
"""<num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('fact', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
ast = rule3() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('<num>', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
def rule_mul() -> AST: | |
""" | |
"*" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '*': | |
return FAIL | |
return AST('"*"', '*', len('*'), [], n_stack()) | |
def rule_div() -> AST: | |
""" | |
"/" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '/': | |
return FAIL | |
return AST('"/"', '/', len('/'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('1-2/3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
ast = APPLY_RULE_or_rollback_Pos(rule_term, Pos, pos_orig) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3.3 Getting Ready For Indirect Left Recursion | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function in ['EVAL', 'GROW_LR']]) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=False) | |
class LR: | |
"""LR""" | |
detected: bool | |
@dataclasses.dataclass(frozen=False) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: t.Union[AST, LR] | |
pos: POS | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
global memo_ | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
global memo_ | |
"""MEMO(R, P) <- m""" | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 4. | |
APPLY-RULE(R, P) | |
""" | |
global Pos | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
if m is None: | |
lr = LR(False) | |
m = MemoEntry(lr, P) | |
MEMO_set(R, P, m) | |
print_memo_if_changed(R, P, indent) | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
m.ans = ans | |
m.pos = Pos | |
print_memo_if_changed(R, P, indent) | |
if lr.detected and ans is not FAIL: | |
print(f'\x1b[34m{indent} GROW-LR\x1b[0m') | |
return GROW_LR(R, P, m, None) | |
else: | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
if isinstance(m.ans, LR): | |
m.ans.detected = True | |
print_memo_if_changed(R, P, indent) | |
return FAIL | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# noinspection PyPep8Naming | |
def GROW_LR(R: 'RULE', P: POS, M: MemoEntry, H: 'unknown') -> AST: | |
""" | |
Figure 3. | |
GROW-LR | |
""" | |
global Pos | |
# ... # line A | |
while True: | |
Pos = P | |
# ... # line B | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
if ans is FAIL or Pos <= M.pos: | |
break | |
M.ans = ans | |
M.pos = Pos | |
# ... # line C | |
Pos = M.pos | |
return M.ans | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_x() -> AST: | |
""" | |
x ::= <expr> | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
if ast == FAIL: | |
return ast | |
return AST('x', BODY[pos_orig:Pos], 0, [ast], n_stack()) | |
def rule_expr() -> AST: | |
""" | |
expr ::= <x> "-" <num> / <num> | |
""" | |
def rule1() -> AST: | |
"""<fact> "*" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_x, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<fact> "/" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
def rule_mul() -> AST: | |
""" | |
"*" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '*': | |
return FAIL | |
return AST('"*"', '*', len('*'), [], n_stack()) | |
def rule_div() -> AST: | |
""" | |
"/" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '/': | |
return FAIL | |
return AST('"/"', '/', len('/'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('4-3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
# > This is clearly not the behavior we wanted. | |
ast = APPLY_RULE_or_rollback_Pos(rule_x, Pos, pos_orig) | |
print() | |
print(ast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0 | |
""" | |
https://gist.github.com/wataash/dc9f8352dc147b223382def86c3a592b | |
A Python implementation of "Packrat Parsers Can Support Left Recursion" by | |
Alessandro Warth, James R. Douglass, and Todd Millstein. | |
http://www.vpri.org/pdf/tr2007002_packrat.pdf | |
3.4 Adding Support for Left Recursion | |
""" | |
import collections | |
import dataclasses | |
import inspect | |
import re | |
import typing as t | |
def n_stack() -> int: | |
return len([None for x in inspect.stack() if x.function in ['EVAL', 'GROW_LR']]) - 1 | |
""" | |
[R.body]: | |
In the APPLY-RULE() function in the paper, the EVAL() function is invoked as: | |
EVAL(R.body) | |
where R is RULE. But I have no idea what the R.body (RULE's body) is -- | |
instead, we define the global variable "BODY", and invoke EVAL() function as: | |
EVAL(R) | |
""" | |
Body = t.NewType('Body', str) # not defined in the paper | |
# BODY = Body('1234-5') | |
POS = t.NewType('POS', int) # POS | |
# Pos = POS(0) # Pos | |
@dataclasses.dataclass(frozen=True) | |
class AST: | |
""" | |
AST | |
The data structure is not defined in the paper. | |
""" | |
type: str | |
string: str | |
leaf_len: int | |
children: list['AST'] | |
n_stack: int | |
def __repr__(self): | |
color = ['\x1b[31m', '\x1b[32m', '\x1b[33m'][self.n_stack % 3] | |
if self.children: | |
children = " ".join(str(x) for x in self.children) | |
return f'{color}<\x1b[0m{self.type}:{self.string} {children}{color}>\x1b[0m' | |
return f'{color}<\x1b[0m{self.type}:{self.string}{color}>\x1b[0m' | |
FAIL = AST('FAIL', '', 0, [], 0) # FAIL | |
@dataclasses.dataclass(frozen=False) | |
class HEAD: | |
"""HEAD""" | |
rule: 'RULE' | |
involvedSet: set['RULE'] | |
evalSet: set['RULE'] | |
def __repr__(self): | |
return f'HEAD[{self.rule.__name__} {{{" ".join([x.__name__ for x in self.involvedSet])}}} {{{" ".join([x.__name__ for x in self.evalSet])}}}]' | |
@dataclasses.dataclass(frozen=False) | |
class LR: | |
"""LR""" | |
seed: AST | |
rule: 'RULE' | |
head: t.Optional[HEAD] | |
next: 'LR' | |
def __repr__(self): | |
l = self.next | |
n_next = 0 # excluding 'unknown_stack_bottom_value' | |
while l != 'unknown_stack_bottom_value': | |
l = l.next | |
n_next += 1 | |
return f'\x1b[35mLR[\x1b[0m{self.seed} {self.rule.__name__} {self.head} {n_next}\x1b[35m]\x1b[0m' | |
@dataclasses.dataclass(frozen=False) | |
class MemoEntry: | |
"""MEMOENTRY""" | |
ans: t.Union[AST, LR] | |
pos: POS | |
def __repr__(self): | |
return f'M[{self.pos} {self.ans}]' | |
# keep order for better debug-print() | |
memo_: collections.OrderedDict[tuple['RULE', POS], MemoEntry] = collections.OrderedDict() | |
# noinspection PyPep8Naming | |
def MEMO(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
"""MEMO :(RULE, POS) -> MEMOENTRY""" | |
global memo_ | |
return memo_.get((R, P)) | |
# noinspection PyPep8Naming | |
def MEMO_set(R: 'RULE', P: POS, m: MemoEntry): | |
"""MEMO(R, P) <- m""" | |
global memo_ | |
assert (R, P) not in memo_ | |
memo_[R, P] = m | |
memo_str_last: dict[tuple['RULE', POS], str] = {} | |
def print_memo_if_changed(R: 'RULE', P: POS, indent: str): | |
global memo_str_last | |
s = f'{indent} MEMO({R.__name__}, {P}) <- {MEMO(R, P)}' | |
if s == memo_str_last.get((R, P)): | |
return | |
print(s) | |
memo_str_last[R, P] = s | |
# def EVAL(body: Body) -> AST: # [R.body] | |
# noinspection PyPep8Naming | |
def EVAL(R: 'RULE') -> AST: | |
""" | |
EVAL | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: evaluate\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
ast = R() | |
if ast is FAIL: | |
assert Pos == pos_orig | |
print(f'\x1b[34m{indent}{R.__name__}: failed\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
return FAIL | |
pos2 = Pos + ast.leaf_len | |
print(f'\x1b[34m{indent}{R.__name__}: matched: {BODY[pos_orig:pos2]}\x1b[0m') | |
if Pos != pos2: | |
print(f'{indent} Pos:{Pos}->{pos2} BODY:{BODY[Pos:pos2]}>{BODY[pos2:]}') | |
print(f'{indent} ast:{ast}') | |
Pos = pos2 | |
return ast | |
HEADS: dict[POS, t.Optional[HEAD]] = {} # HEADS | |
# noinspection PyPep8Naming | |
def RECALL(R: 'RULE', P: POS) -> t.Optional[MemoEntry]: | |
""" | |
Figure 9. | |
RECALL(R, P) | |
""" | |
indent = ' ' * n_stack() | |
m = MEMO(R, P) | |
h = HEADS.get(P) | |
print(f'{indent} RECALL: m:{m} h:{h}') | |
# If not growing a sed parse, just return what is stored | |
# in the memo table. | |
if h is None: | |
return m | |
# Do not evaluate any rule that is not involved in this | |
# left recursion. | |
# if m is None and R not in {h.head} | h.involvedSet # typo? | |
if m is None and R not in {h.rule} | h.involvedSet: | |
return MemoEntry(FAIL, P) | |
# Allow involved rules to be evaluated, but only once, | |
# during a seed-growing iteration. | |
if R in h.evalSet: | |
h.evalSet = h.evalSet - {R} | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) | |
m.ans = ans | |
m.pos = Pos | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
return m | |
# noinspection PyPep8Naming | |
def LR_ANSWER(R: 'RULE', P: POS, M: MemoEntry) -> AST: | |
""" | |
Figure 8. | |
LR-ANSWER(R, P, M) | |
""" | |
indent = ' ' * n_stack() | |
h = M.ans.head | |
if h.rule is not R: | |
return M.ans.seed | |
else: | |
M.ans = M.ans.seed | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
if M.ans is FAIL: | |
return FAIL | |
else: | |
return GROW_LR(R, P, M, h) | |
LRStack: LR = 'unknown_stack_bottom_value' # LRStack | |
LRStack_str_last = '' | |
def print_lr_stack_if_changed(indent: str): | |
global LRStack, LRStack_str_last | |
if LRStack == 'unknown_stack_bottom_value': | |
return | |
s = '' | |
s += f'{indent} LRStack:{LRStack}\n' | |
l = LRStack.next | |
while l != 'unknown_stack_bottom_value': | |
s+= f'{indent} {l}\n' | |
l = l.next | |
if s != LRStack_str_last: | |
print(s, end='') | |
LRStack_str_last = s | |
# noinspection PyPep8Naming | |
def SETUP_LR(R: 'RULE', L: LR): | |
""" | |
Figure 7. | |
SETUP-LR(R, L) | |
""" | |
global LRStack | |
if L.head is None: | |
L.head = HEAD(R, set(), set()) | |
s = LRStack | |
# while s.head != L.head: | |
while s != 'unknown_stack_bottom_value' and s.head != L.head: | |
s.head = L.head | |
L.head.involvedSet = L.head.involvedSet | {s.rule} | |
print_lr_stack_if_changed(' ' * n_stack()) | |
# noinspection PyPep8Naming | |
def APPLY_RULE(R: 'RULE', P: POS) -> AST: | |
""" | |
Figure 6. | |
APPLY-RULE(R, P) | |
""" | |
global LRStack, Pos | |
indent = ' ' * n_stack() | |
print(f'\x1b[34m{indent}{R.__name__}: APPLY-RULE\x1b[0m') | |
print(f'{indent} {Pos} {BODY[Pos:]}') | |
m = RECALL(R, P) | |
if m is None: | |
# Create a new LR and push it onto the rule | |
# invocation stack. | |
lr = LR(FAIL, R, None, LRStack) | |
LRStack = lr | |
# Memoize lr, then evaluate R. | |
m = MemoEntry(lr, P) | |
MEMO_set(R, P, m) | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
# Pop lr off the rule invocation stack. | |
LRStack = LRStack.next | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
m.pos = Pos | |
if lr.head is not None: | |
lr.seed = ans | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
return LR_ANSWER(R, P, m) | |
else: | |
m.ans = ans | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
return ans | |
else: | |
print(f'\x1b[34m{indent}{R.__name__}: using memo\x1b[0m') | |
print(f'{indent} Pos:{Pos}->{m.pos} BODY:{BODY[Pos:m.pos]}>{BODY[m.pos:]}') | |
print(f'{indent} m:{m}') | |
Pos = m.pos | |
if isinstance(m.ans, LR): | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
print(f'\x1b[34m{indent} SETUP-LR\x1b[0m') | |
SETUP_LR(R, m.ans) | |
print_memo_if_changed(R, P, indent) | |
print_lr_stack_if_changed(indent) | |
return m.ans.seed | |
else: | |
return m.ans | |
# noinspection PyPep8Naming | |
def APPLY_RULE_or_rollback_Pos(R: 'RULE', P: POS, pos_rollback_to: POS) -> AST: | |
global Pos | |
ast = APPLY_RULE(R, P) | |
if ast is FAIL: | |
Pos = pos_rollback_to | |
return ast | |
# noinspection PyPep8Naming | |
def GROW_LR(R: 'RULE', P: POS, M: MemoEntry, H: HEAD) -> AST: | |
""" | |
Figure 3. | |
GROW-LR | |
""" | |
global Pos | |
HEADS[P] = H # line A | |
while True: | |
Pos = P | |
H.evalSet = H.involvedSet.copy() # line B | |
# ans = EVAL(R.body) # [R.body] | |
ans = EVAL(R) # [R.body] | |
if ans is FAIL or Pos <= M.pos: | |
break | |
M.ans = ans | |
M.pos = Pos | |
HEADS[P] = None # line C | |
Pos = M.pos | |
return M.ans | |
# ----------------------------------------------------------------------------- | |
# RULE | |
# The type is defined in the paper | |
RULE = t.Callable[[], AST] | |
rule_expr: RULE | |
rule_num: RULE | |
rule_plus: RULE | |
rule_minus: RULE | |
def rule_x() -> AST: | |
""" | |
x ::= <expr> | |
""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast = APPLY_RULE_or_rollback_Pos(rule_expr, Pos, pos_orig) | |
if ast == FAIL: | |
return ast | |
return AST('x', BODY[pos_orig:Pos], 0, [ast], n_stack()) | |
def rule_expr() -> AST: | |
""" | |
expr ::= <x> "-" <num> / <num> | |
""" | |
def rule1() -> AST: | |
"""<fact> "*" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_x, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
ast1 = APPLY_RULE_or_rollback_Pos(rule_minus, Pos, pos_orig) | |
if ast1 == FAIL: | |
return FAIL | |
ast2 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast2 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0, ast1, ast2], n_stack()) | |
def rule2() -> AST: | |
"""<fact> "/" <num>""" | |
global BODY, Pos | |
pos_orig = Pos | |
ast0 = APPLY_RULE_or_rollback_Pos(rule_num, Pos, pos_orig) | |
if ast0 == FAIL: | |
return FAIL | |
return AST('expr', BODY[pos_orig:Pos], 0, [ast0], n_stack()) | |
global BODY, Pos | |
ast = rule1() | |
if ast != FAIL: | |
return ast | |
ast = rule2() | |
if ast != FAIL: | |
return ast | |
return FAIL | |
def rule_num() -> AST: | |
""" | |
<num> | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
match = re.search(r'^(\d+)', BODY[Pos:]) | |
if match is None: | |
return FAIL | |
return AST('num', match[1], len(match[1]), [], n_stack()) | |
def rule_plus() -> AST: | |
""" | |
"+" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '+': | |
return FAIL | |
return AST('"+"', '+', len('+'), [], n_stack()) | |
def rule_minus() -> AST: | |
""" | |
"-" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '-': | |
return FAIL | |
return AST('"-"', '-', len('-'), [], n_stack()) | |
def rule_mul() -> AST: | |
""" | |
"*" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '*': | |
return FAIL | |
return AST('"*"', '*', len('*'), [], n_stack()) | |
def rule_div() -> AST: | |
""" | |
"/" | |
""" | |
global BODY, Pos | |
if Pos >= len(BODY): | |
return FAIL | |
if BODY[Pos] != '/': | |
return FAIL | |
return AST('"/"', '/', len('/'), [], n_stack()) | |
# ----------------------------------------------------------------------------- | |
# main | |
if __name__ == '__main__': | |
BODY = Body('4-3') | |
Pos = POS(0) # Pos | |
pos_orig = Pos | |
# > This is clearly not the behavior we wanted. | |
ast = APPLY_RULE_or_rollback_Pos(rule_x, Pos, pos_orig) | |
print() | |
print(ast) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment