Last active
October 27, 2021 15:44
-
-
Save gdevanla/12a52739a9c369c20c41aafba5d4e2d5 to your computer and use it in GitHub Desktop.
Parser Combinator in Python - Notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import operator | |
import typing as tp | |
""" | |
a. Structure of Parser | |
p = (s -> Any object) | |
Partially consuming parser | |
p1 = (s -> (a, remaining)) | |
b. Build Small parsers | |
i. Parse any char | |
ii. Specific char | |
iii. Only digits | |
iv. New abstraction | |
c. Composing the small parsers | |
i. compose | |
ii. bind (may be) | |
d. Parser Combinators | |
i. chooseP | |
ii. Parse and eval a simple expression | |
iii. Support more complicated expression | |
""" | |
import operator | |
import typing as tp | |
ParserP = tp.Callable[[str], tp.Tuple[tp.Any, str]] | |
# A simple parser | |
example_parser = lambda s: (s[0], s[1:]) | |
class ParserError(Exception): | |
def __init__(self, msg, content): | |
super().__init__(f"{msg}: {content}") | |
# Some helper functions | |
def parse(p: ParserP, s: str) -> tp.Tuple[tp.Any, str]: | |
(a, s) = p(s) | |
return (a, s) | |
# Tiny parsers | |
def anyChar() -> ParserP: | |
def func(s): | |
return (s[0], s[1:]) | |
return func | |
# Parser for a specific character | |
def oneChar(c) -> ParserP: | |
def func(s): | |
if s[0] == c: | |
return (s[0], s[1:]) | |
raise ParserError(f"Unexpected {s[0]}, expecting {c}", s) | |
return func | |
# Parse for specific digit | |
def anyDigit() -> ParserP: | |
def func(s): | |
if s[0].isdigit(): | |
return (s[0], s[1:]) | |
raise ParserError(f"Expected digit, got {s[0]}", s) | |
return func | |
# Time for some abstraction | |
def oneCharP(c) -> ParserP: | |
return satisfy(lambda c1: c == c1) | |
def anyDigitP() -> ParserP: | |
return satisfy(lambda c: c.isdigit()) | |
# Generic Predicate Parser | |
def satisfy(pred_function: tp.Callable[["char"], bool]) -> ParserP: | |
def func(s): | |
if not s: | |
raise ParserError("Empty string", "") | |
if pred_function(s[0]): | |
return (s[0], s[1:]) | |
raise ParserError(f"Unexpected condition", s) | |
return func | |
# Compose parsers | |
def compose(p1: ParserP, p2: ParserP) -> ParserP: | |
def func(s): | |
(a, s1) = parse(p1, s) | |
(b, s2) = parse(p2, s1) | |
return ((a, b), s2) | |
return func | |
# Other interesting functions | |
# | |
# Parse a string | |
def strP(es: str) -> ParserP: | |
def func(s): | |
p = oneCharP(es[0]) | |
for c in es[1:]: | |
p = compose(p, oneCharP(c)) | |
return p(s) | |
return func | |
# 2 + 3 + 4 | |
# def expression(): | |
# parse digit - we can use anyDigit() | |
# parse operator - we need to be able to choose different options | |
# parse digit | |
# A choose parser combinator | |
def chooseP(p1: ParserP, p2: ParserP) -> ParserP: | |
def func(s): | |
try: | |
return p1(s) | |
except ParserError: | |
return p2(s) | |
return func | |
# now lets use chooseP | |
def mathOp(op): | |
return satisfy(lambda c: c == op) | |
def mathOpP() -> ParserP: | |
plus = mathOp("+") | |
minus = mathOp("-") | |
mult = mathOp("*") | |
div = mathOp("/") | |
return chooseP(plus, chooseP(minus, chooseP(mult, minus))) | |
# let start writing our expression | |
def expression_does_not_work(): | |
def func(s): | |
(digit1, s1) = parse(anyDigitP(), s) | |
(op, s2) = parse(mathOpP(), s1) # this does not work | |
(digit2, s3) = parse(anyDigitP(), s2) | |
return ((int(digit1), op, int(digit2)), s3) | |
return func | |
# Introduce the `bind` function | |
ParserF = tp.Callable[[tp.Any], ParserP] | |
def bind(p1: ParserP, pf: ParserF) -> ParserP: | |
def func(s): | |
(a, s1) = parse(p1, s) | |
p2 = pf(a) | |
(b, s2) = parse(p2, s1) | |
return (b, s2) | |
return func | |
def mathOpP1() -> ParserP: | |
def f(op): | |
if op == "+": | |
return operator.add | |
elif op == "-": | |
return operator.sub | |
elif op == "*": | |
return operator.mul | |
elif op == "/": | |
return operator.floordiv | |
plus = bind(mathOp("+"), lambda a: lambda s: (f(a), s)) | |
minus = bind(mathOp("-"), lambda a: lambda s: (f(a), s)) | |
mult = bind(mathOp("*"), lambda a: lambda s: (f(a), s)) | |
div = bind(mathOp("/"), lambda a: lambda s: (f(a), s)) | |
return chooseP(plus, chooseP(minus, chooseP(mult, div))) | |
# let start writing our expression | |
def expression(): | |
def func(s): | |
(digit1, s1) = parse(anyDigitP(), s) | |
(op, s2) = parse(mathOpP1(), s1) # this does not work | |
(digit2, s3) = parse(anyDigitP(), s2) | |
return (op(int(digit1), int(digit2)), s3) | |
return func | |
# need a way to recurse | |
def expression_or_digit(): | |
return chooseP(expression_2(), anyDigitP()) | |
def expression_2(): | |
def func(s): | |
(digit1, s1) = parse(anyDigitP(), s) | |
(op, s2) = parse(mathOpP1(), s1) | |
(digit2, s3) = parse(expression_or_digit(), s2) | |
return (op(int(digit1), int(digit2)), s3) | |
return func | |
# need more combinators | |
# def chainl(op: ParserP, base: ParserP): | |
# def bob(x): | |
# return chooseP(grab(x), (lambda s: (x, s))) | |
# def grab(x): | |
# def func0(s): | |
# (oper, s1) = op(s) | |
# (operand2, s2) = base(s1) | |
# return (bob(oper(x, operand2)), s2) | |
# return func0 | |
# return bind(base, bob) | |
# Revisiting strP | |
# Parse a string | |
def strP1(es: str) -> ParserP: | |
def f2(c): | |
def f(x): | |
f1 = lambda xs: lambda s: (x + xs, s) | |
return bind(oneCharP(c), f1) | |
return f | |
def func(s): | |
p = oneCharP(es[0]) | |
for c in es[1:]: | |
p = bind(p, f2(c)) | |
return p(s) | |
return func | |
# Another simple combinator | |
def manyP(p: ParserP): | |
def func(s): | |
result = [] | |
while True: | |
try: | |
(a, s) = parse(p, s) | |
result.append(a) | |
if not s: | |
break | |
except ParserError: | |
break | |
return ("".join(result), s) | |
return func |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment