-
-
Save mtasic85/8f053b6d1e0503248aed1b94bdeb7d8b to your computer and use it in GitHub Desktop.
Python - PEG - Parsing Combinators - Simple JSON parser v2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
PEG - Parsing Combinators | |
Example: Simple JSON parser | |
''' | |
from typing import Any, Callable, Union | |
from string import digits, whitespace | |
# | |
# Combinator | |
# | |
class Combinator: | |
parser: 'Parser' | |
def __init__(self, parser: 'Parser'): | |
self.parser = parser | |
def parse(self) -> bool: | |
raise NotImplementedError() | |
class String(Combinator): | |
def parse(self) -> bool: | |
self.parser.skip_whitespace() | |
return self.parser.parse_stringlet() | |
class Char(Combinator): | |
c: str | |
def __init__(self, parser: 'Parser', c: str): | |
super().__init__(parser) | |
self.c = c | |
def parse(self) -> bool: | |
self.parser.skip_whitespace() | |
return self.parser.parse_char(self.c) | |
class Number(Combinator): | |
def parse(self) -> bool: | |
self.parser.skip_whitespace() | |
return self.parser.parse_number() | |
class Sequence(Combinator): | |
children: list[Combinator] | |
def __init__(self, parser: 'Parser', *children): | |
super().__init__(parser) | |
self.children = children | |
def parse(self) -> bool: | |
pos0 = self.parser.pos | |
for child in self.children: | |
if not child.parse(): | |
self.parser.pos = pos0 | |
self.parser.error = (self.parser.line, self.parser.col, 'expected Sequence') | |
return False | |
return True | |
class ForwardReference(Combinator): | |
supplier: Callable | |
def __init__(self, parser: 'Parser', supplier: Callable): | |
super().__init__(parser) | |
self.supplier = supplier | |
def parse(self) -> bool: | |
return self.supplier().parse() | |
class Repetition(Combinator): | |
child: Combinator | |
def __init__(self, parser: 'Parser', child: Combinator): | |
super().__init__(parser) | |
self.child = child | |
def parse(self) -> bool: | |
while self.child.parse(): | |
pass | |
return True | |
class Optional_(Combinator): | |
child: Combinator | |
def __init__(self, parser: 'Parser', child: Combinator): | |
super().__init__(parser) | |
self.child = child | |
def parse(self) -> bool: | |
self.child.parse() | |
return True | |
class Choice(Combinator): | |
children: list[Combinator] | |
def __init__(self, parser: 'Parser', *children): | |
super().__init__(parser) | |
self.children = children | |
def parse(self): | |
for child in self.children: | |
if child.parse(): | |
return True | |
self.parser.error = (self.parser.line, self.parser.col, 'expected choice') | |
return False | |
class Object(Combinator): | |
child: Combinator | |
def __init__(self, parser: 'Parser', child: Combinator): | |
super().__init__(parser) | |
self.child = child | |
def parse(self): | |
stack0: int = len(self.parser.stack) | |
success: bool = self.child.parse() | |
if not success: | |
self.parser.error = (self.parser.line, self.parser.col, 'could not parse object') | |
return False | |
o: dict[str, Any] = {} | |
while len(self.parser.stack) > stack0: | |
v: Any = self.parser.stack.pop() | |
k: str = self.parser.stack.pop() | |
o[k] = v | |
o = {k: v for k, v in reversed(o.items())} | |
self.parser.stack.append(o) | |
return True | |
class Array(Combinator): | |
child: Combinator | |
def __init__(self, parser: 'Parser', child: Combinator): | |
super().__init__(parser) | |
self.child = child | |
def parse(self): | |
stack0: int = len(self.parser.stack) | |
success: bool = self.child.parse() | |
if not success: | |
self.parser.error = (self.parser.line, self.parser.col, 'could not parse array') | |
return False | |
o: list[Any] = [] | |
while len(self.parser.stack) > stack0: | |
v: Any = self.parser.stack.pop() | |
o.append(v) | |
o = list(reversed(o)) | |
self.parser.stack.append(o) | |
return True | |
# | |
# Parser | |
# | |
class Parser: | |
def __init__(self): | |
pass | |
def parse(self, text: str) -> Any: | |
raise NotImplementedError() | |
class JsonParser(Parser): | |
''' | |
VALUE ::= STRING | NUMBER | OBJECT | ARRAY | |
OBJECT ::= "{" (PAIR ("," PAID)*)? "}" | |
PAIR ::= STRING ":" VALUE | |
ARRAY ::= "[" (VALUE ("," VALUE)*)? "]" | |
''' | |
pos: int | |
line: int | |
col: int | |
input_: Union[str, None] | |
stack: Union[list, None] | |
error: Union[tuple[int, int, str], None] | |
def __init__(self): | |
self.pos = -1 | |
self.line = -1 | |
self.col = -1 | |
self.input_ = None | |
self.stack = None | |
self.error = None | |
# PAIR ::= STRING ":" VALUE | |
pair: Combinator = Sequence( | |
self, | |
String(self), | |
Char(self, ':'), | |
ForwardReference(self, lambda: value) | |
) | |
# ("," PAID)*) | |
pair_tails: Combinator = Repetition( | |
self, | |
Sequence( | |
self, | |
Char(self, ','), | |
pair, | |
) | |
) | |
# (PAIR ("," PAID)*)? | |
pairs: Combinator = Optional_( | |
self, | |
Sequence( | |
self, | |
pair, | |
pair_tails, | |
) | |
) | |
# OBJECT ::= "{" (PAIR ("," PAID)*)? "}" | |
object_: Combinator = Object( | |
self, | |
Sequence( | |
self, | |
Char(self, '{'), | |
pairs, | |
Char(self, '}'), | |
) | |
) | |
# ("," VALUE)* | |
value_tails: Combinator = Repetition( | |
self, | |
Sequence( | |
self, | |
Char(self, ','), | |
ForwardReference(self, lambda: value), | |
) | |
) | |
# (VALUE ("," VALUE)*)? | |
values: Combinator = Optional_( | |
self, | |
Sequence( | |
self, | |
ForwardReference(self, lambda: value), | |
value_tails, | |
) | |
) | |
# "[" (VALUE ("," VALUE)*)? "]" | |
array: Combinator = Array( | |
self, | |
Sequence( | |
self, | |
Char(self, '['), | |
values, | |
Char(self, ']'), | |
) | |
) | |
# VALUE ::= STRING | NUMBER | OBJECT | ARRAY | |
value: Combinator = Choice( | |
self, | |
String(self), | |
Number(self), | |
object_, | |
array, | |
) | |
self.value = value | |
def parse(self, text: str) -> Any: | |
self.pos = 0 | |
self.line = 1 | |
self.col = 1 | |
self.input_ = text | |
self.stack = [] | |
self.error = None | |
self.value.parse() | |
value: Any = self.stack.pop() | |
return value | |
def skip_whitespace(self): | |
while self.pos < len(self.input_) and self.input_[self.pos] in whitespace: | |
if self.input_[self.pos] == '\n': | |
self.line += 1 | |
self.col = 1 | |
else: | |
self.col += 1 | |
self.pos += 1 | |
def parse_stringlet(self): | |
i: int = self.pos | |
if self.input_[i] != '"': | |
self.error = (self.line, self.col, 'expected opening `"`') | |
return False | |
i += 1 | |
begin: int = i | |
while i < len(self.input_) and self.input_[i] != '"': | |
i += 1 | |
if self.input_[i] != '"': | |
self.error = (self.line, self.col, 'expected closing `"`') | |
return False | |
end: int = i | |
i += 1 | |
v: str = self.input_[begin:end] | |
self.stack.append(v) | |
self.pos = i | |
return True | |
def parse_number(self): | |
i: int = self.pos | |
while self.input_[i] in digits: | |
i += 1 | |
if i == self.pos: | |
self.error = (self.line, self.col, 'expected number') | |
return False | |
v: Union[int, float] = int(self.input_[self.pos:i]) | |
self.stack.append(v) | |
self.pos = i | |
return True | |
def parse_char(self, c: str): | |
success: bool = self.input_[self.pos] == c | |
if not success: | |
self.error = (self.line, self.col, 'expected character {c!r}') | |
return False | |
self.pos += 1 | |
return True | |
if __name__ == '__main__': | |
jp = JsonParser() | |
text = ''' | |
{ | |
"x": 1, | |
"y": "2", | |
"items": [ | |
{"type": "user", "id": 0, "refs": []}, | |
{"type": "user", "id": 1, "refs": [{ | |
"type": "ref" | |
}]} | |
] | |
} | |
''' | |
value = jp.parse(text) | |
# {'x': 1, 'y': '2', 'items': [{'type': 'user', 'id': 0, 'refs': []}, {'type': 'user', 'id': 1, 'refs': [{'type': 'ref'}]}]} | |
print(value) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment