Skip to content

Instantly share code, notes, and snippets.

@mtasic85
Last active September 5, 2021 12:28
Show Gist options
  • Save mtasic85/8f053b6d1e0503248aed1b94bdeb7d8b to your computer and use it in GitHub Desktop.
Save mtasic85/8f053b6d1e0503248aed1b94bdeb7d8b to your computer and use it in GitHub Desktop.
Python - PEG - Parsing Combinators - Simple JSON parser v2.0
'''
PEG - Parsing Combinators
Example: Simple JSON parser
'''
from typing import Any, Callable, Union
from string import digits, whitespace
#
# Combinator
#
class Combinator:
parser: 'Parser'
def __init__(self, parser: 'Parser'):
self.parser = parser
def parse(self) -> bool:
raise NotImplementedError()
class String(Combinator):
def parse(self) -> bool:
self.parser.skip_whitespace()
return self.parser.parse_stringlet()
class Char(Combinator):
c: str
def __init__(self, parser: 'Parser', c: str):
super().__init__(parser)
self.c = c
def parse(self) -> bool:
self.parser.skip_whitespace()
return self.parser.parse_char(self.c)
class Number(Combinator):
def parse(self) -> bool:
self.parser.skip_whitespace()
return self.parser.parse_number()
class Sequence(Combinator):
children: list[Combinator]
def __init__(self, parser: 'Parser', *children):
super().__init__(parser)
self.children = children
def parse(self) -> bool:
pos0 = self.parser.pos
for child in self.children:
if not child.parse():
self.parser.pos = pos0
self.parser.error = (self.parser.line, self.parser.col, 'expected Sequence')
return False
return True
class ForwardReference(Combinator):
supplier: Callable
def __init__(self, parser: 'Parser', supplier: Callable):
super().__init__(parser)
self.supplier = supplier
def parse(self) -> bool:
return self.supplier().parse()
class Repetition(Combinator):
child: Combinator
def __init__(self, parser: 'Parser', child: Combinator):
super().__init__(parser)
self.child = child
def parse(self) -> bool:
while self.child.parse():
pass
return True
class Optional_(Combinator):
child: Combinator
def __init__(self, parser: 'Parser', child: Combinator):
super().__init__(parser)
self.child = child
def parse(self) -> bool:
self.child.parse()
return True
class Choice(Combinator):
children: list[Combinator]
def __init__(self, parser: 'Parser', *children):
super().__init__(parser)
self.children = children
def parse(self):
for child in self.children:
if child.parse():
return True
self.parser.error = (self.parser.line, self.parser.col, 'expected choice')
return False
class Object(Combinator):
child: Combinator
def __init__(self, parser: 'Parser', child: Combinator):
super().__init__(parser)
self.child = child
def parse(self):
stack0: int = len(self.parser.stack)
success: bool = self.child.parse()
if not success:
self.parser.error = (self.parser.line, self.parser.col, 'could not parse object')
return False
o: dict[str, Any] = {}
while len(self.parser.stack) > stack0:
v: Any = self.parser.stack.pop()
k: str = self.parser.stack.pop()
o[k] = v
o = {k: v for k, v in reversed(o.items())}
self.parser.stack.append(o)
return True
class Array(Combinator):
child: Combinator
def __init__(self, parser: 'Parser', child: Combinator):
super().__init__(parser)
self.child = child
def parse(self):
stack0: int = len(self.parser.stack)
success: bool = self.child.parse()
if not success:
self.parser.error = (self.parser.line, self.parser.col, 'could not parse array')
return False
o: list[Any] = []
while len(self.parser.stack) > stack0:
v: Any = self.parser.stack.pop()
o.append(v)
o = list(reversed(o))
self.parser.stack.append(o)
return True
#
# Parser
#
class Parser:
def __init__(self):
pass
def parse(self, text: str) -> Any:
raise NotImplementedError()
class JsonParser(Parser):
'''
VALUE ::= STRING | NUMBER | OBJECT | ARRAY
OBJECT ::= "{" (PAIR ("," PAID)*)? "}"
PAIR ::= STRING ":" VALUE
ARRAY ::= "[" (VALUE ("," VALUE)*)? "]"
'''
pos: int
line: int
col: int
input_: Union[str, None]
stack: Union[list, None]
error: Union[tuple[int, int, str], None]
def __init__(self):
self.pos = -1
self.line = -1
self.col = -1
self.input_ = None
self.stack = None
self.error = None
# PAIR ::= STRING ":" VALUE
pair: Combinator = Sequence(
self,
String(self),
Char(self, ':'),
ForwardReference(self, lambda: value)
)
# ("," PAID)*)
pair_tails: Combinator = Repetition(
self,
Sequence(
self,
Char(self, ','),
pair,
)
)
# (PAIR ("," PAID)*)?
pairs: Combinator = Optional_(
self,
Sequence(
self,
pair,
pair_tails,
)
)
# OBJECT ::= "{" (PAIR ("," PAID)*)? "}"
object_: Combinator = Object(
self,
Sequence(
self,
Char(self, '{'),
pairs,
Char(self, '}'),
)
)
# ("," VALUE)*
value_tails: Combinator = Repetition(
self,
Sequence(
self,
Char(self, ','),
ForwardReference(self, lambda: value),
)
)
# (VALUE ("," VALUE)*)?
values: Combinator = Optional_(
self,
Sequence(
self,
ForwardReference(self, lambda: value),
value_tails,
)
)
# "[" (VALUE ("," VALUE)*)? "]"
array: Combinator = Array(
self,
Sequence(
self,
Char(self, '['),
values,
Char(self, ']'),
)
)
# VALUE ::= STRING | NUMBER | OBJECT | ARRAY
value: Combinator = Choice(
self,
String(self),
Number(self),
object_,
array,
)
self.value = value
def parse(self, text: str) -> Any:
self.pos = 0
self.line = 1
self.col = 1
self.input_ = text
self.stack = []
self.error = None
self.value.parse()
value: Any = self.stack.pop()
return value
def skip_whitespace(self):
while self.pos < len(self.input_) and self.input_[self.pos] in whitespace:
if self.input_[self.pos] == '\n':
self.line += 1
self.col = 1
else:
self.col += 1
self.pos += 1
def parse_stringlet(self):
i: int = self.pos
if self.input_[i] != '"':
self.error = (self.line, self.col, 'expected opening `"`')
return False
i += 1
begin: int = i
while i < len(self.input_) and self.input_[i] != '"':
i += 1
if self.input_[i] != '"':
self.error = (self.line, self.col, 'expected closing `"`')
return False
end: int = i
i += 1
v: str = self.input_[begin:end]
self.stack.append(v)
self.pos = i
return True
def parse_number(self):
i: int = self.pos
while self.input_[i] in digits:
i += 1
if i == self.pos:
self.error = (self.line, self.col, 'expected number')
return False
v: Union[int, float] = int(self.input_[self.pos:i])
self.stack.append(v)
self.pos = i
return True
def parse_char(self, c: str):
success: bool = self.input_[self.pos] == c
if not success:
self.error = (self.line, self.col, 'expected character {c!r}')
return False
self.pos += 1
return True
if __name__ == '__main__':
jp = JsonParser()
text = '''
{
"x": 1,
"y": "2",
"items": [
{"type": "user", "id": 0, "refs": []},
{"type": "user", "id": 1, "refs": [{
"type": "ref"
}]}
]
}
'''
value = jp.parse(text)
# {'x': 1, 'y': '2', 'items': [{'type': 'user', 'id': 0, 'refs': []}, {'type': 'user', 'id': 1, 'refs': [{'type': 'ref'}]}]}
print(value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment