Last active
April 17, 2020 23:33
-
-
Save jeacom25b/ab3d6d330ffb6c4cb5f26f664c039acf to your computer and use it in GitHub Desktop.
A parser combinator prototype.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class Color: | |
""" | |
colors defined as escape sequences for fancy output. | |
easy to use but dont work on all terminals | |
https://en.wikipedia.org/wiki/ANSI_escape_code | |
""" | |
@classmethod | |
def enable(cls): | |
cls.red = "\u001b[31m" | |
cls.yellow = "\u001b[38;5;221m" | |
cls.pink = "\u001b[38;5;213m" | |
cls.cyan = "\u001b[38;5;38m" | |
cls.green = "\u001b[38;5;112m" | |
cls.reset = "\u001b[0m" | |
@classmethod | |
def disable(cls): | |
cls.red = "" | |
cls.yellow = "" | |
cls.pink = "" | |
cls.cyan = "" | |
cls.green = "" | |
cls.reset = "" | |
Color.enable() | |
def basic_error(index): | |
return f"Coudn't find match at index {index}" | |
def parser_ensure(*args): | |
for arg in args: | |
if isinstance(arg, str): | |
yield String(arg).tag(arg) | |
elif isinstance(arg, BaseParser): | |
yield arg | |
else: | |
raise ValueError(f"{arg} is not a valid parser") | |
class ParserResult: | |
def __init__(self, result=None, new_index=0, error=None, tag=None): | |
self.result = result | |
self.new_index = new_index | |
self.error = error | |
self.tag = tag | |
def tag_set(self, tag): | |
self.tag = tag | |
def result_set(self, r): | |
self.result = r | |
def index_set(self, i, j): | |
self.index = i | |
def error_set(self, e): | |
self.error = e | |
def __getitem__(self, index): | |
return self.result[index] | |
def __setitem__(self, index, val): | |
self.result[index] = val | |
def deep_repr(self, indent_lvl=0): | |
tag = "" if self.tag is None else self.tag | |
indent = " " * indent_lvl | |
if self.error: | |
return f"{indent}{Color.red}E: {self.error}{Color.reset}" | |
elif isinstance(self.result, (list, tuple)): | |
out_lines = [f"{indent}{Color.green}{tag}:{Color.reset}"] | |
for item in self.result: | |
if isinstance(item, ParserResult): | |
out_lines.append(item.deep_repr(indent_lvl + 4)) | |
else: | |
out_lines.append(repr(item)) | |
return "\n".join(out_lines) | |
else: | |
return f"{indent}{Color.green}{tag}: {Color.pink}[{repr(self.result)}{Color.pink}]{Color.reset}" | |
def __repr__(self): | |
return self.deep_repr() | |
class BaseParser: | |
def run(self, string, context=None, index=0): | |
raise NotImplemented() | |
def sep_by(self, by, min_matches=0, max_matches=float("inf")): | |
return Separated(self, by=by, min_matches=min_matches, max_matches=max_matches) | |
def between(self, left, right=None): | |
if not right: | |
right = left | |
return Between(left, self, right) | |
def around(self, mid): | |
return Around(self, mid, self) | |
def to_left_of(self, x): | |
def tr(r): | |
r.tag_set(r[0].tag) | |
r.result_set(r[0].result) | |
return Sequence(self, x).transform(tr) | |
def to_right_of(self, x): | |
def tr(r): | |
r.tag_set(r[1].tag) | |
r.result_set(r[1].result) | |
return Sequence(x, self).transform(tr) | |
def tag(self, tag): | |
return Tag(self, tag) | |
def transform(self, func=lambda x: x): | |
return Transform(self, func) | |
def atleast(self, n): | |
return Many(self, min_matches=n) | |
def atmost(self, n): | |
return Many(self, max_matches=n) | |
def exactly(self, n): | |
return Many(self, max_matches=n, min_matches=n) | |
def __getitem__(self, key): | |
if isinstance(key, tuple): | |
if len(key) == 2: | |
a, b = key | |
if isinstance(a, int) and b == Ellipsis: | |
return self.atleast(a) | |
elif isinstance(b, int) and a == Ellipsis: | |
return self.atmost(b) | |
elif isinstance(a, int) and isinstance(b, int): | |
return Many(self, min_matches=a, max_matches=b) | |
elif isinstance(a, BaseParser): | |
if isinstance(b, int): | |
return self.sep_by(a, min_matches=b, max_matches=b) | |
elif b == Ellipsis: | |
return self.sep_by(a) | |
elif len(key) == 3: | |
a, b, c = key | |
if isinstance(a, BaseParser): | |
if isinstance(b, int) and isinstance(c, int): | |
return self.sep_by(a, min_matches=b, max_matches=c) | |
elif isinstance(b, int) and c == Ellipsis: | |
return self.sep_by(a, min_matches=b) | |
elif isinstance(c, int) and b == Ellipsis: | |
return self.sep_by(a, max_matches=c) | |
elif isinstance(key, int): | |
return self.exactly(key) | |
elif key == Ellipsis: | |
return Many(self) | |
raise ValueError(f"Invalid quantifier {key}") | |
def __floordiv__(self, other): | |
return self.tag(other) | |
def __or__(self, other): | |
return Either(self, other) | |
def __add__(self, other): | |
return Sequence(self, other) | |
def __radd__(self, other): | |
return Sequence(other, self) | |
def __ge__(self, other): | |
return self.tag(other) | |
def __rshift__(self, other): | |
if not isinstance(other, BaseParser): | |
other = String(other) | |
return other.to_right_of(self) | |
def __rrshift__(self, other): | |
return self.to_right_of(String(other)) | |
def __lshift__(self, other): | |
return self.to_left_of(other) | |
def __rlshift__(self, other): | |
return String(other).to_left_of(self) | |
class ParserRecursionPromisses: | |
def __init__(self): | |
self._definitions = {} | |
def __setattr__(self, name, val): | |
if name == "_definitions": | |
super().__setattr__("_definitions", val) | |
else: | |
self._definitions[name] = val | |
def __getattr__(self, name): | |
if name == "_definitions": | |
return object.__getattr__("_definitions")[name] | |
else: | |
return _ThePromissedParser(self._definitions, name) | |
class _ThePromissedParser(BaseParser): | |
def __init__(self, definitions, parser_name): | |
self._definitions = definitions | |
self.parser_name = parser_name | |
def run(self, string, context=None, index=0, ): | |
if self.parser_name in self._definitions: | |
parser = self._definitions[self.parser_name] | |
if not isinstance(parser, BaseParser): | |
raise ValueError(f"{parser} is not a parser") | |
return parser.run(string, context, index) | |
else: | |
raise ValueError( | |
f"Parser {repr(self.parser_name)} promissed but not defined") | |
class Transform(BaseParser): | |
def __init__(self, parser, func=lambda x: None): | |
self.parser, = parser_ensure(parser) | |
self.func = func | |
def run(self, string, context=None, index=0): | |
result = self.parser.run(string, context, index) | |
if not result.error: | |
self.func(result) | |
return result | |
def __repr__(self): | |
return f"Transform({self.parser}, {self.func})" | |
class Tag(BaseParser): | |
def __init__(self, parser, tag): | |
self.parser, = parser_ensure(parser) | |
self.tag = tag | |
def run(self, string, context=None, index=0): | |
result = self.parser.run(string, context, index) | |
if not result.error: | |
result.tag = self.tag | |
return result | |
def __repr__(self): | |
return f"Tag({self.parser}, {repr(self.tag)})" | |
class Chain(BaseParser): | |
def __init__(self, parser, func): | |
self.parser, = parser_ensure(parser) | |
self.func = func | |
def run(self, string, context=None, index=0): | |
result = self.parser.run(string, context, index) | |
if result.error: | |
return result | |
new_parser = self.func(result) | |
if isinstance(new_parser, BaseParser): | |
new_result = new_parser.run( | |
string, context, result.new_index) | |
if new_result.error: | |
return new_result | |
return ParserResult(result=[result, new_result], | |
new_index=new_result.new_index, | |
error=None) | |
return result | |
def __repr__(self): | |
return f"Chain({self.parser}, {self.func})" | |
class Sequence(BaseParser): | |
def __init__(self, *args): | |
self.items = list(parser_ensure(*args)) | |
def run(self, string, context=None, index=0): | |
matches = [] | |
for item in self.items: | |
result = item.run(string, context, index) | |
if not result.error: | |
matches.append(result) | |
index = result.new_index | |
else: | |
return result | |
return ParserResult(result=matches, | |
new_index=index, | |
error=None) | |
def __add__(self, other): | |
return Sequence(*self.items, other) | |
def __repr__(self): | |
return f"Sequence({', '.join(map(repr, self.items))})" | |
class Around(BaseParser): | |
def __init__(self, left, mid, right): | |
self.left, self.mid, self.right = parser_ensure(left, mid, right) | |
def run(self, string, context=None, index=0): | |
lresult = self.left.run(string, context, index) | |
if lresult.error: | |
return lresult | |
mid_result = self.mid.run( | |
string, context, lresult.new_index) | |
if mid_result.error: | |
return mid_result | |
rresult = self.right.run( | |
string, context, mid_result.new_index) | |
if rresult.error: | |
return rresult | |
else: | |
return ParserResult(result=[lresult, rresult], | |
new_index=rresult.new_index, | |
error=None) | |
def __repr__(self): | |
return f"Around({self.left}, {self.mid}, {self.right})" | |
class Between(BaseParser): | |
def __init__(self, left, mid, right): | |
self.left, self.mid, self.right = parser_ensure(left, mid, right) | |
def run(self, string, context=None, index=0): | |
result = self.left.run(string, context, index) | |
if result.error: | |
return result | |
mid_result = self.mid.run( | |
string, context, result.new_index) | |
if mid_result.error: | |
return mid_result | |
result = self.right.run( | |
string, context, mid_result.new_index) | |
if result.error: | |
return result | |
else: | |
mid_result.new_index = result.new_index | |
return mid_result | |
def __repr__(self): | |
return f"Between({self.left}, {self.mid}, {self.right})" | |
class Separated(BaseParser): | |
def __init__(self, parser, by, min_matches=1, max_matches=None): | |
if max_matches is None: | |
max_matches = float("inf") | |
self.parser, self.sep_parser = parser_ensure(parser, by) | |
self.min_matches = min_matches | |
self.max_matches = max_matches | |
def run(self, string, context=None, index=0): | |
matches = [] | |
while True: | |
result = self.parser.run(string, context, index) | |
if not result.error: | |
matches.append(result) | |
else: | |
break | |
result = self.sep_parser.run( | |
string, context, result.new_index) | |
if result.error or len(matches) == self.max_matches: | |
break | |
index = result.new_index | |
if len(matches) < self.min_matches: | |
return result | |
else: | |
return ParserResult(result=matches, | |
new_index=result.new_index, | |
error=None) | |
def __repr__(self): | |
return f"Separated({self.parser}, {self.sep}, {self.min_matches}, {self.max_matches})" | |
class Either(BaseParser): | |
def __init__(self, *args): | |
self.parsers = [] | |
for parser in parser_ensure(*args): | |
if type(parser) == Either: | |
self.parsers.extend(parser.parsers) | |
else: | |
self.parsers.append(parser) | |
def run(self, string, context=None, index=0): | |
errors = [] | |
for parser in self.parsers: | |
result = parser.run(string, context, index) | |
if not result.error: | |
return result | |
else: | |
errors.append(result) | |
else: | |
return ParserResult(result=errors, | |
new_index=index, | |
error=errors) | |
def __repr__(self): | |
return f"Either({', '.join(map(repr, self.parsers))})" | |
class Many(BaseParser): | |
def __init__(self, parser, min_matches=0, max_matches=None): | |
if max_matches is None: | |
max_matches = float("inf") | |
self.min_matches = min_matches | |
self.max_matches = max_matches | |
self.parser, = parser_ensure(parser) | |
def run(self, string, context=None, index=0): | |
matches = [] | |
while True: | |
result = self.parser.run(string, context, index) | |
if not result.error: | |
matches.append(result) | |
index = result.new_index | |
else: | |
if self.min_matches <= len(matches) <= self.max_matches: | |
return ParserResult(result=matches, | |
new_index=index, | |
error=None) | |
else: | |
return result | |
if len(matches) == self.max_matches: | |
return ParserResult(result=matches, | |
new_index=index, | |
error=None) | |
def __repr__(self): | |
return f"Many({self.parser}, {self.min_matches}, {self.max_matches})" | |
class Regex(BaseParser): | |
def __init__(self, x): | |
self.regex = re.compile(x) | |
self.x = x | |
def run(self, string, context=None, index=0): | |
match = self.regex.match(string, index) | |
if match: | |
return ParserResult(result=match[0], | |
new_index=len(match[0]) + index, | |
error=None) | |
else: | |
return ParserResult(result=None, | |
new_index=index, | |
error=basic_error(index)) | |
def __repr__(self): | |
return f"Regex({repr(self.x)})" | |
class String(BaseParser): | |
def __init__(self, x): | |
if x == "": | |
raise ValueError("String(x) arg 1 cannot be empty string") | |
self.x = x | |
def run(self, string, context=None, index=0): | |
string_slice = string[index:index + len(self.x)] | |
if string_slice == self.x: | |
return ParserResult(result=self.x, | |
new_index=index + len(self.x), | |
error=None) | |
else: | |
return ParserResult(result=None, | |
new_index=index, | |
error=basic_error(index)) | |
def __repr__(self): | |
return f"String({repr(self.x)})" | |
if __name__ == "__main__": | |
print("Running parsers.py demo \n\n") | |
id = Regex("[A-Za-z_]+[A-Za-z0-9_]*") | |
whitespace = Regex("[ \t]*") | |
comma = Between(whitespace, ",", whitespace) | |
open_bracket = Between(whitespace, "[", whitespace) | |
close_bracket = Between(whitespace, "]", whitespace) | |
number = Regex(r"\d+").tag("number") | |
pr = ParserRecursionPromisses() | |
pr.array = Between(open_bracket, | |
Separated(Either(number, | |
pr.array), | |
by=comma), | |
close_bracket).tag("array") | |
pr.value = Either(number, pr.array) | |
pr.assignemnt = Transform(Sequence(id.tag("variable"), | |
whitespace, | |
"=", | |
whitespace, | |
pr.value | |
), | |
lambda r: r.result_set( | |
[r.result[0], r.result[4]]) | |
).tag("assignemnt") | |
print(pr.assignemnt.run("asd = [1, [5, 4], 2]")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment