Last active
August 25, 2020 21:58
-
-
Save sug0/a9bb7c068fa08eba5fe55877b53ed0f3 to your computer and use it in GitHub Desktop.
Python parsec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import reduce | |
| def last(iterator): | |
| item = None | |
| for x in iterator: | |
| item = x | |
| return item | |
| def deplete(iterator): | |
| for x in iterator: | |
| ... | |
| # base class for readers | |
| class Reader(object): | |
| def read(self): | |
| raise NotImplementedError | |
| def undoread(self): | |
| raise NotImplementedError | |
| def reset(self): | |
| raise NotImplementedError | |
| # reader for strings | |
| class StringReader(object): | |
| def __init__(self, s): | |
| self.s = s | |
| self.i = 0 | |
| def reset(self): | |
| self.i = 0 | |
| def undoread(self): | |
| self.i -= 1 | |
| def read(self): | |
| if self.i < len(self.s): | |
| ch = self.s[self.i] | |
| self.i += 1 | |
| return ch | |
| else: | |
| raise EOFError | |
| # parse exception | |
| class ParseError(Exception): | |
| ... | |
| # base class for parsers | |
| class Parser(object): | |
| def run(self, r): | |
| raise NotImplementedError | |
| # parse a single character | |
| class Char(Parser): | |
| def __init__(self, ch): | |
| self.ch = ch | |
| def run(self, reader): | |
| ch = reader.read() | |
| if self.ch == ch: | |
| yield ch | |
| else: | |
| reader.undoread() | |
| raise ParseError(f'Expected "{self.ch}" but got "{ch}"') | |
| # satisfy a predicate | |
| class Satisfy(Parser): | |
| def __init__(self, pred): | |
| self.pred = pred | |
| def run(self, reader): | |
| ch = reader.read() | |
| if self.pred(ch): | |
| yield ch | |
| else: | |
| reader.undoread() | |
| raise ParseError(f'Unable to satisfy predicate') | |
| # parse as many results as we can | |
| class Many(Parser): | |
| def __init__(self, p, one=False): | |
| self.parser = p | |
| self.one = one | |
| def run(self, reader): | |
| if self.one: | |
| yield from self.parser.run(reader) | |
| try: | |
| while True: | |
| yield from self.parser.run(reader) | |
| except (ParseError, EOFError): | |
| return | |
| # alternative parser | |
| class Or(Parser): | |
| def __init__(self, parsers): | |
| self.parsers = parsers | |
| def run(self, reader): | |
| for parser in self.parsers: | |
| try: | |
| yield from parser.run(reader) | |
| return | |
| except (ParseError, EOFError): | |
| continue | |
| raise ParseError('No parser worked :(') | |
| # optional parser | |
| class Opt(Parser): | |
| def __init__(self, parser): | |
| self.parser = parser | |
| def run(self, reader): | |
| try: | |
| yield from self.parser.run(reader) | |
| except (ParseError, EOFError): | |
| ... | |
| # sequence of parsers | |
| class Seq(Parser): | |
| def __init__(self, parsers): | |
| self.parsers = parsers | |
| def run(self, reader): | |
| for parser in self.parsers: | |
| yield from parser.run(reader) | |
| # return the left parser | |
| class Left(Parser): | |
| def __init__(self, left, right): | |
| self.left = left | |
| self.right = right | |
| def run(self, reader): | |
| yield from self.left.run(reader) | |
| deplete(self.right.run(reader)) | |
| # return the right parser | |
| class Right(Parser): | |
| def __init__(self, left, right): | |
| self.left = left | |
| self.right = right | |
| def run(self, reader): | |
| deplete(self.left.run(reader)) | |
| yield from self.right.run(reader) | |
| # between two parsers | |
| class Between(Parser): | |
| def __init__(self, left, middle, right): | |
| self.left = left | |
| self.middle = middle | |
| self.right = right | |
| def run(self, reader): | |
| p = Left(Right(self.left, self.middle), self.right) | |
| yield from p.run(reader) | |
| # apply to a result | |
| class Apply(Parser): | |
| def __init__(self, f, parser): | |
| self.f = f | |
| self.parser = parser | |
| def run(self, reader): | |
| yield self.f(self.parser.run(reader)) | |
| # transform a result | |
| class Map(Parser): | |
| def __init__(self, f, parser): | |
| self.f = f | |
| self.parser = parser | |
| def run(self, reader): | |
| yield from map(self.f, self.parser.run(reader)) | |
| # reduce a result | |
| class Reduce(Parser): | |
| def __init__(self, f, parser): | |
| self.f = f | |
| self.parser = parser | |
| def run(self, reader): | |
| yield reduce(self.f, self.parser.run(reader)) | |
| # a digit | |
| class Digit(Parser): | |
| def run(self, reader): | |
| p = Or(map(lambda x: Char(chr(x+ord('0'))), range(10))) | |
| yield from p.run(reader) | |
| # whitespace | |
| class Space(Parser): | |
| def run(self, reader): | |
| p = Or(map(Char, (' ', '\t'))) | |
| yield from p.run(reader) | |
| class Spaces(Parser): | |
| def run(self, reader): | |
| p = Many(Space()) | |
| yield from p.run(reader) | |
| # parse an arbitrary string | |
| class String(Parser): | |
| def __init__(self, s): | |
| self.s = s | |
| def run(self, reader): | |
| p = Apply(lambda s: ''.join(s), Seq(map(Char, self.s))) | |
| yield from p.run(reader) | |
| # run an arbitrary parser | |
| def parse(parser, string, collect=list): | |
| return collect(parser.run(StringReader(string))) | |
| # driver program | |
| if __name__ == '__main__': | |
| trials = [ | |
| (Apply(lambda s: ''.join(s), Many(Or((Char('a'), Char('b'))))), 'aaaaaaabbbbb'), | |
| (Map(int, Apply(lambda s: ''.join(s), Many(Digit(), one=True))), '213124'), | |
| ] | |
| for parser, reader in trials: | |
| print(parse(parser, reader, collect=next)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment