Skip to content

Instantly share code, notes, and snippets.

@milesrout
Last active September 7, 2016 11:30
Show Gist options
  • Save milesrout/b73c235f03793dc208fd to your computer and use it in GitHub Desktop.
Save milesrout/b73c235f03793dc208fd to your computer and use it in GitHub Desktop.
import itertools
import re
class Lexer:
    """Regex-based lexer built from ``(name, pattern)`` token definitions.

    Each definition is wrapped in a named group and all groups are joined
    into a single alternation, so when several patterns could match at the
    same position the one listed first wins. Scanning uses ``finditer``,
    which means text that no pattern matches is skipped without an error.
    """

    def __init__(self, tokens):
        """Compile the combined pattern for ``tokens``.

        Args:
            tokens: Iterable of ``(name, pattern)`` pairs. ``name`` is
                upper-cased and used as the regex group name; ``pattern``
                is regular-expression source text.

        Raises:
            re.error: If the combined pattern is not a valid regex.
        """
        self.tokens = tokens
        self.regex = re.compile(self.combine_regexes(self.make_named_groups()))

    def combine_regexes(self, named_groups):
        """Join group sources into one alternation pattern string."""
        return '|'.join(named_groups)

    def make_named_group(self, key, value):
        """Return the ``(?P<KEY>value)`` source for one token definition."""
        return "(?P<{0}>{1})".format(key.upper(), value)

    def make_named_groups(self):
        """Yield the named-group source of every token definition, in order."""
        yield from itertools.starmap(self.make_named_group, self.tokens)

    def tokens_from_file(self, filename):
        """Yield one lazy token iterator per line of the file ``filename``.

        The result is deliberately nested (an iterator of per-line token
        iterators), not a flat stream of tokens.
        """
        with open(filename) as f:
            # Iterate the file object directly instead of materialising
            # every line with readlines() first.
            for line in f:
                yield self.tokens_from_string(line)

    def tokens_from_string(self, string):
        """Yield the text of each token found in ``string``, left to right."""
        # A lexer built from no token definitions has no groups; its empty
        # pattern would match the empty string at every position, so state
        # explicitly that it produces no tokens.
        if not self.regex.groups:
            return
        for match in self.matches(string):
            # Every alternative is entirely wrapped in its named group, so
            # the whole match is exactly the text of the group that matched.
            # Reading it directly avoids searching groups() for the first
            # non-None entry with a bare next(), whose StopIteration would
            # be mistaken for normal exhaustion and truncate the stream.
            yield match.group()

    def match_groups(self, string):
        """Yield the ``groups()`` tuple of every match in ``string``."""
        for match in self.matches(string):
            yield match.groups()

    def matches(self, string):
        """Yield each non-overlapping match of the combined pattern."""
        yield from self.regex.finditer(string)
def first(it):
    """Return the first element of ``it`` that is not ``None``.

    Falsy values such as ``0`` or ``''`` count as valid results; only
    ``None`` is skipped.

    Raises:
        StopIteration: If ``it`` is empty or contains only ``None``.
    """
    for candidate in it:
        if candidate is not None:
            return candidate
    # Same exception a bare next() raises when nothing qualifies.
    raise StopIteration
# Token definitions as (name, pattern) pairs. Order matters: the keyword
# 'if' is listed before 'ident' so it is tried first at each position.
# NOTE(review): the bare `...` entry appears to be a placeholder for further
# definitions; it is not a (name, pattern) pair and must be replaced before
# the lexer can be constructed.
tokens = [
    ('if', 'if'),
    ('ident', '[a-zA-Z_]+'),
    ...
]
l = Lexer(tokens)

# Print the tokens of each input line, space-separated, one line per line.
# NOTE(review): `filename` is not defined anywhere in the visible source;
# it has to be supplied before this runs.
with open(filename, 'r') as source_file:
    for line in source_file:
        line_tokens = l.tokens_from_string(line)
        print(' '.join(line_tokens))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment