Skip to content

Instantly share code, notes, and snippets.

@yxy
Created November 25, 2015 08:57
Show Gist options
  • Save yxy/21ac2efc07ffbe7c2a5a to your computer and use it in GitHub Desktop.
Save yxy/21ac2efc07ffbe7c2a5a to your computer and use it in GitHub Desktop.
#-*- encoding: utf8 -*-
import sys
from sre_compile import compile as sre_compile
from sre_constants import BRANCH, SUBPATTERN

try:
    from sre_parse import Pattern, SubPattern, parse
except ImportError:
    # Python 3.8 renamed the parser state class from ``Pattern`` to ``State``.
    from sre_parse import State as Pattern, SubPattern, parse
class Scanner(object):
    """Tokenizer that tries all rules at once through one compiled regex.

    The rule patterns are combined into a single alternation in which the
    i-th rule (1-based) is wrapped in capturing group ``i``; the rule that
    matched is then recovered from ``match.lastindex``.  Capturing groups
    written inside a rule's own pattern are numbered after the rule groups.

    NOTE: this is built on the undocumented ``sre_parse`` / ``sre_compile``
    machinery of CPython, whose node layout differs between versions.
    """

    def __init__(self, rules, flags=0):
        """Compile ``rules`` into a single scanner.

        Args:
            rules: iterable of ``(name, regex)`` pairs; alternatives are
                tried in the order given.
            flags: ``re`` flags used when parsing every rule.
        """
        rules = list(rules)
        self.rules = [name for name, _ in rules]

        state = Pattern()
        state.flags = flags

        # On Python 3.6+ ``groups`` is a read-only property and SUBPATTERN
        # nodes are (group, add_flags, del_flags, subpattern).  Older
        # interpreters keep the original construction.
        modern = sys.version_info >= (3, 6)
        if modern:
            # Reserve group ids 1..len(rules) before parsing any rule so
            # that groups nested inside a rule get higher numbers.
            group_ids = [state.opengroup() for _ in rules]
        else:
            state.groups = len(rules) + 1
            group_ids = range(1, len(rules) + 1)

        branches = []
        for group, (_, regex) in zip(group_ids, rules):
            parsed = parse(regex, flags, state)
            if modern:
                branch = SubPattern(state, [
                    (SUBPATTERN, (group, 0, 0, parsed)),
                ])
                state.closegroup(group, branch)
            else:
                branch = SubPattern(state, [(SUBPATTERN, (group, parsed))])
            branches.append(branch)

        self._scanner = sre_compile(
            SubPattern(state, [(BRANCH, (None, branches))])
        ).scanner

    def scan(self, string, skip=False):
        """Yield ``(rule_name, match)`` for each token found in ``string``.

        Args:
            string: the text to tokenize.
            skip: when true, text that matches no rule is skipped over
                (``search`` is used instead of ``match``) and never raises.

        Raises:
            EOFError: when ``skip`` is false and scanning stopped before the
                end of ``string``; the offset where it stopped is the
                exception's only argument.  Raised lazily, once the
                generator is exhausted.
        """
        sc = self._scanner(string)
        step = sc.search if skip else sc.match
        # Offset just past the last token; stays 0 if nothing matched, so the
        # error path never has to dereference a missing match object.
        position = 0
        for match in iter(step, None):
            position = match.end()
            yield self.rules[match.lastindex - 1], match
        if not skip and position < len(string):
            raise EOFError(position)
# Demo: token rules for simple integer arithmetic, as (name, regex) pairs.
_ARITHMETIC_RULES = [
    ('whitespace', r'\s+'),
    ('plus', r'\+'),
    ('minus', r'\-'),
    ('mult', r'\*'),
    ('div', r'/'),
    ('num', r'\d+'),
    ('paren_open', r'\('),
    ('paren_close', r'\)'),
]
scanner = Scanner(_ARITHMETIC_RULES)

# Print every token's rule name next to the text it matched.
for token, match in scanner.scan('(1 + 2) * 3'):
    print(token, match.group())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment