Skip to content

Instantly share code, notes, and snippets.

@victorfsf
Last active March 29, 2016 17:42
Show Gist options
  • Save victorfsf/033e8e0838982a5e428eadd2a259086a to your computer and use it in GitHub Desktop.
Save victorfsf/033e8e0838982a5e428eadd2a259086a to your computer and use it in GitHub Desktop.
Regex Scanner
class Scanner(object):
    """Tokenizer driven by a list of named regular-expression rules.

    All rules are compiled into one pattern consisting of a single branch
    with one capturing group per rule, so the index of the group that
    matched (``match.lastindex``) identifies which rule produced a token.
    """

    def __init__(self, rules, flags=0):
        """Compile ``rules`` into a single scanner.

        Args:
            rules: Sequence of ``(name, regex)`` pairs.
            flags: Regex flags stored on the shared pattern state and
                passed to ``parse`` for every rule.
        """
        # NOTE(review): Pattern, SubPattern, parse, sre_compile, BRANCH and
        # SUBPATTERN are not imported in the visible part of this file.
        # They presumably come from sre_parse / sre_compile / sre_constants;
        # confirm, and check they still exist with these signatures on the
        # Python version this is meant to run on.
        pattern = Pattern()
        pattern.flags = flags
        # Presumably reserves group numbers 1..len(rules) for the per-rule
        # groups created below, so groups inside a rule are numbered after
        # them -- TODO confirm against the sre_parse implementation in use.
        pattern.groups = len(rules) + 1
        self.rules = [name for name, _ in rules]
        self._scanner = sre_compile(SubPattern(pattern, [
            (BRANCH, (None, [SubPattern(pattern, [
                (SUBPATTERN, (group, parse(regex, flags, pattern))),
            ]) for group, (_, regex) in enumerate(rules, 1)]))
        ])).scanner

    def scan(self, string, skip=False):
        """Yield ``(rule_name, match)`` for each token found in ``string``.

        Args:
            string: Text to tokenize.
            skip: When false, each token must start exactly where the
                previous one ended (the scanner's ``match`` is used).
                When true, the scanner's ``search`` is used instead, so
                text that no rule matches is skipped over.

        Yields:
            Tuples of the matching rule's name and the match object.

        Raises:
            EOFError: Only when ``skip`` is false and scanning stopped
                before the end of ``string``. The exception argument is
                the offset at which no rule matched (0 if nothing matched
                at all).
        """
        sc = self._scanner(string)
        step = sc.search if skip else sc.match
        # Track the end of the last token explicitly so the final check
        # never has to call ``.end()`` on a missing match.
        end = 0
        for match in iter(step, None):
            yield self.rules[match.lastindex - 1], match
            end = match.end()
        # In strict mode the whole input must be consumed; in skip mode
        # unmatched text (including trailing text) is ignored by design.
        if not skip and end < len(string):
            raise EOFError(end)
# Rules for a small arithmetic language, as (token name, regex) pairs.
TOKEN_RULES = [
    ('whitespace', r'\s+'),
    ('plus', r'\+'),
    ('minus', r'\-'),
    ('mult', r'\*'),
    ('div', r'/'),
    ('num', r'\d+'),
    ('paren_open', r'\('),
    ('paren_close', r'\)'),
]
scanner = Scanner(TOKEN_RULES)

# Tokenize a sample expression and print each token name with its text.
expression = '(1 + 2) * 3'
for token, match in scanner.scan(expression):
    print (token, match.group())

Original source: http://lucumr.pocoo.org/2015/11/18/pythons-hidden-re-gems/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment