-
-
Save zdw/5381482 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SaferScanner is just like re.Scanner, but it neuters any grouping in the lexicon
# regular expressions and throws an error on group references, named groups, or
# regex in-pattern flags. Any of those can break correct operation of Scanner.
import re
import sys
from sre_constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS
class SaferScanner(re.Scanner):
    """A ``re.Scanner`` whose lexicon phrases cannot introduce their own groups.

    Every phrase is parsed on its own, stripped of capturing groups, and then
    wrapped in exactly one capturing group per lexicon entry.  Phrases that
    use group references, named groups, or in-pattern global flags are
    rejected with ``ValueError``.

    The inherited ``re.Scanner.__init__`` is deliberately not called: it would
    compile the unscrubbed phrases.  Only ``lexicon`` and ``scanner`` are set
    for the inherited ``scan`` method, plus ``p`` (the combined parse tree).
    """

    def __init__(self, lexicon, flags=0):
        """Build the combined pattern for ``lexicon``.

        Args:
            lexicon: sequence of ``(phrase, action)`` pairs.
            flags: ``re`` flags applied to every phrase.

        Raises:
            ValueError: if a phrase uses a group reference, a named group, or
                an in-pattern global flag.
        """
        parser, compiler = self._sre_modules()
        flags = int(flags)  # accept re.RegexFlag members as well as plain ints
        self.lexicon = lexicon

        # The parser state class is named ``State`` on Python 3.8+ and
        # ``Pattern`` on older interpreters.
        state_type = getattr(parser, "State", None) or parser.Pattern
        state = state_type()
        state.flags = flags

        # SUBPATTERN nodes carry (group, add_flags, del_flags, sub) since
        # Python 3.6 and (group, sub) before that.
        four_field_group = sys.version_info >= (3, 6)

        alternatives = []
        for phrase, _action in lexicon:
            gid = state.opengroup()
            body = self.subpat(phrase, flags)
            if four_field_group:
                group_arg = (gid, 0, 0, body)
            else:
                group_arg = (gid, body)
            wrapped = parser.SubPattern(state, [(SUBPATTERN, group_arg)])
            # States that track group widths need the finished sub-pattern
            # when the group is closed; older ones take only the group id.
            if hasattr(state, "groupwidths"):
                state.closegroup(gid, wrapped)
            else:
                state.closegroup(gid)
            alternatives.append(wrapped)

        combined = parser.SubPattern(state, [(BRANCH, (None, alternatives))])
        self.p = combined
        self.scanner = compiler.compile(combined)

    @staticmethod
    def _sre_modules():
        """Return the regex ``(parser, compiler)`` modules for this interpreter."""
        try:
            # Python 3.11+ keeps them inside the ``re`` package.
            from re import _parser as parser, _compiler as compiler
        except ImportError:
            import sre_parse as parser
            import sre_compile as compiler
        return parser, compiler

    @staticmethod
    def _state_of(sub):
        """Return the parser state attached to ``sub`` (``state`` or ``pattern``)."""
        return sub.state if hasattr(sub, "state") else sub.pattern

    @classmethod
    def subpat(cls, phrase, flags):
        """Parse ``phrase`` with ``flags`` and return its scrubbed parse tree.

        Raises:
            ValueError: see ``scrub_sub``.
        """
        parser, _ = cls._sre_modules()
        parsed = parser.parse(phrase, flags)
        # Parsing an empty pattern of the same type yields the flag value the
        # parser itself assigns for ``flags`` (it may add implicit bits), so
        # any difference from it was set from inside the phrase.
        baseline = parser.parse(phrase[:0], flags)
        return cls.scrub_sub(parsed, cls._state_of(baseline).flags)

    @classmethod
    def scrub_sub(cls, sub, flags=0):
        """Return a copy of ``sub`` with every group made non-capturing.

        Args:
            sub: a parsed ``SubPattern``.
            flags: flag value the pattern state is expected to carry; any
                other value is treated as an in-pattern flag setting.

        Raises:
            ValueError: on group references, named groups, or unexpected
                flags.
        """
        parser, _ = cls._sre_modules()
        state = cls._state_of(sub)
        scrubbed = []
        for op, arg in sub.data:
            arg = cls._scrub_arg(arg, flags, parser.SubPattern)
            if op in (BRANCH, SUBPATTERN):
                # The first field is the group number; None means no capture.
                arg = (None,) + tuple(arg[1:])
            if op in (GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS):
                raise ValueError("Group references not allowed in SaferScanner lexicon")
            scrubbed.append((op, arg))
        if state.groupdict:
            raise ValueError("Named captures not allowed in SaferScanner lexicon")
        if state.flags != flags:
            raise ValueError("RE flag setting not allowed in SaferScanner lexicon")
        return parser.SubPattern(state, scrubbed)

    @classmethod
    def _scrub_arg(cls, arg, flags, subpattern_type):
        """Scrub every ``SubPattern`` reachable inside an opcode argument.

        Tuples and lists are rebuilt with the same type so nested containers,
        such as the list of alternatives inside a BRANCH argument, are
        visited too.  Anything else is returned unchanged.
        """
        if isinstance(arg, subpattern_type):
            return cls.scrub_sub(arg, flags)
        if isinstance(arg, tuple):
            return tuple(cls._scrub_arg(item, flags, subpattern_type) for item in arg)
        if isinstance(arg, list):
            return [cls._scrub_arg(item, flags, subpattern_type) for item in arg]
        return arg
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment