Created
December 7, 2022 09:01
-
-
Save smuuf/82e5b315b85b30385c7019d8f1ea0c3e to your computer and use it in GitHub Desktop.
Re or Pyre2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import logging | |
import re as module_re | |
import re2 as module_re2 | |
# Probability (compared against `random.random()`) that `compile()` involves
# pyre2 at all instead of returning a plain native `re` pattern.
USAGE_PROBABILITY = 0.5

# When true, `compile()` returns the bare pyre2 pattern on the pyre2 path
# instead of the comparing `_ReOrPyrePattern` wrapper.
USE_ONLY_PYRE2 = True

# Attribute names that live on the wrapper object itself.
_OUR_ATTRS = ('_re', '_re2', '_warn')

# `re.Pattern` methods the wrapper re-implements to run the comparison.
_OVERRIDE_RE_METHODS = ('search', 'sub', 'finditer')

# Names the wrapper's `__getattr__` / `__setattr__` must handle themselves
# rather than proxying to the wrapped native pattern.
_FINAL_ATTRS = _OUR_ATTRS + _OVERRIDE_RE_METHODS
class _ReOrPyrePattern:
    """Opt-in proxy for a native `re.Pattern` that shadow-tests `pyre2`.

    We want to find out whether some of our regex matching could be done with
    `pyre2` instead of native `re`.

    The wrapper behaves like the wrapped `re.Pattern`: unknown attributes are
    forwarded to it. For `search`, `sub` and `finditer` the same call is also
    made against a `pyre2` pattern compiled from the same source, and a
    warning is logged when the two results differ or when `pyre2` raises.

    Only the native `re` result is ever returned, so the wrapper is purely
    informative and must never change what callers observe.
    """

    _re: module_re.Pattern
    # None when pyre2 could not compile the pattern; comparisons are skipped.
    _re2: "module_re2.Pattern | None"

    def __init__(self, pattern, flags):
        """Compile `pattern` with `re` and, best effort, with `pyre2`.

        A failure of the native compile propagates to the caller; a failure
        of the pyre2 compile is only logged and disables the comparison.
        """
        self._re = module_re.compile(pattern, flags)
        self._re2 = None
        try:
            self._re2 = module_re2.compile(pattern, flags)
        except Exception:
            logging.warning(
                f"[re vs pyre2] pyre2 compile failed for regex: {pattern}"
            )

    def __getattr__(self, name):
        # Only reached when normal lookup failed. For our own names, ask
        # `object` again so a proper AttributeError is raised instead of
        # recursing through `self._re`; everything else is proxied to the
        # wrapped native pattern.
        if name in _FINAL_ATTRS:
            return object.__getattribute__(self, name)
        return getattr(self._re, name)

    def __setattr__(self, name, value):
        # Our own attributes are stored on the wrapper; any other assignment
        # is forwarded to the wrapped native pattern.
        if name in _FINAL_ATTRS:
            object.__setattr__(self, name, value)
            return
        setattr(self._re, name, value)

    def __bool__(self):
        return bool(self._re)

    # Python 2 spelling of the truthiness hook, kept for existing callers.
    __nonzero__ = __bool__

    def __str__(self):
        return str(self._re)

    def __repr__(self):
        return repr(self._re)

    @staticmethod
    def _warn(origin: str, msg: str) -> None:
        """Log a comparison warning tagged with the originating method."""
        logging.warning(f"[reorpyre][{origin}] {msg}")

    def _compare_matches(self, origin, re_result, re2_result):
        """Warn if the two match results disagree.

        Returns True only when both engines produced a match, i.e. when it
        makes sense to keep comparing further results.
        """
        if re_result and re2_result:
            if re_result.groups() != re2_result.groups():
                self._warn(
                    origin,
                    f"Groups not same for regex: {self._re.pattern}",
                )
            return True
        if re_result or re2_result:
            self._warn(
                origin,
                f"Matched only one: {re_result} vs {re2_result} "
                f"for regex: {self._re.pattern}",
            )
        return False

    def _shadow_next(self, re_result, re2_result_iter):
        """Pull the next pyre2 match and compare it with `re_result`.

        Returns False once comparing further matches is pointless (pyre2
        raised, or either side ran out of matches).
        """
        try:
            re2_result = next(re2_result_iter, None)
            return self._compare_matches('finditer', re_result, re2_result)
        except Exception:
            self._warn(
                'finditer',
                f"Pyre2 raised exception for regex: {self._re.pattern}",
            )
            return False

    def search(self, *args, **kwargs):
        """Return the native `re` search result, shadow-checking pyre2."""
        re_result = self._re.search(*args, **kwargs)
        if self._re2 is None:
            return re_result

        # Anything going wrong on the pyre2 side is logged, never raised:
        # the shadow check must not affect the caller.
        try:
            re2_result = self._re2.search(*args, **kwargs)
            self._compare_matches('search', re_result, re2_result)
        except Exception:
            self._warn(
                'search',
                f"Pyre2 raised exception for regex: {self._re.pattern}",
            )
        return re_result

    def sub(self, *args, **kwargs):
        """Return the native `re` substitution, shadow-checking pyre2.

        NOTE(review): a callable replacement is invoked by both engines, so
        any side effects it has happen twice - confirm callers are fine
        with that.
        """
        re_result = self._re.sub(*args, **kwargs)
        if self._re2 is None:
            return re_result

        try:
            re2_result = self._re2.sub(*args, **kwargs)
        except Exception:
            self._warn(
                'sub',
                f"Pyre2 raised exception for regex: {self._re.pattern}",
            )
            return re_result

        # Compare unconditionally: an empty string is a legitimate result
        # and must not hide a mismatch.
        if re_result != re2_result:
            self._warn('sub', f"Result not same for regex: {self._re.pattern}")
        return re_result

    def finditer(self, *args, **kwargs):
        """Yield every native `re` match, shadow-checking pyre2 in lockstep.

        All native matches are always yielded, regardless of how many
        matches pyre2 produces or whether it fails part-way.
        """
        re_result_iter = self._re.finditer(*args, **kwargs)
        if self._re2 is None:
            yield from re_result_iter
            return

        try:
            re2_result_iter = iter(self._re2.finditer(*args, **kwargs))
        except Exception:
            self._warn(
                'finditer',
                f"Pyre2 raised exception for regex: {self._re.pattern}",
            )
            yield from re_result_iter
            return

        comparing = True
        for re_result in re_result_iter:
            if comparing:
                comparing = self._shadow_next(re_result, re2_result_iter)
            yield re_result

        # Native matches are exhausted; report if pyre2 still has more.
        if comparing:
            self._shadow_next(None, re2_result_iter)
def compile(pattern, flags=0):
    """Compile `pattern`, randomly choosing between native `re` and pyre2.

    With probability `USAGE_PROBABILITY` pyre2 gets involved: either as the
    bare pyre2 pattern (when `USE_ONLY_PYRE2` is set) or through the
    comparing `_ReOrPyrePattern` wrapper. Otherwise a plain native `re`
    pattern is returned.
    """
    pyre2_selected = random.random() < USAGE_PROBABILITY
    if not pyre2_selected:
        return module_re.compile(pattern, flags)

    if USE_ONLY_PYRE2:
        return module_re2.compile(pattern, flags)
    return _ReOrPyrePattern(pattern, flags)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment