Created
May 9, 2018 17:48
-
-
Save masci/b4b258a3c6dcd4ca8b0265b9da6c8f83 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pattern_filter(items, whitelist=None, blacklist=None, key=None):
    """Filter `items` by regular-expression `whitelist` and/or `blacklist`.

    The work is delegated to a helper chosen by which lists are supplied:

    * both lists: `_pattern_filter_chain` is used, with the `whitelist`
      taking precedence over the `blacklist`;
    * only `whitelist`: `_pattern_filter_quick` is called with
      `invert_match=False`;
    * only `blacklist`: `_pattern_filter_quick` is called with
      `invert_match=True`;
    * neither: `items` is returned untouched.

    Args:
        items: The items to filter.
        whitelist: Optional iterable of regular-expression patterns.
        blacklist: Optional iterable of regular-expression patterns.
        key: Optional function that is passed each item and returns the
            value the patterns are matched against.

    Returns:
        The result of the selected helper, or `items` itself when neither
        list is given (an empty list counts as "not given").
    """
    # Each branch must hand the helper's result back to the caller;
    # without the `return` the function silently yields None.
    if whitelist and blacklist:
        return _pattern_filter_chain(items, whitelist, blacklist, key)
    if whitelist:
        return _pattern_filter_quick(items, whitelist, False, key)
    if blacklist:
        return _pattern_filter_quick(items, blacklist, True, key)
    return items
def _pattern_filter_chain(items, whitelist, blacklist, key=None):
    """Filter `items` using both a `whitelist` and a `blacklist`.

    An item is dropped only when its key matches at least one `blacklist`
    pattern and no `whitelist` pattern; a whitelist match therefore
    overrides a blacklist match. Items matching neither list are kept.

    Args:
        items: Iterable of items to filter. It is traversed exactly once,
            so one-shot iterators and generators are supported.
        whitelist: Iterable of regular-expression patterns (or None).
        blacklist: Iterable of regular-expression patterns (or None).
        key: Optional function that is passed each item and returns the
            value the patterns are searched in. Falls back to
            `__return_self` when not given.

    Returns:
        A new list with the surviving items in their original order.
    """
    key = key or __return_self

    # Compile once up front instead of re-resolving every pattern for every
    # item; this also lets the pattern arguments be one-shot iterables.
    allow = [re.compile(pattern) for pattern in (whitelist or [])]
    deny = [re.compile(pattern) for pattern in (blacklist or [])]

    kept = []
    for item in items:
        item_key = key(item)
        blacklisted = any(pattern.search(item_key) for pattern in deny)
        # A whitelist match rescues an otherwise blacklisted item.
        if blacklisted and not any(pattern.search(item_key) for pattern in allow):
            continue
        kept.append(item)
    return kept
def _pattern_filter_quick(items, expr, invert_match=False, key=None):
    """Filter `items` by the regular-expression patterns in `expr`.

    Args:
        items: Iterable of items to filter. It is traversed exactly once.
        expr: Iterable of regular-expression patterns. When empty or None,
            `items` is returned unchanged.
        invert_match: When false (the default), keep only the items whose
            key matches at least one pattern. When true, keep only the
            items whose key matches none of them.
        key: Optional function that is passed each item and returns the
            value the patterns are searched in. Falls back to
            `__return_self` when not given.

    Returns:
        `items` itself when `expr` is empty, otherwise a new list with the
        selected items in their original order.
    """
    if not expr:
        return items

    key = key or __return_self

    # Iterate over `expr` (the parameter actually passed in) and compile
    # each pattern once rather than once per item.
    patterns = [re.compile(pattern) for pattern in expr]
    keep_on_match = not invert_match

    selected = []
    for item in items:
        item_key = key(item)
        matched = any(pattern.search(item_key) for pattern in patterns)
        if matched == keep_on_match:
            selected.append(item)
    return selected
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment