Skip to content

Instantly share code, notes, and snippets.

@Quacky2200
Forked from mdeous/repermute.py
Last active October 29, 2024 20:00
Show Gist options
  • Save Quacky2200/714acad06f3f80f6bdb92d7d49dea4bf to your computer and use it in GitHub Desktop.
Save Quacky2200/714acad06f3f80f6bdb92d7d49dea4bf to your computer and use it in GitHub Desktop.
Generate all possible permutations of a regex (Python v3)
# -*- coding: utf-8 -*-
#
# Used like this:
#
# from repermute import ipermute
#
# for s in ipermute('[A-Z]\d'):
# print s
#
# Almost all regular expression constructs are supported except for '*'
# (which in the Python sre_parse implementation matches 0 to 65535
# times), '+' and '^'. Non-ASCII characters doesn't work, but I think
# that is a limitation in the sre_parse module. It works by first
# parsing the regexp to string sequences so that [A-Z] becomes ['A',
# 'B', ... 'Z'], \d becomes ['1', ... , '9']. Then a Cartesian join is
# applied on all string sequences to get all possible permutations of
# them all.
import itertools
from sre_constants import *
import sre_parse
import string
category_chars = {
CATEGORY_DIGIT : string.digits,
CATEGORY_SPACE : string.whitespace,
CATEGORY_WORD : string.digits + string.ascii_letters + '_'
}
def unique_extend(res_list, list):
for item in list:
if item not in res_list:
res_list.append(item)
def handle_any(val):
"""
This is different from normal regexp matching. It only matches
printable ASCII characters.
"""
return string.printable
def handle_branch(_tuple):
(tok, val) = _tuple
all_opts = []
for toks in val:
opts = permute_toks(toks)
unique_extend(all_opts, opts)
return all_opts
def handle_category(val):
return list(category_chars[val])
def handle_in(val):
out = []
for tok, val in val:
out += handle_tok(tok, val)
return out
def handle_literal(val):
return [chr(val)]
def handle_max_repeat(repeat):
(min, max, val) = repeat
"""
Handle a repeat token such as {x,y} or ?.
"""
subtok, subval = val[0]
if max > 5000:
# max is the number of cartesian join operations needed to be
# carried out. More than 5000 consumes way to much memory.
raise ValueError("To many repetitions requested (%d)" % max)
optlist = handle_tok(subtok, subval)
results = []
for rep in range(min, max + 1):
results += [''.join(it) for it in join([optlist] * rep)]
return results
def handle_range(val):
lo, hi = val
return (chr(x) for x in range(lo, hi + 1))
def handle_subpattern(val):
return list(permute_toks(val[3]))
def handle_tok(tok, val):
"""
Returns a list of strings of possible permutations for this regexp
token.
"""
handlers = {
ANY : handle_any,
BRANCH : handle_branch,
CATEGORY : handle_category,
LITERAL : handle_literal,
IN : handle_in,
MAX_REPEAT : handle_max_repeat,
RANGE : handle_range,
SUBPATTERN : handle_subpattern}
try:
return handlers[tok](val)
except KeyError as e:
fmt = "Unsupported regular expression construct: %s"
raise ValueError(fmt % tok)
def permute_toks(toks):
"""
Returns a generator of strings of possible permutations for this
regexp token list.
"""
lists = [handle_tok(tok, val) for tok, val in toks]
return (''.join(it) for it in join(lists))
def join(iterlist):
"""
Cartesian join as an iterator of the supplied sequences. Borrowed
from:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302478
"""
def rloop(seqin, comb):
if seqin:
for item in seqin[0]:
newcomb = comb + [item]
for item in rloop(seqin[1:], newcomb):
yield item
else:
yield comb
return rloop(iterlist, [])
########## PUBLIC API ####################
def ipermute(p):
toks = [tok_n_val for tok_n_val in sre_parse.parse(p)]
return permute_toks(toks)
def permute(p):
return list(ipermute(p))
@OneNot
Copy link

OneNot commented Aug 5, 2024

Thanks for getting back to me.

I solved the issue I originally wanted to use this for, using a different method, but it's nice to get it fixed for the next person (or future me) that might want to use it.
The fix and test results look right to me. As for the possible side effects of this fix, without extensive testing, I'm sure you'd know better than me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment