Last active
July 14, 2018 09:20
-
-
Save edvardm/92f7fa5f433fa382ab547916a8083444 to your computer and use it in GitHub Desktop.
A simple tool that replaces long strings matching a regular expression with short sequential labels (A0, A1, ...), which is useful for comparing two sets of data for equality
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import sys | |
import string | |
def _get_fh(has_fname):
    """Return the stream to read input lines from.

    has_fname -- truthy when ``sys.argv[1]`` names the input file; the file
                 is then opened for reading in text mode.  Otherwise
                 ``sys.stdin`` is returned.
    """
    return open(sys.argv[1]) if has_fname else sys.stdin
def _get_pattern(has_fname):
    """Return the regular expression given on the command line.

    has_fname -- truthy when a file name precedes the pattern, in which case
                 the pattern is ``sys.argv[2]``; otherwise it is
                 ``sys.argv[1]``.
    """
    position = 2 if has_fname else 1
    return sys.argv[position]
def run(input_handle, pattern, seq):
    """Print every input line with pattern matches replaced by short labels.

    Each distinct matched string is assigned the next value drawn from
    ``seq`` the first time it is seen; later occurrences of the same string
    reuse that label.  Every match on a line is labelled individually, so
    two different values on one line get two different labels.

    input_handle -- iterable of text lines (an open file, ``sys.stdin``, ...)
    pattern      -- regular expression (string) selecting the text to replace
    seq          -- iterator yielding the replacement labels (strings)

    Lines are printed without their trailing CR/LF characters.  Propagates
    ``re.error`` for an invalid pattern and ``StopIteration`` if ``seq`` runs
    out of labels.
    """
    regex = re.compile(pattern)
    substitutions = {}

    def _label_for(match):
        # A callable replacement is inserted literally, so backslashes or
        # group references inside a label are never expanded as a template.
        matched_text = match.group(0)
        if matched_text not in substitutions:
            substitutions[matched_text] = next(seq)
        return substitutions[matched_text]

    # Iterate the handle directly so large or interactive input is streamed
    # instead of being read into memory first.
    for line in input_handle:
        print(regex.sub(_label_for, line).rstrip('\r\n'))
# Show the usage text and stop when the mandatory pattern argument is missing.
# The check only applies when the file is executed as a script, so importing
# the module (for reuse or testing) never terminates the importing process.
if __name__ == '__main__' and len(sys.argv) < 2:
    print("Usage: pattern_to_seq [file] pattern")
    sys.exit(1)
def make_gen(max_numval):
    """Generate an endless stream of short labels: A0, A1, ..., B0, B1, ...

    Each of the prefixes 'A' to 'Y' is combined with the numbers
    0 .. max_numval - 1 in turn.  The final prefix 'Z' never rolls over; its
    number simply keeps growing, so the generator is never exhausted.

    max_numval -- positive integer, how many labels each of the prefixes
                  'A'..'Y' provides before the next prefix is used.

    Raises ValueError (on the first ``next()``) when ``max_numval`` is less
    than 1.
    """
    if max_numval < 1:
        raise ValueError("max_numval must be a positive integer")

    *rolling_prefixes, last_prefix = string.ascii_uppercase
    for prefix in rolling_prefixes:
        for number in range(max_numval):
            yield '{}{}'.format(prefix, number)

    # No prefix is left to roll over to, so keep counting on the last one
    # without an upper bound.
    number = 0
    while True:
        yield '{}{}'.format(last_prefix, number)
        number += 1
if __name__ == '__main__':
    # Accepted forms: "pattern_to_seq pattern" (reads stdin) and
    # "pattern_to_seq file pattern".  Surplus arguments used to fall silently
    # into stdin mode with the wrong argument taken as the pattern.
    if len(sys.argv) > 3:
        print("Usage: pattern_to_seq [file] pattern")
        sys.exit(1)

    fname_present = len(sys.argv) == 3
    input_handle = _get_fh(fname_present)
    try:
        run(input_handle, _get_pattern(fname_present), make_gen(10000))
    finally:
        # Close the file this script opened, but leave stdin alone.
        if input_handle is not sys.stdin:
            input_handle.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment