Skip to content

Instantly share code, notes, and snippets.

@ansaso
Last active June 19, 2020 08:33
Show Gist options
  • Save ansaso/25bbb571c91db5d15e84a96261a0f47c to your computer and use it in GitHub Desktop.
Save ansaso/25bbb571c91db5d15e84a96261a0f47c to your computer and use it in GitHub Desktop.
python regex re.sub with iterable substitution argument
import re
from typing import Iterable
def sub_iter(reg: str, subs: Iterable, string: str) -> str:
'''
Perform an action equivalent to re.sub chronologically for every
match where the string substituted in is an equivalent item in an
iterable.
reg: regular expression
subs: iterable of chronological sub items. (ideally a generator)
string: string to perform subs on. Not modified in place
'''
matches = re.finditer(reg, string) #returns match object iterator
new_s = list(string)
shift = 0
for match, sub in zip(matches, subs):
start_index, stop_index = match.span()
start = start_index + shift
stop = stop_index + shift
new_s[start:stop] = list(str(sub))
old_len = (stop_index - start_index)
new_len = len(str(sub))
len_change = new_len - old_len
shift += len_change
return ''.join(new_s)
# Demo: number every pair of parentheses by replacing whatever sits between
# "(" and ")" with a running counter, e.g.
#   () (replace this) not this () ( )  --->  (1) (2) not this (3) (4)
if __name__ == '__main__':
    import itertools

    # Lookbehind/lookahead keep the parentheses themselves out of the match,
    # so only the text between them is replaced.
    between_parens = r"(?<=\()(.*?)(?=\))"
    sample = " test () () works () () "
    counter = itertools.count(1)  # yields 1, 2, 3, ...
    print(sub_iter(between_parens, counter, sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment