Last active
June 19, 2020 08:33
-
-
Save ansaso/25bbb571c91db5d15e84a96261a0f47c to your computer and use it in GitHub Desktop.
python regex re.sub with iterable substitution argument
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from typing import Iterable
def sub_iter(reg: str, subs: Iterable, string: str) -> str:
    """
    Replace successive regex matches with successive items of an iterable.

    Behaves like re.sub, except that the n-th match of `reg` in `string`
    is replaced by str() of the n-th item drawn from `subs`. Replacement
    text is inserted literally (no backreference/escape expansion).

    If `subs` is exhausted before the matches are, the remaining matches
    are left unchanged. Items of `subs` beyond the number of matches are
    not consumed.

    reg: regular expression
    subs: iterable of substitution items, in match order (ideally a
          generator); each item is converted with str()
    string: string to perform subs on. Not modified in place

    Returns the new string.
    """
    pieces = []
    cursor = 0  # end of the previously replaced match in `string`
    # The match iterator comes first in zip() so that an item is only pulled
    # from `subs` when there is a match to pair it with.
    for match, sub in zip(re.finditer(reg, string), subs):
        start, stop = match.span()
        pieces.append(string[cursor:start])  # untouched text before the match
        pieces.append(str(sub))
        cursor = stop
    pieces.append(string[cursor:])  # tail after the last replaced match
    return ''.join(pieces)
# Example use: replace the content between every pair of parentheses
# with that pair's running number.
# () (replace this) not this () ( )  --->  (1) (2) not this (3) (4)
if __name__ == '__main__':
    # Demo: number the contents of each pair of parentheses.
    import itertools

    string = " test () () works () () "
    iterable = itertools.count(start=1, step=1)
    # Use `[^()]*` instead of a lazy `.*?`: since Python 3.7 a non-empty
    # match may start exactly where an empty match ended, so `.*?` would
    # additionally match the ") (" text between two groups and the output
    # would no longer be one number per pair of parentheses.
    regex = r"(?<=\()([^()]*)(?=\))"
    new_string = sub_iter(regex, iterable, string)
    print(new_string)  # -> " test (1) (2) works (3) (4) "
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment