Created
February 21, 2023 02:14
-
-
Save arizvisa/915dae0dd9088bbdae1fce89170c49f0 to your computer and use it in GitHub Desktop.
modification of nested characters or tokens
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def indices(string, characters):
    '''Return a generator that yields each index of the given `characters` found in `string`.

    Indices are yielded in ascending order. After a match at position `i`
    the search resumes at `i + 1`.

    `characters` may be any iterable of strings (typically a plain string
    such as `'<>'`). It is materialized once up front so that one-shot
    iterators keep working after the first search.
    '''
    candidates = tuple(characters)
    position = 0
    while True:
        # Search from `position` instead of slicing `string`, so the rest of
        # the string is not copied again after every single match.
        found = [
            index
            for index in (string.find(item, position) for item in candidates)
            if index >= 0
        ]
        if not found:
            return
        nearest = min(found)
        yield nearest
        position = nearest + 1
def parse(string, pairs='<>'):
    '''Match the nested delimiters of `pairs` found in `string`.

    `pairs[0]` opens a segment; any other character reported by `indices`
    closes the most recently opened one.

    Returns a tuple `(order, tree, leftover)`:

    - `order` lists every matched `(start, stop)` segment in the order in
      which it was closed, with `stop` being the closing index plus
      `len(pairs[1])`.
    - `tree` maps the start index of the enclosing segment (or `None` at the
      top level) to the list of segments directly inside it.
    - `leftover` holds the indices of openers that were never closed,
      followed by the indices of closers that had nothing to close.
    '''
    open_positions = []
    unmatched = []
    children = {}
    closed = []

    for position in indices(string, pairs):
        if string[position] == pairs[0]:
            open_positions.append(position)
            continue

        if not open_positions:
            # A closer with no opener pending.
            unmatched.append(position)
            continue

        span = (open_positions.pop(), position + len(pairs[1]))
        parent = open_positions[-1] if open_positions else None
        children.setdefault(parent, []).append(span)
        closed.append(span)

    return closed, children, open_positions + unmatched
def verify(tree, order, index=None):
    '''Return whether `order` agrees with a depth-first walk of `tree`.

    Starting at the layer stored under `index`, every segment's children are
    visited before the segment itself, and each visited segment must equal
    the next unread element of `order`.

    `tree` maps a parent key to a list of `(start, stop)` segments, where a
    segment's `start` is the key of its own children (if it has any).
    `order` is only read, never modified. Returns False as soon as a
    mismatch is found or when `order` runs out of elements early.
    '''
    def _walk(key, remaining):
        for item in tree.get(key, []):
            start, _ = item
            if start in tree and not _walk(start, remaining):
                return False
            # `None` never equals a segment tuple, so an exhausted `order`
            # is reported as a mismatch instead of raising.
            if next(remaining, None) != item:
                return False
        return True

    # One shared iterator lets the nested walks consume `order` in sequence
    # without popping from the caller's list.
    return _walk(index, iter(order))
def augment(tree):
    '''Convert the absolute segments of `tree` into relative offsets.

    For every `parent -> [(start, stop), ...]` entry of `tree` the result
    holds `parent -> [(gap, start, size), ...]`, where `gap` is the distance
    from the end of the previous segment in that list to `start`, and `size`
    is `stop - start`. For the first segment of a list the gap is measured
    from `parent` itself, or from 0 when the parent is `None` (or 0).
    '''
    augmented = {}
    for parent, segments in tree.items():
        entries = augmented.setdefault(parent, [])
        cursor = parent or 0
        for start, stop in segments:
            entries.append((start - cursor, start, stop - start))
            cursor = stop
    return augmented
def modify(string, augmented, index=None):
    '''Rebuild `string` piece by piece from the layer of `augmented` stored under `index`.

    `augmented` maps a parent key to a list of `(skip, key, size)` entries,
    the shape produced by `augment`: `skip` characters of plain text precede
    a segment of `size` characters, and `key` identifies the layer holding
    that segment's own nested segments (if any).

    Segments with nested entries are rebuilt recursively. No piece is
    altered here, so for well-formed offsets the returned string equals
    `string`.
    '''
    pieces, position = [], 0
    for skip, key, size in augmented.get(index, []):
        start = position + skip
        stop = start + size
        pieces.append(string[position:start])
        segment = string[start:stop]
        # Look the key up in `augmented` itself: it has an entry for every
        # segment that has children, so no external `tree` is needed.
        if key in augmented:
            segment = modify(segment, augmented, key)
        pieces.append(segment)
        position = stop
    pieces.append(string[position:])
    return ''.join(pieces)
def process(processor, string, augmented, index=None):
    '''Rebuild `string`, passing every segment through `processor.send`.

    `augmented` maps a parent key to a list of `(skip, key, size)` entries,
    the shape produced by `augment`. The text skipped between segments is
    copied through unchanged. Each segment is replaced by whatever
    `processor.send(segment)` returns. Nested segments are processed first,
    so an enclosing segment is sent with its inner segments already
    replaced.

    `processor` is any object with a `send(text)` method that returns the
    replacement text, e.g. a generator that has already been advanced to its
    first `yield`.
    '''
    pieces, position = [], 0
    for skip, key, size in augmented.get(index, []):
        start = position + skip
        stop = start + size
        segment = string[start:stop]
        # Look the key up in `augmented` itself: it has an entry for every
        # segment that has children, so no external `tree` is needed.
        if key in augmented:
            segment = process(processor, segment, augmented, key)
        pieces.append(string[position:start])
        pieces.append(processor.send(segment))
        position = stop
    pieces.append(string[position:])
    return ''.join(pieces)
def coroutine(result, string, augmented, index=None):
    '''Generator that walks the layer of `augmented` stored under `index` and exchanges segment text with its caller.

    For every `(skip, key, size)` entry the generator yields text to the
    caller and takes the value sent back as the replacement for that segment.
    The skipped text and the replacement are appended to `result`. Once all
    entries are handled, the remainder of `string` is appended to `result`.
    Nothing is returned; the output accumulates in `result`.

    NOTE(review): `tree` and `replace` are not parameters and are not defined
    anywhere in the visible source, so they are looked up as globals when the
    generator runs. `replace` is presumably the recursive step for nested
    segments (possibly an earlier name of this function) - confirm.
    '''
    position = 0
    for skip, key, size in augmented.get(index, []):
        # Split off the plain text in front of the segment, then the segment itself.
        skipped, position = string[position : position + skip], position + skip
        original = string[position : position + size]
        # Collects the pieces that make up this segment's (possibly rewritten) text.
        processed = []
        if key in tree:
            # Nested segment: hand it to a sub-generator and fetch its first yielded value.
            # The local names `coroutine` and `process` shadow the module-level
            # functions of the same name inside this body.
            coroutine = replace(processed, original, augmented, key)
            process, changed = True, next(coroutine)
        else:
            # Segment without a `tree` entry: its text is used as-is.
            processed.append(original)
            process, changed = False, original
        try:
            # Relay: pass each value from the sub-generator up to the caller
            # and the caller's reply back down, until the sub-generator finishes.
            while process:
                changed = coroutine.send((yield changed))
        except StopIteration:
            pass
        finally:
            # Offer the assembled segment text to the caller; the reply becomes the replacement.
            # NOTE(review): this `yield` sits in a `finally` clause, so it also
            # runs if the generator is closed while suspended in the loop above - confirm
            # that callers always drive it to completion.
            modified = (yield ''.join(processed))
        _, position = result.extend([skipped, modified]), position + size
    # Whatever follows the last segment is copied through unchanged.
    result.append(string[position:])
def token_indices(string, tokens=('<', '>')):
    '''Return a generator that yields `(index, length)` for each of the `tokens` found in `string`.

    Matches are yielded in ascending order of index, and the search resumes
    right after the end of each match, so matches never overlap. When several
    tokens match at the same index, the shortest one is reported.

    `tokens` may be any iterable of strings; it is materialized once up
    front. Empty tokens are ignored, because a zero-length match would never
    advance the search.
    '''
    candidates = [token for token in tokens if token]
    position = 0
    while True:
        # Search from `position` instead of slicing `string`, so the rest of
        # the string is not copied again after every single match.
        matches = []
        for token in candidates:
            index = string.find(token, position)
            if index >= 0:
                matches.append((index, len(token)))
        if not matches:
            return
        # Tuples compare by index first, then by length.
        index, length = min(matches)
        yield index, length
        position = index + length
def parse_nested_tokens(string, pairs=['<', '>']):
    '''Match the nested opening and closing tokens of `pairs` found in `string`.

    `pairs[0]` opens a segment and `pairs[1]` closes the most recently opened
    one.

    Returns a tuple `(order, tree, leftover)`:

    - `order` lists every matched `(start, stop)` segment in the order in
      which it was closed; `stop` is the index just past the closing token.
    - `tree` maps the start index of the enclosing segment (or `None` at the
      top level) to the list of segments directly inside it.
    - `leftover` holds the start indices of openers that were never closed,
      followed by `(start, stop)` tuples for tokens that had nothing to close.
    '''
    pending = []
    unmatched = []
    children = {}
    closed = []

    for position, width in token_indices(string, pairs):
        end = position + width
        token = string[position:end]

        if token == pairs[0]:
            pending.append(position)
            continue

        if not pending:
            # Not an opener and nothing is open.
            unmatched.append((position, end))
            continue

        assert token == pairs[1]
        span = (pending.pop(), end)
        parent = pending[-1] if pending else None
        children.setdefault(parent, []).append(span)
        closed.append(span)

    return closed, children, pending + unmatched
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment