Created
May 3, 2022 10:43
-
-
Save Tishka17/61daf1bf25dc7e13085d96ceb31b9ae1 to your computer and use it in GitHub Desktop.
Parse bracket expression [x|y|[x]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from dataclasses import dataclass, field | |
| # Tokens | |
| from typing import List, Iterable | |
| import yaml | |
@dataclass
class Token:
    """Base class for lexical tokens produced by the tokenizer."""

    # Exact substring of the input that this token covers.
    text: str
    # Zero-based index in the input where the token starts.
    pos: int
@dataclass
class OpenBracketToken(Token):
    """Token emitted for an opening bracket ``[``."""
@dataclass
class CloseBracketToken(Token):
    """Token emitted for a closing bracket ``]``."""
@dataclass
class OrToken(Token):
    """Token emitted for the variant separator ``|``."""
@dataclass
class LiteralToken(Token):
    """Token emitted for a maximal run of characters other than ``[``, ``]``, ``|``."""
def parse_tokens(text: str):
    """Lazily split *text* into bracket, separator and literal tokens.

    ``[``, ``]`` and ``|`` each become their own single-character token.
    Every maximal run of other characters becomes one ``LiteralToken``
    whose ``pos`` is the index of the run's first character.

    Yields:
        Token subclasses in the order they occur in *text*.
    """
    delimiters = ("[", "]", "|")
    # Index where the literal currently being scanned began, or None when
    # the scanner is not inside a literal.
    literal_start = None

    for index, char in enumerate(text):
        if char not in delimiters:
            if literal_start is None:
                literal_start = index
            continue

        # A delimiter terminates any literal that was in progress.
        if literal_start is not None:
            yield LiteralToken(text[literal_start:index], pos=literal_start)
            literal_start = None

        if char == "[":
            yield OpenBracketToken(char, pos=index)
        elif char == "]":
            yield CloseBracketToken(char, pos=index)
        else:
            yield OrToken(char, pos=index)

    # Flush a literal that runs to the end of the input.
    if literal_start is not None:
        yield LiteralToken(text[literal_start:], pos=literal_start)
| # Ast | |
@dataclass
class Element:
    """Base class for nodes of the parsed tree."""

    # Zero-based index in the source text of the token that started this node.
    pos: int
@dataclass
class Block(Element):
    """Bracketed group node; ``pos`` is the index of its opening bracket."""

    # Child nodes in source order; each instance gets its own fresh list.
    variants: List[Element] = field(default_factory=list)
@dataclass
class Literal(Element):
    """Leaf node holding a run of plain text."""

    # The literal text, copied from the originating token.
    text: str
def parse_ast(tokens: Iterable[Token]):
    """Build a tree of ``Block``/``Literal`` nodes from a token stream.

    Each ``[`` opens a new ``Block`` and the matching ``]`` closes it. A
    closed block or a literal is appended to the innermost open block, or
    becomes the root when no block is open. ``|`` separators carry no
    structure and are skipped without validation, so ``[1||2]`` parses the
    same as ``[1|2]``.

    Args:
        tokens: Tokens in source order, e.g. the output of ``parse_tokens``.

    Returns:
        The root element, or ``None`` when *tokens* yields nothing that
        produces an element.

    Raises:
        ValueError: On a ``]`` with no matching ``[``, on a ``[`` that is
            never closed, on a second top-level element, or on a token of
            an unknown type.
    """
    root = None
    stack: List[Block] = []

    for token in tokens:
        if isinstance(token, OrToken):
            continue
        if isinstance(token, OpenBracketToken):
            stack.append(Block(pos=token.pos))
            continue

        if isinstance(token, CloseBracketToken):
            # Report unbalanced input as ValueError like every other parse
            # error, rather than leaking IndexError from an empty pop().
            if not stack:
                raise ValueError(f"unmatched ']' at position {token.pos}")
            element = stack.pop()
        elif isinstance(token, LiteralToken):
            element = Literal(pos=token.pos, text=token.text)
        else:
            raise ValueError(f"unexpected token: {token!r}")

        # Attach the finished element to its parent, or make it the root.
        if stack:
            stack[-1].variants.append(element)
        elif root is not None:
            raise ValueError(
                f"unexpected second top-level element at position {element.pos}"
            )
        else:
            root = element

    # Blocks still open here would otherwise be dropped silently.
    if stack:
        raise ValueError(f"unclosed '[' at position {stack[-1].pos}")
    return root
def ast_to_python(root: Element):
    """Convert a parsed tree into plain Python containers.

    A falsy *root* (such as ``None``) maps to ``None``. A ``Literal`` maps
    to its text. A ``Block`` maps to a dict with keys ``"pos"`` and
    ``"variants"``, the latter holding the recursively converted children.

    Raises:
        ValueError: If *root* is truthy but neither a ``Block`` nor a
            ``Literal``.
    """
    if not root:
        return None

    if isinstance(root, Literal):
        return root.text

    if isinstance(root, Block):
        converted = []
        for child in root.variants:
            converted.append(ast_to_python(child))
        return {"pos": root.pos, "variants": converted}

    raise ValueError
def dump_ast(root: Element):
    """Print the tree rooted at *root* as YAML under the top-level key ``root``."""
    document = {"root": ast_to_python(root)}
    rendered = yaml.dump(document)
    print(rendered)
# Demo: tokenize and parse a nested expression, then dump it as YAML.
tokens = list(parse_tokens("[1|[2|3|4]|[xxx|yyy]]"))
ast = parse_ast(tokens)
dump_ast(ast)

# Two smaller inputs, each preceded by a blank separator line.
for expression in ("1", "[1|2]"):
    print()
    dump_ast(parse_ast(parse_tokens(expression)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment