Created
January 31, 2022 01:24
-
-
Save phantamanta44/8b5530744476d5be7c5edc8b66380113 to your computer and use it in GitHub Desktop.
wynnatlas expression parser generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from abc import ABC | |
from collections import defaultdict | |
import itertools | |
import sys | |
from typing import Any, Iterable, Iterator, Optional, cast | |
class Word(ABC): | |
pass | |
class WordNonterm(Word): | |
def __init__(self, nonterm_name: str): | |
self.nonterm_name = nonterm_name | |
def __eq__(self, o: Any) -> bool: | |
return isinstance(o, WordNonterm) and self.nonterm_name == o.nonterm_name | |
def __hash__(self) -> int: | |
return hash(self.nonterm_name) * 31 | |
def __repr__(self) -> str: | |
return self.nonterm_name | |
class WordTerm(Word): | |
def __init__(self, term_name: str): | |
self.term_name = term_name | |
self.str_rep = term_name if term_name.isidentifier() else f'"{term_name}"' | |
def __eq__(self, o: Any) -> bool: | |
return isinstance(o, WordTerm) and self.term_name == o.term_name | |
def __hash__(self) -> int: | |
return hash(self.term_name) * 13 | |
def __repr__(self) -> str: | |
return self.str_rep | |
class WordEps(WordTerm): | |
def __init__(self): | |
super(WordEps, self).__init__('') | |
def __eq__(self, o: Any) -> bool: | |
return isinstance(o, WordEps) | |
def __hash__(self) -> int: | |
return 0xDEADBEEF | |
def __repr__(self) -> str: | |
return 'ε' | |
EPS = WordEps() | |
def main(): | |
gram_lines: list[str] | |
with open(sys.argv[1], 'r', encoding='UTF-8') as gram_file: | |
gram_lines = gram_file.readlines() | |
nonterm_strs: list[str] = [] | |
nonterm_buf: list[str] = [] | |
def flush_buf(): | |
nonlocal nonterm_buf | |
nonterm_str = '\n'.join(nonterm_buf) | |
if len(nonterm_str) > 0: | |
nonterm_strs.append(nonterm_str) | |
nonterm_buf = [] | |
for line in gram_lines: | |
line = line.strip() | |
if len(line) > 0: | |
nonterm_buf.append(line) | |
else: | |
flush_buf() | |
flush_buf() | |
nonterms: dict[str, list[list[Word]]] = {} | |
terms: set[WordTerm] = {EPS} | |
for nonterm_str in nonterm_strs: | |
nonterm_part_strs = nonterm_str.split(':=', maxsplit=1) | |
if len(nonterm_part_strs) != 2: | |
continue | |
prods: list[list[Word]] = [] | |
for prod_str in nonterm_part_strs[1].split('| '): | |
prod: list[Word] = [] | |
for term_str in prod_str.split(): | |
if term_str == 'ε': | |
prod.append(EPS) | |
elif term_str.startswith('\"'): | |
word = WordTerm(term_str[1:-1]) | |
prod.append(word) | |
terms.add(word) | |
else: | |
prod.append(WordNonterm(term_str)) | |
prods.append(prod) | |
nonterms[nonterm_part_strs[0].strip()] = prods | |
print('### PRODUCTIONS ###') | |
for nonterm_name, prods in nonterms.items(): | |
print(f'{nonterm_name} := {" ".join(repr(word) for word in prods[0])}') | |
indent = ' ' * (len(nonterm_name) + 2) | |
for prod in prods[1:]: | |
print(f'{indent}| {" ".join(repr(word) for word in prod)}') | |
print() | |
first_tbl: defaultdict[Word, set[WordTerm]] = defaultdict(set) | |
for term in terms: | |
first_tbl[term] = {term} | |
while True: | |
changed = False | |
for nonterm_name, prods in nonterms.items(): | |
nonterm_word = WordNonterm(nonterm_name) | |
first_set = first_tbl[nonterm_word] | |
pre_len = len(first_set) | |
for prod in prods: | |
for prod_word in prod: | |
seen_eps = False | |
for fst_word in first_tbl[prod_word]: | |
if fst_word == EPS: | |
seen_eps = True | |
else: | |
first_set.add(fst_word) | |
if not seen_eps: | |
break | |
else: | |
first_set.add(EPS) | |
if len(first_set) != pre_len: | |
changed = True | |
if not changed: | |
break | |
print('### FIRST ###') | |
just_len = max(len(nonterm_name) for nonterm_name in nonterms.keys()) + 2 | |
for nonterm_name in nonterms.keys(): | |
print(f'{f"`{nonterm_name}`".ljust(just_len)} | '\ | |
f'{", ".join(sorted(f"`{repr(word)}`" for word in first_tbl[WordNonterm(nonterm_name)]))}') | |
print() | |
def get_seq_fst(word_seq: Iterable[Word]) -> set[WordTerm]: | |
first_set: set[WordTerm] = set() | |
for word in word_seq: | |
seen_eps = False | |
for fst_word in first_tbl[word]: | |
if fst_word == EPS: | |
seen_eps = True | |
else: | |
first_set.add(fst_word) | |
if not seen_eps: | |
break | |
else: | |
first_set.add(EPS) | |
return first_set | |
follow_tbl: defaultdict[str, set[WordTerm]] = defaultdict(set) | |
while True: | |
changed = False | |
for nonterm_name, prods in nonterms.items(): | |
for prod in prods: | |
for prod_word_ndx in range(len(prod)): | |
prod_word = prod[prod_word_ndx] | |
if isinstance(prod_word, WordNonterm): | |
follow_set = follow_tbl[prod_word.nonterm_name] | |
pre_len = len(follow_set) | |
seen_eps = False | |
following_fst = get_seq_fst(prod[prod_word_ndx + 1:]) | |
for fst_word in following_fst: | |
if fst_word == EPS: | |
seen_eps = True | |
else: | |
follow_set.add(fst_word) | |
if seen_eps or len(following_fst) == 0: | |
for fol_word in follow_tbl[nonterm_name]: | |
follow_set.add(fol_word) | |
if len(follow_set) != pre_len: | |
changed = True | |
if not changed: | |
break | |
print('### FOLLOW ###') | |
for nonterm_name in nonterms.keys(): | |
print(f'{f"`{nonterm_name}`".ljust(just_len)} | '\ | |
f'{", ".join(sorted(f"`{repr(word)}`" for word in follow_tbl[nonterm_name]))}') | |
print() | |
parse_tbl: defaultdict[tuple[str, WordTerm], Optional[int]] = defaultdict(lambda: None) | |
def put_parse_tbl_entry(nonterm_name: str, term: WordTerm, prod_ndx: int): | |
if parse_tbl[(nonterm_name, term)] is not None: | |
raise ValueError(f'Parse table conflict: {nonterm_name} x {term} -> {parse_tbl[(nonterm_name, term)]} AND {prod_ndx}') | |
else: | |
parse_tbl[(nonterm_name, term)] = prod_ndx | |
for nonterm_name, prods in nonterms.items(): | |
for prod_ndx in range(len(prods)): | |
prod_fst = get_seq_fst(prods[prod_ndx]) | |
for term in prod_fst: | |
put_parse_tbl_entry(nonterm_name, term, prod_ndx) | |
if EPS in prod_fst: | |
for term in follow_tbl[nonterm_name]: | |
put_parse_tbl_entry(nonterm_name, term, prod_ndx) | |
term_list = sorted(terms, key=lambda t: repr(t)) | |
print('### Parse Table ###') | |
print(f'Nonterminal | {" | ".join(repr(term) for term in term_list)}') | |
print(f'----------- | {" | ".join("-" * len(repr(term)) for term in term_list)}') | |
for nonterm_name in nonterms.keys(): | |
row: list[str] = [] | |
for term in term_list: | |
prod_ndx = parse_tbl[(nonterm_name, term)] | |
row.append((str(prod_ndx) if prod_ndx is not None else '-').ljust(len(repr(term)))) | |
print(f'{nonterm_name.ljust(11)} | {" | ".join(row)}') | |
print() | |
san_nonterm_name = { nonterm_name: (nonterm_name[0].upper() + nonterm_name[1:]).replace('\'', '0') for nonterm_name in nonterms.keys() } | |
for nonterm_name, prods in nonterms.items(): | |
safe_nonterm_name = nonterm_name.replace('\'', '\\\'') | |
print(f'function take{san_nonterm_name[nonterm_name]}(tokens) {{') | |
print('const children = [];') | |
print('switch (tokens.peek.type) {') | |
term_prods = cast(Iterator[tuple[WordTerm, int]],\ | |
filter(lambda n: n[1] is not None, ((term, parse_tbl[(nonterm_name, term)]) for term in term_list))) | |
for prod_ndx, prod_terms in itertools.groupby(sorted(term_prods, key=lambda n: n[1]), lambda n: n[1]): | |
for term, _ in prod_terms: | |
print(f'case \'{"eof" if term == EPS else term.term_name}\':') | |
for word in prods[cast(int, prod_ndx)]: | |
if isinstance(word, WordNonterm): | |
print(f'children.push(take{san_nonterm_name[word.nonterm_name]}(tokens));') | |
elif isinstance(word, WordTerm) and not isinstance(word, WordEps): | |
print(f'children.push(tokens.consume(\'{word.term_name}\'));') | |
print(f'return {{ type: \'nonterm\', name: \'{safe_nonterm_name}\', prod: {prod_ndx}, children }};') | |
print('default:') | |
print(f'throw new Error(\'Could not parse {safe_nonterm_name}!\');') | |
print('}') | |
print('}') | |
if __name__ == '__main__': | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
expr := conj | |
exprList := expr exprList' | |
exprList' := "," expr exprList' | |
| ε | |
conj := disj conj' | |
conj' := "&" disj conj' | |
| ε | |
disj := cmpEq disj' | |
disj' := "|" cmpEq disj' | |
| ε | |
cmpEq := cmpRel cmpEq' | |
cmpEq' := "=" cmpRel cmpEq' | |
| "!=" cmpRel cmpEq' | |
| "?=" cmpRel cmpEq' | |
| ε | |
cmpRel := sum cmpRel' | |
cmpRel' := "<=" sum cmpRel' | |
| "<" sum cmpRel' | |
| ">" sum cmpRel' | |
| ">=" sum cmpRel' | |
| ε | |
sum := prod sum' | |
sum' := "+" prod sum' | |
| "-" prod sum' | |
| ε | |
prod := exp prod' | |
prod' := "*" exp prod' | |
| "/" exp prod' | |
| ε | |
exp := unary exp' | |
exp' := "^" unary exp' | |
| ε | |
unary := "-" unary | |
| "!" unary | |
| prim | |
prim := "nLit" | |
| "bLit" | |
| "sLit" | |
| "ident" identTail | |
| "(" expr ")" | |
identTail := "(" args ")" | |
| ε | |
args := exprList | |
| ε |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment