Created
October 26, 2019 16:58
-
-
Save cheery/c6076315311c28db5a6d0edfae13e5bb to your computer and use it in GitHub Desktop.
Error "correcting" parser for Ratstail91/Toy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- encoding: utf-8 -*- | |
""" | |
parsergen Rev.3 (with modifications) | |
~~~~~~~~~~~~~~~ | |
Note the parsing tables have changed a bit from the Rev.2 | |
""" | |
import argparse, sys, pprint | |
# This is the syntax that this script understands for now. | |
grammar_syntax = """ | |
grammar: | |
grammar: grammar VALIGN WFB declaration | |
declaration: | |
identifier ":" rhs_block | |
"lexemes" identifiers | |
rhs_block: | |
rhs_block: rhs_block VALIGN WFB rhs | |
rhs: | |
rhs: rhs symbol | |
symbol: "VALIGN" symbol | |
"WFB" symbol | |
identifier | |
string | |
identifiers: identifier | |
identifiers "," identifier | |
lexemes identifier, string | |
""" | |
grammar_state = [ | |
{ None: (1, 'grammar_0', 0, 1), | |
'"lexemes"': (0, 16, 0), | |
'^identifier': (0, 1, 0), | |
'declaration': (0, 23, 0), | |
'grammar': (0, 22, 0), | |
'grammar_0': (0, 21, 0)}, | |
{ '":"': (0, 2, 0)}, | |
{ None: (1, 'declaration', 2, 6), | |
'"VALIGN"': (0, 8, 0), | |
'"WFB"': (0, 5, 0), | |
'"lexemes"': (1, 'declaration', 2, 6), | |
'^identifier': (1, 'declaration', 2, 6), | |
'identifier': (0, 7, 0), | |
'rhs': (0, 4, 0), | |
'rhs_1': (0, 10, 0), | |
'rhs_block': (0, 13, 0), | |
'string': (0, 3, 0), | |
'symbol': (0, 12, 0)}, | |
{ None: (1, 'symbol', 1, 16), | |
'"VALIGN"': (1, 'symbol', 1, 16), | |
'"WFB"': (1, 'symbol', 1, 16), | |
'"lexemes"': (1, 'symbol', 1, 16), | |
'"|"': (1, 'symbol', 1, 16), | |
'^identifier': (1, 'symbol', 1, 16), | |
'identifier': (1, 'symbol', 1, 16), | |
'string': (1, 'symbol', 1, 16)}, | |
{ None: (1, 'rhs_block', 1, 8), | |
'"lexemes"': (1, 'rhs_block', 1, 8), | |
'"|"': (1, 'rhs_block', 1, 8), | |
'^identifier': (1, 'rhs_block', 1, 8)}, | |
{ '"VALIGN"': (0, 8, 0), | |
'"WFB"': (0, 5, 0), | |
'identifier': (0, 7, 0), | |
'string': (0, 3, 0), | |
'symbol': (0, 6, 0)}, | |
{ None: (1, 'symbol', 2, 14), | |
'"VALIGN"': (1, 'symbol', 2, 14), | |
'"WFB"': (1, 'symbol', 2, 14), | |
'"lexemes"': (1, 'symbol', 2, 14), | |
'"|"': (1, 'symbol', 2, 14), | |
'^identifier': (1, 'symbol', 2, 14), | |
'identifier': (1, 'symbol', 2, 14), | |
'string': (1, 'symbol', 2, 14)}, | |
{ None: (1, 'symbol', 1, 15), | |
'"VALIGN"': (1, 'symbol', 1, 15), | |
'"WFB"': (1, 'symbol', 1, 15), | |
'"lexemes"': (1, 'symbol', 1, 15), | |
'"|"': (1, 'symbol', 1, 15), | |
'^identifier': (1, 'symbol', 1, 15), | |
'identifier': (1, 'symbol', 1, 15), | |
'string': (1, 'symbol', 1, 15)}, | |
{ '"VALIGN"': (0, 8, 0), | |
'"WFB"': (0, 5, 0), | |
'identifier': (0, 7, 0), | |
'string': (0, 3, 0), | |
'symbol': (0, 9, 0)}, | |
{ None: (1, 'symbol', 2, 13), | |
'"VALIGN"': (1, 'symbol', 2, 13), | |
'"WFB"': (1, 'symbol', 2, 13), | |
'"lexemes"': (1, 'symbol', 2, 13), | |
'"|"': (1, 'symbol', 2, 13), | |
'^identifier': (1, 'symbol', 2, 13), | |
'identifier': (1, 'symbol', 2, 13), | |
'string': (1, 'symbol', 2, 13)}, | |
{ None: (1, 'rhs', 1, 10), | |
'"VALIGN"': (0, 8, 0), | |
'"WFB"': (0, 5, 0), | |
'"lexemes"': (1, 'rhs', 1, 10), | |
'"|"': (1, 'rhs', 1, 10), | |
'^identifier': (1, 'rhs', 1, 10), | |
'identifier': (0, 7, 0), | |
'string': (0, 3, 0), | |
'symbol': (0, 11, 0)}, | |
{ None: (1, 'rhs_1', 2, 12), | |
'"VALIGN"': (1, 'rhs_1', 2, 12), | |
'"WFB"': (1, 'rhs_1', 2, 12), | |
'"lexemes"': (1, 'rhs_1', 2, 12), | |
'"|"': (1, 'rhs_1', 2, 12), | |
'^identifier': (1, 'rhs_1', 2, 12), | |
'identifier': (1, 'rhs_1', 2, 12), | |
'string': (1, 'rhs_1', 2, 12)}, | |
{ None: (1, 'rhs_1', 1, 11), | |
'"VALIGN"': (1, 'rhs_1', 1, 11), | |
'"WFB"': (1, 'rhs_1', 1, 11), | |
'"lexemes"': (1, 'rhs_1', 1, 11), | |
'"|"': (1, 'rhs_1', 1, 11), | |
'^identifier': (1, 'rhs_1', 1, 11), | |
'identifier': (1, 'rhs_1', 1, 11), | |
'string': (1, 'rhs_1', 1, 11)}, | |
{ None: (1, 'declaration', 3, 5), | |
'"lexemes"': (1, 'declaration', 3, 5), | |
'"|"': (0, 14, 0), | |
'^identifier': (1, 'declaration', 3, 5)}, | |
{ '"VALIGN"': (0, 8, 0), | |
'"WFB"': (0, 5, 0), | |
'identifier': (0, 7, 0), | |
'rhs': (0, 15, 0), | |
'rhs_1': (0, 10, 0), | |
'string': (0, 3, 0), | |
'symbol': (0, 12, 0)}, | |
{ None: (1, 'rhs_block', 3, 9), | |
'"lexemes"': (1, 'rhs_block', 3, 9), | |
'"|"': (1, 'rhs_block', 3, 9), | |
'^identifier': (1, 'rhs_block', 3, 9)}, | |
{ 'identifier': (0, 20, 0), 'identifiers': (0, 17, 0)}, | |
{ None: (1, 'declaration', 2, 7), | |
'","': (0, 18, 0), | |
'"lexemes"': (1, 'declaration', 2, 7), | |
'^identifier': (1, 'declaration', 2, 7)}, | |
{ 'identifier': (0, 19, 0)}, | |
{ None: (1, 'identifiers', 3, 18), | |
'","': (1, 'identifiers', 3, 18), | |
'"lexemes"': (1, 'identifiers', 3, 18), | |
'^identifier': (1, 'identifiers', 3, 18)}, | |
{ None: (1, 'identifiers', 1, 17), | |
'","': (1, 'identifiers', 1, 17), | |
'"lexemes"': (1, 'identifiers', 1, 17), | |
'^identifier': (1, 'identifiers', 1, 17)}, | |
{ None: (1, None, 1, 0)}, | |
{ None: (1, 'grammar_0', 1, 2)}, | |
{ None: (1, 'grammar', 1, 3), | |
'"lexemes"': (0, 16, 0), | |
'^identifier': (0, 1, 0), | |
'declaration': (0, 23, 0), | |
'grammar': (0, 24, 0)}, | |
{ None: (1, 'grammar', 2, 4)}] | |
parser = argparse.ArgumentParser( | |
description='Generate decision tables for LR(1) parser') | |
parser.add_argument('grammar',type=str, | |
help='The input grammar to use, the syntax is described inside this script.') | |
parser.add_argument('--lalr', | |
dest='flavor', default='lr', action='store_const', const='lalr', | |
help='Generate LALR tables instead of LR') | |
parser.add_argument('-p', '--printsets', | |
action='store_const', const=True, default=False, | |
help='Print the itemset table to stdout') | |
parser.add_argument('-m', '--printmore', | |
action='store_const', const=True, default=False, | |
help='Print more details to stdout') | |
parser.add_argument('-o', '--output',type=str, | |
help='Python-formatted output of the parsing tables.') | |
args = parser.parse_args() | |
input_filename = args.grammar | |
python_output = args.output | |
parser_flavor = args.flavor | |
print_itemsets_to_stdout = args.printsets | |
print_more_to_stdout = args.printmore | |
# When the grammar file is obtained, it gets tokenized and parsed. | |
class Tokenizer: | |
def __init__(self): | |
self.state = 'st_0' | |
self.column = 1 | |
self.line = 1 | |
self.pos = (1,1) | |
self.inp = [] | |
def st_0(tok, ch): | |
if ch.isdigit(): | |
tok.pos = (tok.column, tok.line) | |
tok.inp.append(ch) | |
tok.state = 'st_digits' | |
elif ch.isalpha() or ch == "_": | |
tok.pos = (tok.column, tok.line) | |
tok.inp.append(ch) | |
tok.state = 'st_word' | |
elif ch == " " or ch == "\n" or ch == "\t" or ch == "\r" or ch is "": | |
pass | |
elif ch == "#": | |
tok.state = 'st_comment' | |
elif ch == ",": | |
advance('","', ch, (tok.column, tok.line), (tok.column+1, tok.line)) | |
elif ch == ":": | |
advance('":"', ch, (tok.column, tok.line), (tok.column+1, tok.line)) | |
elif ch == "|": | |
advance('"|"', ch, (tok.column, tok.line), (tok.column+1, tok.line)) | |
elif ch == '"': | |
tok.pos = (tok.column, tok.line) | |
tok.state = 'st_string' | |
else: | |
print("error token: {} at {}".format(ch, tok.line)) | |
advance('error', ch, | |
(tok.column, tok.line), (tok.column+1, tok.line)) | |
def st_comment(tok, ch): | |
if ch == "\n": | |
tok.state = 'st_0' | |
def st_word(tok, ch): | |
if ch.isalpha() or ch == "_" or ch.isdigit(): | |
tok.inp.append(ch) | |
else: | |
text = "".join(tok.inp) | |
if text == "lexemes": | |
advance('"' + text + '"', text, tok.pos, (tok.column+1, tok.line)) | |
elif text in ("WFB", "VALIGN"): | |
advance('"' + text + '"', text, tok.pos, (tok.column+1, tok.line)) | |
elif tok.pos[0] == 1: | |
advance('^identifier', text, tok.pos, (tok.column+1, tok.line)) | |
else: | |
advance('identifier', text, tok.pos, (tok.column+1, tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
st_0(tok, ch) | |
def st_digits(tok, ch): | |
if ch.isdigit(): | |
tok.inp.append(ch) | |
else: | |
advance('digits', "".join(tok.inp), | |
tok.pos, (tok.column+1, tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
st_0(tok, ch) | |
def st_string(tok, ch): | |
if ch == '"': | |
advance('string', "".join(tok.inp), | |
tok.pos, (tok.column,tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
else: | |
tok.inp.append(ch) | |
# Collects every 'st_' into a dictionary. | |
tokenize_n = dict((k,v) for k,v in globals().items() | |
if k.startswith('st_')) | |
def tokenize(tok, ch): | |
tokenize_n[tok.state](tok, ch) | |
if ch == "\n": | |
tok.line += 1 | |
tok.column = 1 | |
else: | |
tok.column += 1 | |
# The parsing step | |
class Parser: | |
def __init__(self): | |
self.stack = [] | |
self.forest = [] | |
self.top = 0 | |
#self.layout = 0 | |
p = Parser() | |
def advance(item, text, start, stop): | |
arg = grammar_state[p.top][item] | |
while arg[0] == 1: | |
lhs = arg[1] | |
count = arg[2] | |
items = [] | |
for _ in range(arg[2]): | |
p.top = p.stack.pop() | |
items.append(p.forest.pop()) | |
items.reverse() | |
reduced_item = reduction(arg[3], items) | |
if lhs is None: | |
p.stack = [] | |
p.forest = [] | |
p.top = 0 | |
return | |
else: | |
arg = grammar_state[p.top][lhs] | |
assert arg[0] == 0 | |
p.stack.append(p.top) | |
p.forest.append(reduced_item) | |
p.top = arg[1] | |
arg = grammar_state[p.top][item] | |
#if arg[2] & 1 != 0: # VALIGN | |
# print action, arg1, arg2 | |
# sys.exit(1) | |
p.forest.append(text) | |
p.stack.append(p.top) | |
p.top = arg[1] | |
#if arg[2] & 2 != 0: # WFB | |
# p.layout = start[0] | |
# print((item, text, start, stop)) | |
# print "shift to", p.top | |
class InputGrammar: | |
def __init__(self): | |
self.lexemes = set() | |
self.keywords = set() | |
self.grammar = [] | |
self.wfb = set() | |
self.valign = set() | |
self.ok = False | |
input_grammar = InputGrammar() | |
# The reduction table needs to be changed if grammar changes. | |
def reduction(arg, items): | |
if arg == 0: # ⊤ → grammar_0 | |
input_grammar.ok = True | |
elif arg == 1: # grammar_0 → | |
pass | |
elif arg == 2: # grammar_0 → grammar | |
pass | |
elif arg == 3: # grammar → declaration | |
pass | |
elif arg == 4: # grammar → declaration grammar | |
pass | |
elif arg == 5: # declaration → ^identifier ":" rhs_block | |
if len(input_grammar.grammar) == 0: | |
input_grammar.grammar.append((None, [items[0]])) | |
for annotated_rhs in items[2]: | |
rule = len(input_grammar.grammar) | |
rhs = [] | |
for index, (item, flags) in enumerate(annotated_rhs): | |
rhs.append(item) | |
if 'WFB' in flags: | |
input_grammar.wfb.add((rule, index)) | |
if 'VALIGN' in flags: | |
input_grammar.valign.add((rule, index)) | |
input_grammar.grammar.append((items[0], rhs)) | |
elif arg == 6: # declaration → ^identifier ":" | |
if len(input_grammar.grammar) == 0: | |
input_grammar.grammar.append((None, [items[0]])) | |
input_grammar.grammar.append((items[0], [])) | |
elif arg == 7: # declaration → "lexemes" identifiers | |
input_grammar.lexemes.update(items[1]) | |
elif arg == 8: # rhs_block → rhs | |
return [items[0]] | |
elif arg == 9: # rhs_block → rhs_block "|" rhs | |
return items[0] + [items[2]] | |
elif arg == 10: # rhs → rhs_1 | |
return items[0] | |
elif arg == 11: # rhs_1 → symbol | |
return [items[0]] | |
elif arg == 12: # rhs_1 → rhs_1 symbol | |
return items[0] + [items[1]] | |
elif arg == 13: # symbol → "VALIGN" symbol | |
items[1][1].add('VALIGN') | |
return items[1] # These probably do not even appear because | |
# tokenizer doesn't recognize these keywords yet. | |
elif arg == 14: # symbol → "WFB" symbol | |
items[1][1].add('WFB') | |
return items[1] | |
elif arg == 15: # symbol → identifier | |
return (items[0], set()) | |
elif arg == 16: # symbol → string | |
# Add each of these into keywords and lexeme sets. | |
input_grammar.keywords.add(items[0]) | |
input_grammar.lexemes.add('"' + items[0] + '"') | |
return ('"' + items[0] + '"', set()) | |
elif arg == 17: # identifiers → identifier | |
return [items[0]] | |
elif arg == 18: # identifiers → identifiers "," identifier | |
return items[0] + [items[2]] | |
else: | |
print("unknown reduction rule: {} {}".format(arg, items)) | |
tok = Tokenizer() | |
with open(input_filename, "r") as fd: | |
for ch in fd.read(): | |
tokenize(tok, ch) | |
tokenize(tok, "") | |
advance(None, "", (0,0), (0,0)) | |
grammar = input_grammar.grammar | |
lexemes = input_grammar.lexemes | |
keywords = input_grammar.keywords | |
wfb_constraints = input_grammar.wfb | |
valign_constraints = input_grammar.valign | |
def print_grammar(): | |
for lhs, rhs in grammar: | |
print("{} → {}".format(lhs or '⊤', " ".join(rhs))) | |
def print_item(prefix, (rule, index), fd=sys.stdout): | |
lhs, rhs = grammar[rule] | |
fd.write("{}{} → {}\n".format(prefix, lhs or '⊤', | |
" ".join(rhs[:index] + ['∘'] + rhs[index:]))) | |
def print_itemset(index, items, fd=sys.stdout): | |
prefix = "{}: ".format(index) | |
for item in items: | |
print_item(prefix, item, fd) | |
prefix = " " * len(prefix) | |
# The following routine builds the LR0 itemsets | |
def after_dot((rule, index)): | |
lhs, rhs = grammar[rule] | |
if index < len(rhs): | |
return rhs[index] | |
def predict(items): | |
visited = set(items) | |
prediction = set() | |
while len(items) > 0: | |
sym = after_dot(items.pop()) | |
for index, (lhs,rhs) in enumerate(grammar): | |
if sym == lhs and sym is not None: | |
item = (index, 0) | |
if item not in visited: | |
items.append(item) | |
prediction.add(item) | |
visited.add(item) | |
return prediction | |
def partition(items): | |
groups = {} | |
for item in items: | |
sym = after_dot(item) | |
# The items to be shifted are already shifted here, | |
# so they won't need the treatment later. | |
if sym is not None: | |
item = (item[0], item[1]+1) | |
try: | |
groups[sym].append(item) | |
except KeyError as _: | |
groups[sym] = [item] | |
return [(sym, frozenset(items)) for sym,items in groups.items()] | |
seed_itemsets = [ frozenset([(0,0)]) ] | |
prediction_itemsets = [] | |
itemsets_index = dict((s,i) for i,s in enumerate(seed_itemsets)) | |
shifts = [] | |
reductions = [] | |
vectors = [] | |
for k, seed_itemset in enumerate(seed_itemsets): | |
prediction_itemset = predict(list(seed_itemset)) | |
itemset = seed_itemset | prediction_itemset | |
prediction_itemsets.append(prediction_itemset) | |
k_shifts = [] | |
k_reductions = set() | |
for sym, items in partition(itemset): | |
if sym is None: | |
k_reductions.update(items) | |
else: | |
# If the seed itemset is not already in the table, | |
# it'll be added to the table. | |
try: | |
j = itemsets_index[items] | |
except KeyError as _: | |
j = len(seed_itemsets) | |
itemsets_index[items] = j | |
seed_itemsets.append(items) | |
k_shifts.append((sym, j)) | |
# The reductions and shifts are recorded | |
# this way they don't need to be recorded later. | |
shifts.append(k_shifts) | |
reductions.append(k_reductions) | |
# Vectors are used for memoizing itemsets later. | |
# It is sorted so that lookahead vectors appear in same order | |
# as what the itemsets are printed in. | |
vectors.append(tuple(sorted(seed_itemset))) | |
# Separating and sorting the itemsets provides easy, cleaner printout. | |
if print_itemsets_to_stdout: | |
print_itemset(k, list(vectors[k]) + list(sorted(prediction_itemset))) | |
# This is a good point to warn if the grammar did not parse completely. | |
exit_status = 0 | |
if not input_grammar.ok: | |
exit_status = 1 | |
sys.stderr.write("warning: The parsing encountered errors\n") | |
# Compute the set of empty symbols in the grammar. | |
def empty_symbols(): | |
symbols = set() | |
for lhs,rhs in grammar: | |
if len(rhs) == 0: | |
symbols.add(lhs) | |
m = 0 | |
n = len(symbols) | |
while m < n: | |
for lhs, rhs in grammar: | |
if all(x in symbols for x in rhs): | |
symbols.add(lhs) | |
m = n | |
n = len(symbols) | |
return symbols | |
empty = empty_symbols() | |
# Check the constraints are well-formed. | |
empty_valign_wfb = set() | |
for point in wfb_constraints | valign_constraints: | |
sym = after_dot(point) | |
if sym in empty: | |
empty_valign_wfb.add(sym) | |
if len(empty_valign_wfb) > 0: | |
exit_status = 1 | |
sys.stderr.write( | |
"warning: Constraints in empty symbols\n" | |
" Constraints will end up dangling if their rules are empty\n" | |
" constrained empty symbols:\n" | |
" {}\n".format(",".join(map(repr,sorted(empty_valign_wfb))))) | |
# FIRST & VFIRST -sets construction | |
def first_lexemes(): | |
symbols = dict() | |
routes = set() | |
for sym in lexemes: | |
symbols[sym] = set([sym]) | |
for lhs, rhs in grammar: | |
if lhs not in symbols: | |
symbols[lhs] = set([]) | |
for rule, (lhs, rhs) in enumerate(grammar): | |
for index, rhsN in enumerate(rhs): | |
if (rule,index) not in valign_constraints: | |
routes.add((lhs,rhsN)) | |
if rhsN not in empty: | |
break | |
rep = True | |
while rep: | |
rep = False | |
for lhs, rhs0 in routes: | |
n = len(symbols[lhs]) | |
symbols[lhs].update(symbols[rhs0]) | |
rep |= n < len(symbols[lhs]) | |
return symbols | |
first = first_lexemes() | |
def vfirst_lexemes(): | |
symbols = dict() | |
routes = set() | |
for sym in lexemes: | |
symbols[sym] = set([]) | |
for lhs, rhs in grammar: | |
if lhs not in symbols: | |
symbols[lhs] = set([]) | |
for rule, (lhs, rhs) in enumerate(grammar): | |
for index, rhsN in enumerate(rhs): | |
if (rule,index) in valign_constraints: | |
symbols[lhs].update(first[rhsN]) | |
if rhsN not in empty: | |
break | |
rep = True | |
while rep: | |
rep = False | |
for lhs, rhs0 in routes: | |
n = len(symbols[lhs]) | |
symbols[lhs].update(symbols[rhs0]) | |
rep |= n < len(symbols[lhs]) | |
return symbols | |
vfirst = vfirst_lexemes() | |
# FOLLOW -sets | |
def examine((rule,index)): | |
s = set() | |
w = set() | |
rhs = grammar[rule][1] | |
for i in range(index+1, len(rhs)): | |
w.update(vfirst[rhs[i]]) | |
if (rule,i) in valign_constraints: | |
w.update(first[rhs[i]]) | |
else: | |
s.update(first[rhs[i]]) | |
if rhs[i] not in empty: | |
return s,w,False | |
return s,w,True | |
def follow_lexemes(seedset, predictionset): | |
seeds = {} | |
symbols = {} | |
vsymbols = {} | |
routes = set() | |
for item in seedset | predictionset: | |
sym0 = after_dot(item) | |
if sym0 not in symbols and sym0 is not None: | |
symbols[sym0] = set() | |
vsymbols[sym0] = set() | |
seeds[sym0] = set() | |
for item in seedset: | |
sym0 = after_dot(item) | |
if sym0 is None: | |
continue | |
syms, vsyms, reductive = examine(item) | |
symbols[sym0].update(syms) | |
vsymbols[sym0].update(vsyms) | |
if reductive: | |
seeds[sym0].add(item) | |
for item in predictionset: | |
sym0 = after_dot(item) | |
if sym0 is None: | |
continue | |
syms, vsyms, reductive = examine(item) | |
symbols[sym0].update(syms) | |
vsymbols[sym0].update(vsyms) | |
if reductive: | |
lhs = grammar[item[0]][0] | |
routes.add((lhs, sym0)) | |
rep = True | |
while rep: | |
rep = False | |
for lhs, sym in routes: | |
n = symbols[sym] | |
symbols[sym].update(symbols[lhs]) | |
rep |= n < len(symbols[sym]) | |
n = vsymbols[sym] | |
vsymbols[sym].update(vsymbols[lhs]) | |
rep |= n < len(vsymbols[sym]) | |
n = len(seeds[sym]) | |
seeds[sym].update(seeds[lhs]) | |
rep |= n < len(seeds[sym]) | |
return seeds, symbols, vsymbols | |
def wfb_sets(seedset, predictionset): | |
sym_routes = dict() | |
for item in predictionset: | |
lhs = grammar[item[0]][0] | |
if lhs is None: | |
continue | |
try: | |
sym_routes[lhs].add(item) | |
except KeyError as _: | |
sym_routes[lhs] = set([item]) | |
routes = [] | |
nwfb_items = set() | |
wfb_items = set() | |
for item in predictionset: | |
sym0 = after_dot(item) | |
for sym_item in sym_routes.get(sym0, ()): | |
if item in wfb_constraints: | |
wfb_items.add(sym_item) | |
else: | |
routes.append((item, sym_item)) | |
for item in seedset: | |
sym0 = after_dot(item) | |
if item in wfb_constraints: | |
wfb_items.update(sym_routes.get(sym0, ())) | |
else: | |
nwfb_items.update(sym_routes.get(sym0, ())) | |
n = 0 | |
m = len(nwfb_items) + len(wfb_items) | |
while n < m: | |
n = m | |
for item, sym_item in routes: | |
if item in wfb_items: | |
wfb_items.add(sym_item) | |
if item in nwfb_items: | |
nwfb_items.add(sym_item) | |
m = len(nwfb_items) + len(wfb_items) | |
return wfb_items, nwfb_items | |
def valign_sets(seedset, predictionset): | |
routes = [] | |
valign_syms = set() | |
nvalign_syms = set() | |
for item in predictionset: | |
lhs = grammar[item[0]][0] | |
sym0 = after_dot(item) | |
if sym0 is not None: | |
if item in valign_constraints: | |
valign_syms.add(sym0) | |
else: | |
routes.append((lhs, sym0)) | |
for item in seedset: | |
sym0 = after_dot(item) | |
if sym0 is not None: | |
if item in valign_constraints: | |
valign_syms.add(sym0) | |
else: | |
nvalign_syms.add(sym0) | |
n = 0 | |
m = len(valign_syms) + len(nvalign_syms) | |
while n < m: | |
n = m | |
for item, sym_item in routes: | |
if item in valign_syms: | |
valign_syms.add(sym_item) | |
if item in nvalign_syms: | |
nvalign_syms.add(sym_item) | |
m = len(nvalign_syms) + len(valign_syms) | |
valign_syms &= lexemes | |
nvalign_syms &= lexemes | |
return valign_syms, nvalign_syms | |
follow_seeds = [] | |
follow_syms = [] | |
follow_vsyms = [] | |
wfbs = [] | |
nwfbs = [] | |
valigns = [] | |
nvaligns = [] | |
for i in range(len(seed_itemsets)): | |
seeds,syms,vsyms = follow_lexemes(seed_itemsets[i], prediction_itemsets[i]) | |
wfb,nwfb = wfb_sets(seed_itemsets[i], prediction_itemsets[i]) | |
valign,nvalign = valign_sets(seed_itemsets[i], prediction_itemsets[i]) | |
follow_seeds.append(seeds) | |
follow_syms.append(syms) | |
follow_vsyms.append(vsyms) | |
wfbs.append(wfb) | |
nwfbs.append(nwfb) | |
valigns.append(valign) | |
nvaligns.append(nvalign) | |
def wfb_rules(): | |
bitmap = [] | |
for rule, (lhs,rhs) in enumerate(grammar): | |
if (rule,len(rhs)-1) not in wfb_constraints: | |
is_wfb = False | |
else: | |
is_wfb = all((((rule,index) in valign_constraints) | |
or (len(first[rhs[index]]) == 0)) | |
for index in range(1, len(rhs))) | |
bitmap.append(is_wfb) | |
return bitmap | |
wfbr = wfb_rules() | |
if print_more_to_stdout: # Print FIRST/VFIRST -sets | |
print('FIRST') | |
for sym in first: | |
print(" {}: {}".format(sym, ",".join(map(repr, first[sym])))) | |
print('VFIRST') | |
for sym in vfirst: | |
print(" {}: {}".format(sym, ",".join(map(repr, vfirst[sym])))) | |
print('FOLLOW (SEED)') | |
for k, items in enumerate(follow_seeds): | |
print(" {}: {}".format(k, items)) | |
print('FOLLOW') | |
for k, items in enumerate(follow_syms): | |
print(" {}: {}".format(k, items)) | |
print('FOLLOW (VALIGN)') | |
for k, items in enumerate(follow_vsyms): | |
print(" {}: {}".format(k, items)) | |
print('WFB ITEMS') | |
for k, items in enumerate(wfbs): | |
print_itemset(k, sorted(items)) | |
print('NWFB ITEMS') | |
for k, items in enumerate(nwfbs): | |
print_itemset(k, sorted(items)) | |
print('VALIGN LEXEMES') | |
for k, items in enumerate(valigns): | |
print(" {}: {}".format(k, items)) | |
print('NVALIGN LEXEMES') | |
for k, items in enumerate(nvaligns): | |
print(" {}: {}".format(k, items)) | |
print('WFBR RULES') | |
for index, (lhs,rhs) in enumerate(grammar): | |
if wfbr[index]: | |
print(" {} → {}".format(lhs or '⊤', " ".join(rhs))) | |
def followup(k, seed_lookahead, item): | |
if item in seed_lookahead: | |
return seed_lookahead[item] | |
else: | |
# The default resolution | |
sym = grammar[item[0]][0] | |
lookahead = set(follow_syms[k][sym]) | |
vlookahead = set(follow_vsyms[k][sym]) | |
for seeditem in follow_seeds[k][sym]: | |
lookahead.update(seed_lookahead[seeditem]) | |
if (item in wfbs[k]) or wfbr[item[0]]: | |
if len(vlookahead) > 0: | |
lookahead.add('wfb') | |
return lookahead | |
else: | |
return lookahead | vlookahead | |
def previous(items): | |
for rule,index in items: | |
yield (rule, index-1) | |
lr_mapping = [(0, frozenset([None]))] | |
lr_index = dict((s,i) for i,s in enumerate(lr_mapping)) | |
lr_tabs = [] | |
lr_conflicts = {} | |
for mapping in lr_mapping: | |
k = mapping[0] | |
tab = {} | |
lr_tabs.append(tab) | |
assert 1 + len(vectors[k]) == len(mapping) | |
seed_lookahead = dict(zip(vectors[k], mapping[1:])) | |
for sym, j in shifts[k]: | |
to_mapping = (j,) + tuple( | |
frozenset(followup(k, seed_lookahead, s_item)) | |
for s_item in previous(vectors[j])) | |
flags = 0 | |
if sym in valigns[k]: | |
if sym in nvaligns[k]: | |
sys.stderr.write("error: VALIGN conflict at {} {}\n".format(k, sym)) | |
exit_status = 1 | |
else: | |
flags |= 4 | |
wfb = False | |
nwfb = False | |
for (rule, index) in previous(vectors[j]): | |
if wfbr[rule] and index + 1 == len(grammar[rule][1]): | |
wfb = True | |
else: | |
if (rule,index) in wfbs[k]: | |
wfb = True | |
if (rule,index) in nwfbs[k]: | |
nwfb = True | |
if wfb: | |
if nwfb: | |
sys.stderr.write("error: WFB conflict at {} {}\n".format(k, sym)) | |
exit_status = 1 | |
else: | |
flags |= 2 | |
if to_mapping in lr_index: | |
tab[sym] = (flags, lr_index[to_mapping]) | |
else: | |
tab[sym] = (flags, len(lr_mapping)) | |
lr_index[to_mapping] = len(lr_mapping) | |
lr_mapping.append(to_mapping) | |
had_conflicts = [] | |
for item in reductions[k]: | |
for sym in followup(k, seed_lookahead, item): | |
action = (1, item[0]) | |
if sym not in tab: | |
tab[sym] = action | |
else: | |
if (mapping,sym) in lr_conflicts: | |
lr_conflicts[(mapping,sym)].append(item) | |
elif tab[sym][0] == 1: | |
lr_conflicts[(mapping,sym)] = [ | |
(tab[sym][1], len(grammar[tab[sym][1]][1])), | |
item ] | |
had_conflicts.append(sym) | |
else: | |
lr_conflicts[(mapping,sym)] = ( | |
list(previous(vectors[lr_mapping[tab[sym][1]][0]])) | |
+ [item]) | |
had_conflicts.append(sym) | |
if len(had_conflicts) > 0: | |
sys.stderr.write('LR conflicts: {}\n'.format(mapping)) | |
for sym in had_conflicts: | |
print_itemset(' {}'.format(sym), lr_conflicts[(mapping,sym)], | |
sys.stderr) | |
if tab[sym][0] != 1: | |
sys.stderr.write(" {}: shift to {}\n".format(sym, | |
lr_mapping[tab[sym][1]][0])) | |
if parser_flavor == 'lr': | |
if python_output is not None: | |
with open(python_output, 'w') as fd: | |
pw = pprint.PrettyPrinter(indent=2, stream=fd) | |
fd.write("state = ") | |
pw.pprint(lr_tabs) | |
fd.write("rstate = ") | |
pw.pprint([(lhs, len(rhs)) for lhs,rhs in grammar]) | |
fd.write("\nkeywords = ") | |
pw.pprint(list(keywords)) | |
fd.write("\nlexemes = ") | |
pw.pprint(list(lexemes)) | |
elif parser_flavor == 'lalr': | |
lalr_tabs = [{} for _ in seed_itemsets] | |
for i, vec in enumerate(lr_mapping): | |
row = lalr_tabs[vec[0]] | |
for key, action in lr_tabs[i].items(): | |
if action[0] == 1: | |
rw_action = action | |
else: | |
to = lr_mapping[action[1]][0] | |
rw_action = (action[0], to) | |
if key not in row: | |
row[key] = rw_action | |
else: | |
assert row[key] == rw_action, ( | |
"LALR table collision" | |
"\nstate: {}" | |
"\nrow[key]: {}" | |
"\nrw_action: {}" | |
).format(vec[0], row[key], rw_action) | |
if python_output is not None: | |
with open(python_output, 'w') as fd: | |
pw = pprint.PrettyPrinter(indent=2, stream=fd) | |
fd.write("state = ") | |
pw.pprint(lalr_tabs) | |
fd.write("rstate = ") | |
pw.pprint([(lhs, len(rhs)) for lhs,rhs in grammar]) | |
fd.write("\nkeywords = ") | |
pw.pprint(list(keywords)) | |
fd.write("\nlexemes = ") | |
pw.pprint(list(lexemes)) | |
sys.exit(exit_status) | |
# Actually the WFB-lexemes probably don't do anything bad. | |
#bad_wfb = set() | |
#for point in wfb_constraints: | |
# sym = after_dot(point) | |
# if sym in lexemes: | |
# bad_wfb.add(sym) | |
# | |
#if len(bad_wfb) > 0: | |
# exit_status = 1 | |
# sys.stderr.write( | |
# "warning: WFB-constrained lexemes\n".format() | |
# " {}\n".format(",".join(map(repr,sorted(bad_wfb))))) | |
# # They may confuse the follow-set buildup | |
# # Because we continue from here, | |
# # it's better to remove the bad constraints. | |
# for point in bad_wfb: | |
# wfb_constraints.discard(point) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lexemes IDENTIFIER, STRING, NUMBER, PLUGIN | |
program: | |
program: program declaration | |
block: "{" declarations "}" | |
declarations: | |
declarations: declarations declaration | |
declaration: varDecl | constDecl | statement | |
varDecl: "var" IDENTIFIER ";" | |
| "var" IDENTIFIER "=" expression ";" | |
constDecl: "const" IDENTIFIER "=" expression ";" | |
statement: "print" expression ";" | |
| "if" "(" expression ")" statement "else" statement ";" | |
| "if" "(" expression ")" statement ";" | |
| "do" statement "while" "(" expression ")" ";" | |
| "while" "(" expression ")" statement ";" | |
# | "for" "(" varExpr expression ")" statement | |
# | "for" "(" varExpr expression ";" expression ")" statement | |
| "break" ";" | |
| "continue" ";" | |
| "return" ";" | |
| "return" expression ";" | |
| "assert" "(" expression ")" ";" | |
| "assert" "(" expression "," expression ")" ";" | |
| "import" STRING ";" | |
| "import" STRING "as" IDENTIFIER ";" | |
| block | |
| expression ";" | |
| ";" | |
#varExpr: varDecl | expression ";" | ";" | |
expression: unary | |
assignment: ternary | |
| IDENTIFIER iop expression | |
#assignment: IDENTIFIER primaries iop expression | |
iop: "=" | "+=" | "-=" | "*=" | "/=" | "%=" | |
primaries: | |
primaries: primaries "[" primary "]" | |
ternary: or | |
| or "?" expression ":" expression | |
or: and | |
| and "||" or | |
and: equality | |
| equality "&&" and | |
equality: comparison | |
| equality "==" comparison | |
| equality "!=" comparison | |
comparison: addition | |
| comparison ">" addition | |
| comparison ">=" addition | |
| comparison "<" addition | |
| comparison "<=" addition | |
addition: multiplication | |
| addition "-" multiplication | |
| addition "+" multiplication | |
multiplication: unary | |
| multiplication "*" unary | |
| multiplication "/" unary | |
| multiplication "%" unary | |
unary: prefix | |
| "!" unary | |
| "-" unary | |
prefix: "++" IDENTIFIER | |
| "--" IDENTIFIER | |
| postfix | |
postfix: IDENTIFIER "++" | |
| IDENTIFIER "--" | |
| call | |
call: primary | |
| call "(" ")" | |
| call "(" arguments ")" | |
| call "[" slice "]" | |
| call "." IDENTIFIER | |
# | call "|>" expression | |
# | call "<|" expression | |
arguments: expression | |
| arguments "," expression | |
slice: primary | |
# | ":" | |
# | ":" primary | |
# | ":" ":" primary | |
# | primary ":" | |
# | primary ":" | |
# | primary ":" ":" primary | |
# | primary ":" primary | |
# | primary ":" primary ":" primary | |
primary: "true" | |
| "false" | |
| "null" | |
| NUMBER | |
| STRING | |
| IDENTIFIER | |
| PLUGIN | |
| function | |
| "(" expression ")" | |
function: "function" "(" parameters ")" block | |
| "function" "(" ")" block | |
# | "(" parameters ")" "=>" expression | |
# | "(" parameters ")" "=>" block | |
# | IDENTIFIER "=>" expression | |
# | IDENTIFIER "=>" block | |
parameters: IDENTIFIER | |
| parameters "," IDENTIFIER |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import "Dictionary"; | |
var dict = Dictionary(); | |
dict.Insert("one", 1); | |
dict.Insert("two", 2); | |
dict.Insert("three", 3); | |
dict.Insert("four", 4); | |
dict.Insert("five", 5); | |
dict.Insert("six", 6); | |
dict.Insert("seven", 7); | |
dict.Insert("eight", 8); | |
dict.Insert("nine", 9); | |
dict.Insert("ten", 10); | |
var dict2 = Dictionary(); | |
dict2.Insert("one", 1); | |
dict2.Insert("two", 2); | |
dict2.Insert("three", 3); | |
dict2.Insert("four", 4); | |
dict2.Insert("five", 5); | |
dict2.Insert("six", 6); | |
dict2.Insert("seven", 7); | |
dict2.Insert("eight", 8); | |
dict2.Insert("nine", 9); | |
dict2.Insert("ten", 10); | |
print Dictionary.IsDictionary(dict); | |
print Dictionary.IsDictionary(0); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
state = [ { None: (1, 1), | |
'"!"': (1, 1), | |
'"("': (1, 1), | |
'"++"': (1, 1), | |
'"-"': (1, 1), | |
'"--"': (1, 1), | |
'";"': (1, 1), | |
'"assert"': (1, 1), | |
'"break"': (1, 1), | |
'"const"': (1, 1), | |
'"continue"': (1, 1), | |
'"do"': (1, 1), | |
'"false"': (1, 1), | |
'"function"': (1, 1), | |
'"if"': (1, 1), | |
'"import"': (1, 1), | |
'"null"': (1, 1), | |
'"print"': (1, 1), | |
'"return"': (1, 1), | |
'"true"': (1, 1), | |
'"var"': (1, 1), | |
'"while"': (1, 1), | |
'"{"': (1, 1), | |
'IDENTIFIER': (1, 1), | |
'NUMBER': (1, 1), | |
'PLUGIN': (1, 1), | |
'STRING': (1, 1), | |
'program': (0, 1)}, | |
{ None: (1, 0), | |
'"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"const"': (0, 29), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"var"': (0, 14), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'constDecl': (0, 31), | |
'declaration': (0, 4), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 23), | |
'unary': (0, 7), | |
'varDecl': (0, 26)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 40)}, | |
{ 'IDENTIFIER': (0, 41)}, | |
{ None: (1, 2), | |
'"!"': (1, 2), | |
'"("': (1, 2), | |
'"++"': (1, 2), | |
'"-"': (1, 2), | |
'"--"': (1, 2), | |
'";"': (1, 2), | |
'"assert"': (1, 2), | |
'"break"': (1, 2), | |
'"const"': (1, 2), | |
'"continue"': (1, 2), | |
'"do"': (1, 2), | |
'"false"': (1, 2), | |
'"function"': (1, 2), | |
'"if"': (1, 2), | |
'"import"': (1, 2), | |
'"null"': (1, 2), | |
'"print"': (1, 2), | |
'"return"': (1, 2), | |
'"true"': (1, 2), | |
'"var"': (1, 2), | |
'"while"': (1, 2), | |
'"{"': (1, 2), | |
'IDENTIFIER': (1, 2), | |
'NUMBER': (1, 2), | |
'PLUGIN': (1, 2), | |
'STRING': (1, 2)}, | |
{ '"("': (1, 80), | |
'")"': (1, 80), | |
'","': (1, 80), | |
'"."': (1, 80), | |
'";"': (1, 80), | |
'"["': (1, 80), | |
'"]"': (1, 80)}, | |
{ '"("': (1, 77), | |
'")"': (1, 77), | |
'","': (1, 77), | |
'"."': (1, 77), | |
'";"': (1, 77), | |
'"["': (1, 77), | |
'"]"': (1, 77)}, | |
{ '")"': (1, 28), '","': (1, 28), '";"': (1, 28)}, | |
{ '"("': (1, 78), | |
'")"': (1, 78), | |
'","': (1, 78), | |
'"."': (1, 78), | |
'";"': (1, 78), | |
'"["': (1, 78), | |
'"]"': (1, 78)}, | |
{ '")"': (1, 60), '","': (1, 60), '";"': (1, 60)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 42), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"("': (1, 81), | |
'")"': (1, 81), | |
'","': (1, 81), | |
'"."': (1, 81), | |
'";"': (1, 81), | |
'"["': (1, 81), | |
'"]"': (1, 81)}, | |
{ 'STRING': (0, 43)}, | |
{ '"("': (1, 79), | |
'")"': (1, 79), | |
'","': (1, 79), | |
'"."': (1, 79), | |
'";"': (1, 79), | |
'"["': (1, 79), | |
'"]"': (1, 79)}, | |
{ 'IDENTIFIER': (0, 44)}, | |
{ '"("': (0, 46), | |
'")"': (1, 68), | |
'","': (1, 68), | |
'"."': (0, 45), | |
'";"': (1, 68), | |
'"["': (0, 47)}, | |
{ '"("': (0, 48)}, | |
{ '"!"': (1, 4), | |
'"("': (1, 4), | |
'"++"': (1, 4), | |
'"-"': (1, 4), | |
'"--"': (1, 4), | |
'";"': (1, 4), | |
'"assert"': (1, 4), | |
'"break"': (1, 4), | |
'"const"': (1, 4), | |
'"continue"': (1, 4), | |
'"do"': (1, 4), | |
'"false"': (1, 4), | |
'"function"': (1, 4), | |
'"if"': (1, 4), | |
'"import"': (1, 4), | |
'"null"': (1, 4), | |
'"print"': (1, 4), | |
'"return"': (1, 4), | |
'"true"': (1, 4), | |
'"var"': (1, 4), | |
'"while"': (1, 4), | |
'"{"': (1, 4), | |
'"}"': (1, 4), | |
'IDENTIFIER': (1, 4), | |
'NUMBER': (1, 4), | |
'PLUGIN': (1, 4), | |
'STRING': (1, 4), | |
'declarations': (0, 49)}, | |
{ '";"': (0, 50)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 51)}, | |
{ '"("': (1, 82), | |
'")"': (1, 82), | |
'"++"': (0, 53), | |
'","': (1, 82), | |
'"--"': (0, 52), | |
'"."': (1, 82), | |
'";"': (1, 82), | |
'"["': (1, 82)}, | |
{ '"("': (0, 54)}, | |
{ '"("': (1, 84), | |
'")"': (1, 84), | |
'","': (1, 84), | |
'"."': (1, 84), | |
'";"': (1, 84), | |
'"["': (1, 84), | |
'"]"': (1, 84)}, | |
{ None: (1, 8), | |
'"!"': (1, 8), | |
'"("': (1, 8), | |
'"++"': (1, 8), | |
'"-"': (1, 8), | |
'"--"': (1, 8), | |
'";"': (1, 8), | |
'"assert"': (1, 8), | |
'"break"': (1, 8), | |
'"const"': (1, 8), | |
'"continue"': (1, 8), | |
'"do"': (1, 8), | |
'"false"': (1, 8), | |
'"function"': (1, 8), | |
'"if"': (1, 8), | |
'"import"': (1, 8), | |
'"null"': (1, 8), | |
'"print"': (1, 8), | |
'"return"': (1, 8), | |
'"true"': (1, 8), | |
'"var"': (1, 8), | |
'"while"': (1, 8), | |
'"{"': (1, 8), | |
'"}"': (1, 8), | |
'IDENTIFIER': (1, 8), | |
'NUMBER': (1, 8), | |
'PLUGIN': (1, 8), | |
'STRING': (1, 8)}, | |
{ '"("': (0, 55)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 56), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ None: (1, 6), | |
'"!"': (1, 6), | |
'"("': (1, 6), | |
'"++"': (1, 6), | |
'"-"': (1, 6), | |
'"--"': (1, 6), | |
'";"': (1, 6), | |
'"assert"': (1, 6), | |
'"break"': (1, 6), | |
'"const"': (1, 6), | |
'"continue"': (1, 6), | |
'"do"': (1, 6), | |
'"false"': (1, 6), | |
'"function"': (1, 6), | |
'"if"': (1, 6), | |
'"import"': (1, 6), | |
'"null"': (1, 6), | |
'"print"': (1, 6), | |
'"return"': (1, 6), | |
'"true"': (1, 6), | |
'"var"': (1, 6), | |
'"while"': (1, 6), | |
'"{"': (1, 6), | |
'"}"': (1, 6), | |
'IDENTIFIER': (1, 6), | |
'NUMBER': (1, 6), | |
'PLUGIN': (1, 6), | |
'STRING': (1, 6)}, | |
{ '"("': (1, 69), | |
'")"': (1, 69), | |
'","': (1, 69), | |
'"."': (1, 69), | |
'";"': (1, 69), | |
'"["': (1, 69)}, | |
{ '";"': (0, 57)}, | |
{ 'IDENTIFIER': (0, 58)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 59), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 60), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ None: (1, 7), | |
'"!"': (1, 7), | |
'"("': (1, 7), | |
'"++"': (1, 7), | |
'"-"': (1, 7), | |
'"--"': (1, 7), | |
'";"': (1, 7), | |
'"assert"': (1, 7), | |
'"break"': (1, 7), | |
'"const"': (1, 7), | |
'"continue"': (1, 7), | |
'"do"': (1, 7), | |
'"false"': (1, 7), | |
'"function"': (1, 7), | |
'"if"': (1, 7), | |
'"import"': (1, 7), | |
'"null"': (1, 7), | |
'"print"': (1, 7), | |
'"return"': (1, 7), | |
'"true"': (1, 7), | |
'"var"': (1, 7), | |
'"while"': (1, 7), | |
'"{"': (1, 7), | |
'"}"': (1, 7), | |
'IDENTIFIER': (1, 7), | |
'NUMBER': (1, 7), | |
'PLUGIN': (1, 7), | |
'STRING': (1, 7)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 61), | |
'unary': (0, 7)}, | |
{ '"("': (0, 62)}, | |
{ '"("': (1, 83), | |
'")"': (1, 83), | |
'","': (1, 83), | |
'"."': (1, 83), | |
'";"': (1, 83), | |
'"["': (1, 83), | |
'"]"': (1, 83)}, | |
{ '")"': (1, 65), '","': (1, 65), '";"': (1, 65)}, | |
{ 'IDENTIFIER': (0, 63)}, | |
{ None: (1, 27), | |
'"!"': (1, 27), | |
'"("': (1, 27), | |
'"++"': (1, 27), | |
'"-"': (1, 27), | |
'"--"': (1, 27), | |
'";"': (1, 27), | |
'"assert"': (1, 27), | |
'"break"': (1, 27), | |
'"const"': (1, 27), | |
'"continue"': (1, 27), | |
'"do"': (1, 27), | |
'"else"': (1, 27), | |
'"false"': (1, 27), | |
'"function"': (1, 27), | |
'"if"': (1, 27), | |
'"import"': (1, 27), | |
'"null"': (1, 27), | |
'"print"': (1, 27), | |
'"return"': (1, 27), | |
'"true"': (1, 27), | |
'"var"': (1, 27), | |
'"while"': (1, 27), | |
'"{"': (1, 27), | |
'"}"': (1, 27), | |
'IDENTIFIER': (1, 27), | |
'NUMBER': (1, 27), | |
'PLUGIN': (1, 27), | |
'STRING': (1, 27)}, | |
{ '";"': (0, 64)}, | |
{ None: (1, 25), | |
'"!"': (1, 25), | |
'"("': (1, 25), | |
'"++"': (1, 25), | |
'"-"': (1, 25), | |
'"--"': (1, 25), | |
'";"': (1, 25), | |
'"assert"': (1, 25), | |
'"break"': (1, 25), | |
'"const"': (1, 25), | |
'"continue"': (1, 25), | |
'"do"': (1, 25), | |
'"else"': (1, 25), | |
'"false"': (1, 25), | |
'"function"': (1, 25), | |
'"if"': (1, 25), | |
'"import"': (1, 25), | |
'"null"': (1, 25), | |
'"print"': (1, 25), | |
'"return"': (1, 25), | |
'"true"': (1, 25), | |
'"var"': (1, 25), | |
'"while"': (1, 25), | |
'"{"': (1, 25), | |
'"}"': (1, 25), | |
'IDENTIFIER': (1, 25), | |
'NUMBER': (1, 25), | |
'PLUGIN': (1, 25), | |
'STRING': (1, 25)}, | |
{ '")"': (1, 62), '","': (1, 62), '";"': (1, 62)}, | |
{ '")"': (1, 64), '","': (1, 64), '";"': (1, 64)}, | |
{ '";"': (0, 65)}, | |
{ '";"': (0, 67), '"as"': (0, 66)}, | |
{ '";"': (0, 69), '"="': (0, 68)}, | |
{ 'IDENTIFIER': (0, 70)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'")"': (0, 71), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'arguments': (0, 72), | |
'call': (0, 15), | |
'expression': (0, 73), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"("': (0, 25), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 76), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'function': (0, 22), | |
'primary': (0, 74), | |
'slice': (0, 75)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 77), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"const"': (0, 29), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"var"': (0, 14), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'"}"': (0, 79), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'constDecl': (0, 31), | |
'declaration': (0, 78), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 23), | |
'unary': (0, 7), | |
'varDecl': (0, 26)}, | |
{ None: (1, 17), | |
'"!"': (1, 17), | |
'"("': (1, 17), | |
'"++"': (1, 17), | |
'"-"': (1, 17), | |
'"--"': (1, 17), | |
'";"': (1, 17), | |
'"assert"': (1, 17), | |
'"break"': (1, 17), | |
'"const"': (1, 17), | |
'"continue"': (1, 17), | |
'"do"': (1, 17), | |
'"else"': (1, 17), | |
'"false"': (1, 17), | |
'"function"': (1, 17), | |
'"if"': (1, 17), | |
'"import"': (1, 17), | |
'"null"': (1, 17), | |
'"print"': (1, 17), | |
'"return"': (1, 17), | |
'"true"': (1, 17), | |
'"var"': (1, 17), | |
'"while"': (1, 17), | |
'"{"': (1, 17), | |
'"}"': (1, 17), | |
'IDENTIFIER': (1, 17), | |
'NUMBER': (1, 17), | |
'PLUGIN': (1, 17), | |
'STRING': (1, 17)}, | |
{ '")"': (1, 61), '","': (1, 61), '";"': (1, 61)}, | |
{ '")"': (1, 67), '","': (1, 67), '";"': (1, 67)}, | |
{ '")"': (1, 66), '","': (1, 66), '";"': (1, 66)}, | |
{ '")"': (0, 81), 'IDENTIFIER': (0, 80), 'parameters': (0, 82)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 83), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '")"': (0, 84)}, | |
{ None: (1, 18), | |
'"!"': (1, 18), | |
'"("': (1, 18), | |
'"++"': (1, 18), | |
'"-"': (1, 18), | |
'"--"': (1, 18), | |
'";"': (1, 18), | |
'"assert"': (1, 18), | |
'"break"': (1, 18), | |
'"const"': (1, 18), | |
'"continue"': (1, 18), | |
'"do"': (1, 18), | |
'"else"': (1, 18), | |
'"false"': (1, 18), | |
'"function"': (1, 18), | |
'"if"': (1, 18), | |
'"import"': (1, 18), | |
'"null"': (1, 18), | |
'"print"': (1, 18), | |
'"return"': (1, 18), | |
'"true"': (1, 18), | |
'"var"': (1, 18), | |
'"while"': (1, 18), | |
'"{"': (1, 18), | |
'"}"': (1, 18), | |
'IDENTIFIER': (1, 18), | |
'NUMBER': (1, 18), | |
'PLUGIN': (1, 18), | |
'STRING': (1, 18)}, | |
{ '"="': (0, 85)}, | |
{ None: (1, 19), | |
'"!"': (1, 19), | |
'"("': (1, 19), | |
'"++"': (1, 19), | |
'"-"': (1, 19), | |
'"--"': (1, 19), | |
'";"': (1, 19), | |
'"assert"': (1, 19), | |
'"break"': (1, 19), | |
'"const"': (1, 19), | |
'"continue"': (1, 19), | |
'"do"': (1, 19), | |
'"else"': (1, 19), | |
'"false"': (1, 19), | |
'"function"': (1, 19), | |
'"if"': (1, 19), | |
'"import"': (1, 19), | |
'"null"': (1, 19), | |
'"print"': (1, 19), | |
'"return"': (1, 19), | |
'"true"': (1, 19), | |
'"var"': (1, 19), | |
'"while"': (1, 19), | |
'"{"': (1, 19), | |
'"}"': (1, 19), | |
'IDENTIFIER': (1, 19), | |
'NUMBER': (1, 19), | |
'PLUGIN': (1, 19), | |
'STRING': (1, 19)}, | |
{ '";"': (0, 86)}, | |
{ '"while"': (0, 87)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 88), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '")"': (1, 63), '","': (1, 63), '";"': (1, 63)}, | |
{ None: (1, 26), | |
'"!"': (1, 26), | |
'"("': (1, 26), | |
'"++"': (1, 26), | |
'"-"': (1, 26), | |
'"--"': (1, 26), | |
'";"': (1, 26), | |
'"assert"': (1, 26), | |
'"break"': (1, 26), | |
'"const"': (1, 26), | |
'"continue"': (1, 26), | |
'"do"': (1, 26), | |
'"else"': (1, 26), | |
'"false"': (1, 26), | |
'"function"': (1, 26), | |
'"if"': (1, 26), | |
'"import"': (1, 26), | |
'"null"': (1, 26), | |
'"print"': (1, 26), | |
'"return"': (1, 26), | |
'"true"': (1, 26), | |
'"var"': (1, 26), | |
'"while"': (1, 26), | |
'"{"': (1, 26), | |
'"}"': (1, 26), | |
'IDENTIFIER': (1, 26), | |
'NUMBER': (1, 26), | |
'PLUGIN': (1, 26), | |
'STRING': (1, 26)}, | |
{ None: (1, 12), | |
'"!"': (1, 12), | |
'"("': (1, 12), | |
'"++"': (1, 12), | |
'"-"': (1, 12), | |
'"--"': (1, 12), | |
'";"': (1, 12), | |
'"assert"': (1, 12), | |
'"break"': (1, 12), | |
'"const"': (1, 12), | |
'"continue"': (1, 12), | |
'"do"': (1, 12), | |
'"else"': (1, 12), | |
'"false"': (1, 12), | |
'"function"': (1, 12), | |
'"if"': (1, 12), | |
'"import"': (1, 12), | |
'"null"': (1, 12), | |
'"print"': (1, 12), | |
'"return"': (1, 12), | |
'"true"': (1, 12), | |
'"var"': (1, 12), | |
'"while"': (1, 12), | |
'"{"': (1, 12), | |
'"}"': (1, 12), | |
'IDENTIFIER': (1, 12), | |
'NUMBER': (1, 12), | |
'PLUGIN': (1, 12), | |
'STRING': (1, 12)}, | |
{ 'IDENTIFIER': (0, 89)}, | |
{ None: (1, 23), | |
'"!"': (1, 23), | |
'"("': (1, 23), | |
'"++"': (1, 23), | |
'"-"': (1, 23), | |
'"--"': (1, 23), | |
'";"': (1, 23), | |
'"assert"': (1, 23), | |
'"break"': (1, 23), | |
'"const"': (1, 23), | |
'"continue"': (1, 23), | |
'"do"': (1, 23), | |
'"else"': (1, 23), | |
'"false"': (1, 23), | |
'"function"': (1, 23), | |
'"if"': (1, 23), | |
'"import"': (1, 23), | |
'"null"': (1, 23), | |
'"print"': (1, 23), | |
'"return"': (1, 23), | |
'"true"': (1, 23), | |
'"var"': (1, 23), | |
'"while"': (1, 23), | |
'"{"': (1, 23), | |
'"}"': (1, 23), | |
'IDENTIFIER': (1, 23), | |
'NUMBER': (1, 23), | |
'PLUGIN': (1, 23), | |
'STRING': (1, 23)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 90), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ None: (1, 9), | |
'"!"': (1, 9), | |
'"("': (1, 9), | |
'"++"': (1, 9), | |
'"-"': (1, 9), | |
'"--"': (1, 9), | |
'";"': (1, 9), | |
'"assert"': (1, 9), | |
'"break"': (1, 9), | |
'"const"': (1, 9), | |
'"continue"': (1, 9), | |
'"do"': (1, 9), | |
'"false"': (1, 9), | |
'"function"': (1, 9), | |
'"if"': (1, 9), | |
'"import"': (1, 9), | |
'"null"': (1, 9), | |
'"print"': (1, 9), | |
'"return"': (1, 9), | |
'"true"': (1, 9), | |
'"var"': (1, 9), | |
'"while"': (1, 9), | |
'"{"': (1, 9), | |
'"}"': (1, 9), | |
'IDENTIFIER': (1, 9), | |
'NUMBER': (1, 9), | |
'PLUGIN': (1, 9), | |
'STRING': (1, 9)}, | |
{ '"("': (1, 73), | |
'")"': (1, 73), | |
'","': (1, 73), | |
'"."': (1, 73), | |
'";"': (1, 73), | |
'"["': (1, 73)}, | |
{ '"("': (1, 70), | |
'")"': (1, 70), | |
'","': (1, 70), | |
'"."': (1, 70), | |
'";"': (1, 70), | |
'"["': (1, 70)}, | |
{ '")"': (0, 91), '","': (0, 92)}, | |
{ '")"': (1, 74), '","': (1, 74)}, | |
{ '"]"': (1, 76)}, | |
{ '"]"': (0, 93)}, | |
{ '"]"': (1, 82)}, | |
{ '")"': (0, 94), '","': (0, 95)}, | |
{ '"!"': (1, 5), | |
'"("': (1, 5), | |
'"++"': (1, 5), | |
'"-"': (1, 5), | |
'"--"': (1, 5), | |
'";"': (1, 5), | |
'"assert"': (1, 5), | |
'"break"': (1, 5), | |
'"const"': (1, 5), | |
'"continue"': (1, 5), | |
'"do"': (1, 5), | |
'"false"': (1, 5), | |
'"function"': (1, 5), | |
'"if"': (1, 5), | |
'"import"': (1, 5), | |
'"null"': (1, 5), | |
'"print"': (1, 5), | |
'"return"': (1, 5), | |
'"true"': (1, 5), | |
'"var"': (1, 5), | |
'"while"': (1, 5), | |
'"{"': (1, 5), | |
'"}"': (1, 5), | |
'IDENTIFIER': (1, 5), | |
'NUMBER': (1, 5), | |
'PLUGIN': (1, 5), | |
'STRING': (1, 5)}, | |
{ None: (1, 3), | |
'"!"': (1, 3), | |
'"("': (1, 3), | |
'")"': (1, 3), | |
'"++"': (1, 3), | |
'","': (1, 3), | |
'"-"': (1, 3), | |
'"--"': (1, 3), | |
'"."': (1, 3), | |
'";"': (1, 3), | |
'"["': (1, 3), | |
'"]"': (1, 3), | |
'"assert"': (1, 3), | |
'"break"': (1, 3), | |
'"const"': (1, 3), | |
'"continue"': (1, 3), | |
'"do"': (1, 3), | |
'"else"': (1, 3), | |
'"false"': (1, 3), | |
'"function"': (1, 3), | |
'"if"': (1, 3), | |
'"import"': (1, 3), | |
'"null"': (1, 3), | |
'"print"': (1, 3), | |
'"return"': (1, 3), | |
'"true"': (1, 3), | |
'"var"': (1, 3), | |
'"while"': (1, 3), | |
'"{"': (1, 3), | |
'"}"': (1, 3), | |
'IDENTIFIER': (1, 3), | |
'NUMBER': (1, 3), | |
'PLUGIN': (1, 3), | |
'STRING': (1, 3)}, | |
{ '")"': (1, 88), '","': (1, 88)}, | |
{ '"{"': (0, 17), 'block': (0, 96)}, | |
{ '")"': (0, 97), '","': (0, 98)}, | |
{ '")"': (0, 99)}, | |
{ '"("': (1, 85), | |
'")"': (1, 85), | |
'","': (1, 85), | |
'"."': (1, 85), | |
'";"': (1, 85), | |
'"["': (1, 85), | |
'"]"': (1, 85)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 100), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ None: (1, 20), | |
'"!"': (1, 20), | |
'"("': (1, 20), | |
'"++"': (1, 20), | |
'"-"': (1, 20), | |
'"--"': (1, 20), | |
'";"': (1, 20), | |
'"assert"': (1, 20), | |
'"break"': (1, 20), | |
'"const"': (1, 20), | |
'"continue"': (1, 20), | |
'"do"': (1, 20), | |
'"else"': (1, 20), | |
'"false"': (1, 20), | |
'"function"': (1, 20), | |
'"if"': (1, 20), | |
'"import"': (1, 20), | |
'"null"': (1, 20), | |
'"print"': (1, 20), | |
'"return"': (1, 20), | |
'"true"': (1, 20), | |
'"var"': (1, 20), | |
'"while"': (1, 20), | |
'"{"': (1, 20), | |
'"}"': (1, 20), | |
'IDENTIFIER': (1, 20), | |
'NUMBER': (1, 20), | |
'PLUGIN': (1, 20), | |
'STRING': (1, 20)}, | |
{ '"("': (0, 101)}, | |
{ '")"': (0, 102)}, | |
{ '";"': (0, 103)}, | |
{ '";"': (0, 104)}, | |
{ '"("': (1, 71), | |
'")"': (1, 71), | |
'","': (1, 71), | |
'"."': (1, 71), | |
'";"': (1, 71), | |
'"["': (1, 71)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 105), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"("': (1, 72), | |
'")"': (1, 72), | |
'","': (1, 72), | |
'"."': (1, 72), | |
'";"': (1, 72), | |
'"["': (1, 72)}, | |
{ '";"': (0, 106)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 107), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"("': (1, 87), | |
'")"': (1, 87), | |
'","': (1, 87), | |
'"."': (1, 87), | |
'";"': (1, 87), | |
'"["': (1, 87), | |
'"]"': (1, 87)}, | |
{ '"{"': (0, 17), 'block': (0, 108)}, | |
{ 'IDENTIFIER': (0, 109)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 110), | |
'unary': (0, 7)}, | |
{ '";"': (0, 111)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"null"': (0, 13), | |
'"true"': (0, 6), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'call': (0, 15), | |
'expression': (0, 112), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'unary': (0, 7)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 113), | |
'unary': (0, 7)}, | |
{ None: (1, 24), | |
'"!"': (1, 24), | |
'"("': (1, 24), | |
'"++"': (1, 24), | |
'"-"': (1, 24), | |
'"--"': (1, 24), | |
'";"': (1, 24), | |
'"assert"': (1, 24), | |
'"break"': (1, 24), | |
'"const"': (1, 24), | |
'"continue"': (1, 24), | |
'"do"': (1, 24), | |
'"else"': (1, 24), | |
'"false"': (1, 24), | |
'"function"': (1, 24), | |
'"if"': (1, 24), | |
'"import"': (1, 24), | |
'"null"': (1, 24), | |
'"print"': (1, 24), | |
'"return"': (1, 24), | |
'"true"': (1, 24), | |
'"var"': (1, 24), | |
'"while"': (1, 24), | |
'"{"': (1, 24), | |
'"}"': (1, 24), | |
'IDENTIFIER': (1, 24), | |
'NUMBER': (1, 24), | |
'PLUGIN': (1, 24), | |
'STRING': (1, 24)}, | |
{ None: (1, 10), | |
'"!"': (1, 10), | |
'"("': (1, 10), | |
'"++"': (1, 10), | |
'"-"': (1, 10), | |
'"--"': (1, 10), | |
'";"': (1, 10), | |
'"assert"': (1, 10), | |
'"break"': (1, 10), | |
'"const"': (1, 10), | |
'"continue"': (1, 10), | |
'"do"': (1, 10), | |
'"false"': (1, 10), | |
'"function"': (1, 10), | |
'"if"': (1, 10), | |
'"import"': (1, 10), | |
'"null"': (1, 10), | |
'"print"': (1, 10), | |
'"return"': (1, 10), | |
'"true"': (1, 10), | |
'"var"': (1, 10), | |
'"while"': (1, 10), | |
'"{"': (1, 10), | |
'"}"': (1, 10), | |
'IDENTIFIER': (1, 10), | |
'NUMBER': (1, 10), | |
'PLUGIN': (1, 10), | |
'STRING': (1, 10)}, | |
{ '")"': (1, 75), '","': (1, 75)}, | |
{ None: (1, 21), | |
'"!"': (1, 21), | |
'"("': (1, 21), | |
'"++"': (1, 21), | |
'"-"': (1, 21), | |
'"--"': (1, 21), | |
'";"': (1, 21), | |
'"assert"': (1, 21), | |
'"break"': (1, 21), | |
'"const"': (1, 21), | |
'"continue"': (1, 21), | |
'"do"': (1, 21), | |
'"else"': (1, 21), | |
'"false"': (1, 21), | |
'"function"': (1, 21), | |
'"if"': (1, 21), | |
'"import"': (1, 21), | |
'"null"': (1, 21), | |
'"print"': (1, 21), | |
'"return"': (1, 21), | |
'"true"': (1, 21), | |
'"var"': (1, 21), | |
'"while"': (1, 21), | |
'"{"': (1, 21), | |
'"}"': (1, 21), | |
'IDENTIFIER': (1, 21), | |
'NUMBER': (1, 21), | |
'PLUGIN': (1, 21), | |
'STRING': (1, 21)}, | |
{ '")"': (0, 114)}, | |
{ '"("': (1, 86), | |
'")"': (1, 86), | |
'","': (1, 86), | |
'"."': (1, 86), | |
'";"': (1, 86), | |
'"["': (1, 86), | |
'"]"': (1, 86)}, | |
{ '")"': (1, 89), '","': (1, 89)}, | |
{ '";"': (0, 115), '"else"': (0, 116)}, | |
{ None: (1, 11), | |
'"!"': (1, 11), | |
'"("': (1, 11), | |
'"++"': (1, 11), | |
'"-"': (1, 11), | |
'"--"': (1, 11), | |
'";"': (1, 11), | |
'"assert"': (1, 11), | |
'"break"': (1, 11), | |
'"const"': (1, 11), | |
'"continue"': (1, 11), | |
'"do"': (1, 11), | |
'"false"': (1, 11), | |
'"function"': (1, 11), | |
'"if"': (1, 11), | |
'"import"': (1, 11), | |
'"null"': (1, 11), | |
'"print"': (1, 11), | |
'"return"': (1, 11), | |
'"true"': (1, 11), | |
'"var"': (1, 11), | |
'"while"': (1, 11), | |
'"{"': (1, 11), | |
'"}"': (1, 11), | |
'IDENTIFIER': (1, 11), | |
'NUMBER': (1, 11), | |
'PLUGIN': (1, 11), | |
'STRING': (1, 11)}, | |
{ '")"': (0, 117)}, | |
{ '";"': (0, 118)}, | |
{ '";"': (0, 119)}, | |
{ None: (1, 14), | |
'"!"': (1, 14), | |
'"("': (1, 14), | |
'"++"': (1, 14), | |
'"-"': (1, 14), | |
'"--"': (1, 14), | |
'";"': (1, 14), | |
'"assert"': (1, 14), | |
'"break"': (1, 14), | |
'"const"': (1, 14), | |
'"continue"': (1, 14), | |
'"do"': (1, 14), | |
'"else"': (1, 14), | |
'"false"': (1, 14), | |
'"function"': (1, 14), | |
'"if"': (1, 14), | |
'"import"': (1, 14), | |
'"null"': (1, 14), | |
'"print"': (1, 14), | |
'"return"': (1, 14), | |
'"true"': (1, 14), | |
'"var"': (1, 14), | |
'"while"': (1, 14), | |
'"{"': (1, 14), | |
'"}"': (1, 14), | |
'IDENTIFIER': (1, 14), | |
'NUMBER': (1, 14), | |
'PLUGIN': (1, 14), | |
'STRING': (1, 14)}, | |
{ '"!"': (0, 19), | |
'"("': (0, 25), | |
'"++"': (0, 36), | |
'"-"': (0, 2), | |
'"--"': (0, 3), | |
'";"': (0, 37), | |
'"assert"': (0, 16), | |
'"break"': (0, 18), | |
'"continue"': (0, 28), | |
'"do"': (0, 32), | |
'"false"': (0, 8), | |
'"function"': (0, 21), | |
'"if"': (0, 24), | |
'"import"': (0, 12), | |
'"null"': (0, 13), | |
'"print"': (0, 10), | |
'"return"': (0, 30), | |
'"true"': (0, 6), | |
'"while"': (0, 33), | |
'"{"': (0, 17), | |
'IDENTIFIER': (0, 20), | |
'NUMBER': (0, 5), | |
'PLUGIN': (0, 34), | |
'STRING': (0, 11), | |
'block': (0, 39), | |
'call': (0, 15), | |
'expression': (0, 38), | |
'function': (0, 22), | |
'postfix': (0, 35), | |
'prefix': (0, 9), | |
'primary': (0, 27), | |
'statement': (0, 120), | |
'unary': (0, 7)}, | |
{ '";"': (0, 121)}, | |
{ None: (1, 16), | |
'"!"': (1, 16), | |
'"("': (1, 16), | |
'"++"': (1, 16), | |
'"-"': (1, 16), | |
'"--"': (1, 16), | |
'";"': (1, 16), | |
'"assert"': (1, 16), | |
'"break"': (1, 16), | |
'"const"': (1, 16), | |
'"continue"': (1, 16), | |
'"do"': (1, 16), | |
'"else"': (1, 16), | |
'"false"': (1, 16), | |
'"function"': (1, 16), | |
'"if"': (1, 16), | |
'"import"': (1, 16), | |
'"null"': (1, 16), | |
'"print"': (1, 16), | |
'"return"': (1, 16), | |
'"true"': (1, 16), | |
'"var"': (1, 16), | |
'"while"': (1, 16), | |
'"{"': (1, 16), | |
'"}"': (1, 16), | |
'IDENTIFIER': (1, 16), | |
'NUMBER': (1, 16), | |
'PLUGIN': (1, 16), | |
'STRING': (1, 16)}, | |
{ None: (1, 22), | |
'"!"': (1, 22), | |
'"("': (1, 22), | |
'"++"': (1, 22), | |
'"-"': (1, 22), | |
'"--"': (1, 22), | |
'";"': (1, 22), | |
'"assert"': (1, 22), | |
'"break"': (1, 22), | |
'"const"': (1, 22), | |
'"continue"': (1, 22), | |
'"do"': (1, 22), | |
'"else"': (1, 22), | |
'"false"': (1, 22), | |
'"function"': (1, 22), | |
'"if"': (1, 22), | |
'"import"': (1, 22), | |
'"null"': (1, 22), | |
'"print"': (1, 22), | |
'"return"': (1, 22), | |
'"true"': (1, 22), | |
'"var"': (1, 22), | |
'"while"': (1, 22), | |
'"{"': (1, 22), | |
'"}"': (1, 22), | |
'IDENTIFIER': (1, 22), | |
'NUMBER': (1, 22), | |
'PLUGIN': (1, 22), | |
'STRING': (1, 22)}, | |
{ '";"': (0, 122)}, | |
{ None: (1, 15), | |
'"!"': (1, 15), | |
'"("': (1, 15), | |
'"++"': (1, 15), | |
'"-"': (1, 15), | |
'"--"': (1, 15), | |
'";"': (1, 15), | |
'"assert"': (1, 15), | |
'"break"': (1, 15), | |
'"const"': (1, 15), | |
'"continue"': (1, 15), | |
'"do"': (1, 15), | |
'"else"': (1, 15), | |
'"false"': (1, 15), | |
'"function"': (1, 15), | |
'"if"': (1, 15), | |
'"import"': (1, 15), | |
'"null"': (1, 15), | |
'"print"': (1, 15), | |
'"return"': (1, 15), | |
'"true"': (1, 15), | |
'"var"': (1, 15), | |
'"while"': (1, 15), | |
'"{"': (1, 15), | |
'"}"': (1, 15), | |
'IDENTIFIER': (1, 15), | |
'NUMBER': (1, 15), | |
'PLUGIN': (1, 15), | |
'STRING': (1, 15)}, | |
{ None: (1, 13), | |
'"!"': (1, 13), | |
'"("': (1, 13), | |
'"++"': (1, 13), | |
'"-"': (1, 13), | |
'"--"': (1, 13), | |
'";"': (1, 13), | |
'"assert"': (1, 13), | |
'"break"': (1, 13), | |
'"const"': (1, 13), | |
'"continue"': (1, 13), | |
'"do"': (1, 13), | |
'"else"': (1, 13), | |
'"false"': (1, 13), | |
'"function"': (1, 13), | |
'"if"': (1, 13), | |
'"import"': (1, 13), | |
'"null"': (1, 13), | |
'"print"': (1, 13), | |
'"return"': (1, 13), | |
'"true"': (1, 13), | |
'"var"': (1, 13), | |
'"while"': (1, 13), | |
'"{"': (1, 13), | |
'"}"': (1, 13), | |
'IDENTIFIER': (1, 13), | |
'NUMBER': (1, 13), | |
'PLUGIN': (1, 13), | |
'STRING': (1, 13)}] | |
rstate = [ (None, 1), | |
('program', 0), | |
('program', 2), | |
('block', 3), | |
('declarations', 0), | |
('declarations', 2), | |
('declaration', 1), | |
('declaration', 1), | |
('declaration', 1), | |
('varDecl', 3), | |
('varDecl', 5), | |
('constDecl', 5), | |
('statement', 3), | |
('statement', 8), | |
('statement', 6), | |
('statement', 7), | |
('statement', 6), | |
('statement', 2), | |
('statement', 2), | |
('statement', 2), | |
('statement', 3), | |
('statement', 5), | |
('statement', 7), | |
('statement', 3), | |
('statement', 5), | |
('statement', 1), | |
('statement', 2), | |
('statement', 1), | |
('expression', 1), | |
('assignment', 1), | |
('assignment', 3), | |
('iop', 1), | |
('iop', 1), | |
('iop', 1), | |
('iop', 1), | |
('iop', 1), | |
('iop', 1), | |
('primaries', 0), | |
('primaries', 4), | |
('ternary', 1), | |
('ternary', 5), | |
('or', 1), | |
('or', 3), | |
('and', 1), | |
('and', 3), | |
('equality', 1), | |
('equality', 3), | |
('equality', 3), | |
('comparison', 1), | |
('comparison', 3), | |
('comparison', 3), | |
('comparison', 3), | |
('comparison', 3), | |
('addition', 1), | |
('addition', 3), | |
('addition', 3), | |
('multiplication', 1), | |
('multiplication', 3), | |
('multiplication', 3), | |
('multiplication', 3), | |
('unary', 1), | |
('unary', 2), | |
('unary', 2), | |
('prefix', 2), | |
('prefix', 2), | |
('prefix', 1), | |
('postfix', 2), | |
('postfix', 2), | |
('postfix', 1), | |
('call', 1), | |
('call', 3), | |
('call', 4), | |
('call', 4), | |
('call', 3), | |
('arguments', 1), | |
('arguments', 3), | |
('slice', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 1), | |
('primary', 3), | |
('function', 5), | |
('function', 4), | |
('parameters', 1), | |
('parameters', 3)] | |
keywords = [ 'false', | |
'>=', | |
'*=', | |
'true', | |
'||', | |
'<=', | |
'%=', | |
'as', | |
'null', | |
'!=', | |
'if', | |
'!', | |
'const', | |
')', | |
'(', | |
'+', | |
'*', | |
'-', | |
',', | |
'/', | |
'.', | |
'print', | |
'import', | |
';', | |
':', | |
'=', | |
'<', | |
'?', | |
'>', | |
'function', | |
'do', | |
'return', | |
'==', | |
'else', | |
'assert', | |
'var', | |
'&&', | |
'%', | |
'[', | |
']', | |
'break', | |
'/=', | |
'++', | |
'--', | |
'while', | |
'continue', | |
'-=', | |
'{', | |
'}', | |
'+='] | |
lexemes = [ '"-"', | |
'"++"', | |
'"+"', | |
'"--"', | |
'")"', | |
'NUMBER', | |
'"true"', | |
'"false"', | |
'"print"', | |
'"<"', | |
'"*"', | |
'"import"', | |
'"null"', | |
'"var"', | |
'">="', | |
'"%"', | |
'">"', | |
'"&&"', | |
'"||"', | |
'"}"', | |
'"!"', | |
'"assert"', | |
'"{"', | |
'"else"', | |
'"break"', | |
'"/"', | |
'"%="', | |
'IDENTIFIER', | |
'"function"', | |
'":"', | |
'STRING', | |
'"if"', | |
'"("', | |
'"."', | |
'"]"', | |
'"as"', | |
'"["', | |
'"continue"', | |
'"const"', | |
'"return"', | |
'"do"', | |
'"while"', | |
'"=="', | |
'PLUGIN', | |
'"+="', | |
'"?"', | |
'"*="', | |
'"="', | |
'"-="', | |
'"!="', | |
'";"', | |
'"/="', | |
'"<="', | |
'","'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from toy_language import state, rstate, keywords, lexemes | |
import argparse | |
import pprint | |
import sys | |
class Tokenizer: | |
def __init__(self): | |
self.state = 'st_0' | |
self.column = 1 | |
self.line = 1 | |
self.pos = (1,1) | |
self.inp = [] | |
def st_0(tok, ch): | |
if ch.isdigit(): | |
tok.pos = (tok.column, tok.line) | |
tok.inp.append(ch) | |
tok.state = 'st_digits' | |
elif ch.isalpha() or ch == "_": | |
tok.pos = (tok.column, tok.line) | |
tok.inp.append(ch) | |
tok.state = 'st_word' | |
elif ch == " " or ch == "\n" or ch == "\t" or ch == "\r" or ch is "": | |
pass | |
elif ch == '"': | |
tok.state = 'st_string' | |
elif ch == "#": | |
tok.state = 'st_comment' | |
elif ch in keywords: | |
advance('"' + ch + '"', ch, | |
(tok.column, tok.line), (tok.column+1, tok.line)) | |
else: | |
advance('OP', ch, | |
(tok.column, tok.line), (tok.column+1, tok.line)) | |
def st_comment(tok, ch): | |
if ch == "\n": | |
tok.state = 'st_0' | |
def st_word(tok, ch): | |
if ch.isalpha() or ch == "_" or ch.isdigit(): | |
tok.inp.append(ch) | |
else: | |
text = "".join(tok.inp) | |
if text in keywords: | |
advance('"' + text + '"', text, | |
tok.pos, (tok.column+1, tok.line)) | |
else: | |
advance('IDENTIFIER', text, tok.pos, (tok.column+1, tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
st_0(tok, ch) | |
def st_digits(tok, ch): | |
if ch.isdigit(): | |
tok.inp.append(ch) | |
else: | |
advance('NUMBER', "".join(tok.inp), | |
tok.pos, (tok.column+1, tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
st_0(tok, ch) | |
def st_string(tok, ch): | |
if ch == '"': | |
advance('STRING', "".join(tok.inp), | |
tok.pos, (tok.column,tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
elif ch == '\n': | |
advance('STRING', "".join(tok.inp), | |
tok.pos, (tok.column,tok.line)) | |
tok.inp = [] | |
tok.state = 'st_0' | |
elif ch == '\\': | |
tok.inp.append(ch) | |
tok.state = st_string_esc | |
else: | |
tok.inp.append(ch) | |
def st_string_esc(tok, ch): | |
tok.inp.append(ch) | |
tok.state = st_string | |
# Collects every 'st_' into a dictionary. | |
tokenize_n = dict((k,v) for k,v in globals().items() | |
if k.startswith('st_')) | |
def tokenize(tok, ch): | |
tokenize_n[tok.state](tok, ch) | |
if ch == "\n": | |
tok.line += 1 | |
tok.column = 1 | |
else: | |
tok.column += 1 | |
# The parsing step | |
class Parser: | |
def __init__(self): | |
self.st = (None, 0, 0, 1) # (stack, top, layout, left) | |
self.mode = 0 | |
self.syms = [] | |
self.texts = [] | |
self.starts = [] | |
self.stops = [] | |
p = Parser() | |
def get_action(st, sym, left): | |
dsym = sym if (st[2] < left or sym is None) else 'wfb' | |
table = state[st[1]] | |
if dsym in table: | |
return table[dsym] | |
else: | |
return (8, 0) | |
# Parsing stack format: (st, obj) | |
def shift(st, sym, left, text): | |
action, arg = get_action(st, sym, left) | |
while action == 1: | |
lhs, count = rstate[arg] | |
objects = [] | |
for _ in range(count): | |
st, obj = st[0] | |
objects.append(obj) | |
objects.reverse() | |
robj = reduction(arg, objects) | |
assert lhs is not None | |
action, arg = state[st[1]][lhs] | |
assert action != 1 | |
layout = st[2] if (action & 2 == 0) else st[3] | |
st = (st, robj), arg, layout, st[3] | |
action, arg = get_action(st, sym, left) | |
if action & 8 != 0: | |
return None | |
if action & 4 != 0 and p.left != left: | |
return None | |
layout = st[2] if (action & 2 == 0) else left | |
return (st, text), arg, layout, left | |
def finish(st): | |
action, arg = get_action(st, None, 0) | |
while action == 1: | |
lhs, count = rstate[arg] | |
objects = [] | |
for _ in range(count): | |
st, obj = st[0] | |
objects.append(obj) | |
objects.reverse() | |
if lhs is None: | |
return True, objects[0] | |
robj = reduction(arg, objects) | |
action, arg = state[st[1]][lhs] | |
assert action != 1 | |
st = (st, robj), arg, st[2], st[3] | |
action, arg = get_action(st, None, 0) | |
return False, None | |
def predict(st): | |
for sym in state[st[1]]: | |
if sym is None: | |
pass | |
#ok, ct = finish(st) | |
#if ok: | |
# yield (sym | |
elif sym in lexemes: | |
ct = shift(st, sym, 1, None) | |
if ct is not None: | |
yield (sym, ct) | |
def predict_cont(st, depth): | |
if depth == 0: | |
for sym, ct in predict(st): | |
yield [sym], ct | |
else: | |
for syms, ct in predict_cont(st, depth-1): | |
for sym, dt in predict(ct): | |
yield syms + [sym], dt | |
def advance(sym, text, start, stop): | |
if p.mode == 0: | |
st = shift(p.st, sym, start[0], text) | |
if st is None: | |
p.mode = 1 | |
p.syms.append(sym) | |
p.texts.append(text) | |
p.starts.append(start) | |
p.stops.append(stop) | |
else: | |
p.st = st | |
else: | |
p.syms.append(sym) | |
p.texts.append(text) | |
p.starts.append(start) | |
p.stops.append(stop) | |
if len(p.syms) >= 20: | |
repair() | |
def repair(): | |
cand = None | |
for syms, ct in predict_cont(p.st, 3): | |
for i, k in enumerate(editdistance(syms, p.syms)[len(syms)]): | |
if cand is None: | |
cand = k, i, syms | |
elif k < cand[0] or (k == cand[0] and cand[1] >= i): | |
cand = k, i, syms | |
seq = rewritesequence(p.syms[:cand[1]], cand[2]) | |
syms = p.syms | |
texts = p.texts | |
starts = p.starts | |
stops = p.stops | |
stop = p.starts[0] | |
p.mode = 0 | |
p.syms = [] | |
p.texts = [] | |
p.starts = [] | |
p.stops = [] | |
for act, arg in seq: | |
if act in ('del', 'rw', 'shift'): | |
sym = syms.pop(0) | |
text = texts.pop(0) | |
start = starts.pop(0) | |
stop = stops.pop(0) | |
if act == 'rw': | |
sys.stderr.write("syntax error at line {}: rewrote {} to {} at col {}\n".format( | |
start[1], sym, arg, start[0])) | |
sym = arg | |
advance(sym, text, start, stop) | |
elif act == 'shift': | |
advance(sym, text, start, stop) | |
elif act == 'del': | |
sys.stderr.write("syntax error at line {}: deleted {} at col {}\n".format( | |
start[1], sym, start[0])) | |
elif act == 'ins': | |
sys.stderr.write("syntax error at line {}: inserted {} to col {}\n".format( | |
stop[1], arg, stop[0])) | |
advance(arg, None, stop, stop) | |
while len(syms) > 0: | |
sym = syms.pop(0) | |
text = texts.pop(0) | |
start = starts.pop(0) | |
stop = stops.pop(0) | |
advance(sym, text, start, stop) | |
def editdistance(s, t): | |
m = len(s) | |
n = len(t) | |
d = [[i for j in range(n+1)] for i in range(m+1)] | |
for j in range(1, n+1): | |
d[0][j] = j | |
for i in range(1, m+1): | |
substitution_cost = int(s[i-1] != t[j-1]) | |
d[i][j] = min( | |
d[i-1][j] + 1, # deletion | |
min(d[i][j-1] + 1, # insertion | |
d[i-1][j-1] + substitution_cost)) # substitution | |
return d | |
def rewritesequence(s, t): | |
d = editdistance(s, t) | |
i = len(s) | |
j = len(t) | |
stack = [] | |
while i > 0 and j > 0: | |
if d[i][j] == d[i][j-1]+1: | |
stack.append(('ins', t[j-1])) | |
j -= 1 | |
elif d[i][j] == d[i-1][j-1] + int(s[i-1] != t[j-1]): | |
if s[i-1] == t[j-1]: | |
stack.append(('shift', None)) | |
else: | |
stack.append(('rw', t[j-1])) | |
i -= 1 | |
j -= 1 | |
elif d[i][j] == d[i-1][j]+1: | |
stack.append(('del', None)) | |
i -= 1 | |
else: | |
assert False | |
while j > 0: | |
stack.append(('ins', t[j-1])) | |
j -= 1 | |
while i > 0: | |
stack.append(('del', None)) | |
i -= 1 | |
stack.reverse() | |
return stack | |
def reduction(rule, args): | |
if rule == 1: | |
return [] | |
if rule == 2: | |
return args[0] + [args[1]] | |
if rule == 3: | |
return [':block'] + args[1] | |
return [rule] + args | |
parser = argparse.ArgumentParser( | |
description='A test parser for toy language') | |
parser.add_argument('filename', type=str, | |
help='The input file for the parser') | |
parser.add_argument('-p', '--printout', | |
action='store_const', const=True, default=False, | |
help='Print more details to stdout') | |
args = parser.parse_args() | |
tok = Tokenizer() | |
with open(args.filename, 'r') as fd: | |
for ch in fd.read(): | |
tokenize(tok, ch) | |
tokenize(tok, "") | |
if p.mode != 0: | |
repair() | |
result, tree = finish(p.st) | |
if result: | |
if args.printout: | |
pp = pprint.PrettyPrinter(indent=2) | |
pp.pprint(tree) | |
else: | |
print "parse error at eof" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment