Created
July 26, 2020 15:44
-
-
Save FranchuFranchu/83a2cec801c7e0d80e83e0a21094890e to your computer and use it in GitHub Desktop.
Earley parser in GDScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extends Object | |
class_name Parser | |
# Based off https://github.com/tomerfiliba/tau/blob/master/earley3.py | |
class Test: | |
static func tostring(val): | |
if typeof(val) == TYPE_STRING: | |
return val | |
elif val == null: | |
return "None" | |
else: | |
return val._str() | |
class Production: | |
var terms = [] | |
func _init(terms): | |
self.terms = terms | |
func get_class(): | |
return "Production" | |
func id(): | |
var ts = "" | |
for i in self.terms: | |
ts += Test.tostring(i) + "," | |
return ts | |
class Rule: | |
var name = "" | |
var productions = [] | |
func _init(name, productions = []): | |
self.name = name | |
self.productions = productions | |
func add(productions_): | |
self.productions = self.productions + productions_ | |
func get_class(): | |
return "Rule" | |
func _str(): | |
return self.name | |
class State: | |
var name | |
var production | |
var start_column | |
var end_column | |
var dot_index | |
var rules | |
func _init(name, production, dot_index, start_column): | |
self.name = name | |
self.production = production | |
self.dot_index = dot_index | |
self.start_column = start_column | |
self.rules = [] | |
for i in production.terms: | |
if typeof(i) == TYPE_STRING: | |
continue | |
if i.get_class() == "Rule": | |
self.rules.append(i) | |
func completed(): | |
return self.dot_index >= len(self.production.terms) | |
func id(): | |
return self.name + self.production.id() | |
func next_term(): | |
if self.completed(): | |
return null | |
return self.production.terms[self.dot_index] | |
func get_class(): | |
return "State" | |
func _str(): | |
var terms = [] | |
for p in self.production.terms: | |
if typeof(p) == TYPE_STRING: | |
terms.append(p) | |
continue | |
terms.append(p._str()) | |
terms.insert(self.dot_index, "$") | |
return "%-5s -> %-16s [%s-%s]" % [self.name, PoolStringArray(terms).join(" "), self.start_column._str(), self.end_column._str()] | |
class Column: | |
var index | |
var token | |
var states | |
var _unique | |
func _init(index, token): | |
self.index = index | |
self.token = token | |
self.states = [] | |
self._unique = [] | |
func add(state): | |
if not self._unique.has(state.id()): | |
self._unique.append(state.id()) | |
state.end_column = self | |
self.states.append(state) | |
return true | |
return false | |
func _str(): | |
return str(self.index) | |
func print_(): | |
var toks = Test.tostring(self.token) | |
print("[%s] %s" % [self.index, toks]) | |
print("=".repeat(35)) | |
for s in self.states: | |
print(s._str()) | |
print() | |
class Node_parser: | |
var value | |
var children | |
func _init(value, children): | |
self.value = value | |
self.children = children | |
func print_(level = 0): | |
print(" ".repeat(level) + Test.tostring(self.value)) | |
for child in self.children: | |
child.print_(level + 1) | |
func predict(col, rule): | |
for prod in rule.productions: | |
col.add(State.new(rule.name, prod, 0, col)) | |
func scan(col, state, token): | |
if token != col.token: | |
print("return") | |
return | |
col.add(State.new(state.name, state.production, state.dot_index + 1, state.start_column)) | |
func complete(col, state): | |
if not state.completed(): | |
return | |
for st in state.start_column.states: | |
var term = st.next_term() | |
if typeof(term) == TYPE_STRING: | |
continue | |
elif typeof(term) == TYPE_NIL: | |
continue | |
elif not term.get_class() == "Rule": | |
continue | |
elif term.name == state.name: | |
col.add(State.new(st.name, st.production, st.dot_index + 1, st.start_column)) | |
const GAMMA_RULE = "GAMMA" | |
func parse(rule, text): | |
var table = [] | |
var idx = 0 | |
for tok in ([null] + Array(text.to_lower().split(" "))): | |
table.append(Column.new(idx, tok)) | |
idx += 1 | |
table[0].add(State.new(GAMMA_RULE, Production.new([rule]), 0, table[0])) | |
for i in table: | |
i.print_() | |
idx = 0 | |
for col in table: | |
print(col._str()) | |
for state in col.states: | |
print(" ", state._str()) | |
if state.completed(): | |
print("complete") | |
complete(col, state) | |
else: | |
var term = state.next_term() | |
print(" ", Test.tostring(term)) | |
if not (typeof(term) == TYPE_STRING) and term.get_class() == "Rule": | |
predict(col, term) | |
elif (idx + 1) < len(table): | |
print("end?") | |
print(idx, " ", state._str(), " ") | |
scan(table[idx+1], state, term) | |
idx += 1 | |
# find gamma rule in last table column (otherwise fail) | |
for st in table[-1].states: | |
if st.name == GAMMA_RULE and st.completed(): | |
return st | |
print("Fail while parsing") | |
return false | |
func build_trees(state): | |
return build_trees_helper([], state, len(state.rules) - 1, state.end_column) | |
func build_trees_helper(children, state, rule_index, end_column): | |
var start_column | |
if rule_index < 0: | |
return [Node_parser.new(state, children)] | |
elif rule_index == 0: | |
start_column = state.start_column | |
else: | |
start_column = null | |
var rule = state.rules[rule_index] | |
var outputs = [] | |
for st in end_column.states: | |
if st == state: | |
break | |
if st == state or not st.completed() or st.name != rule.name: | |
continue | |
if start_column != null and st.start_column != start_column: | |
continue | |
for sub_tree in build_trees(st): | |
for node in build_trees_helper([sub_tree] + children, state, rule_index - 1, st.start_column): | |
outputs.append(node) | |
return outputs | |
func test(): | |
print("test") | |
var N = Rule.new("N", [Production.new(["time"]), Production.new(["flight"]), Production.new(["banana"]), Production.new(["flies"]), Production.new(["boy"]), Production.new(["telescope"])]) | |
var D = Rule.new("D", [Production.new(["the"]), Production.new(["a"]), Production.new(["an"])]) | |
var V = Rule.new("V", [Production.new(["book"]), Production.new(["eat"]), Production.new(["sleep"]), Production.new(["saw"])]) | |
var P = Rule.new("P", [Production.new(["with"]), Production.new(["in"]), Production.new(["on"]), Production.new(["at"]), Production.new(["through"])]) | |
print("test") | |
var PP = Rule.new("PP") | |
var NP = Rule.new("NP", [Production.new([D, N]), Production.new(["john"]), Production.new(["houston"])]) | |
NP.add([Production.new([NP, PP])]) | |
PP.add([Production.new([P, NP])]) | |
print("test") | |
var VP = Rule.new("VP", [Production.new([V, NP])]) | |
VP.add([Production.new([VP, PP])]) | |
var S = Rule.new("S", [Production.new([NP, VP]), Production.new([VP])]) | |
var result = parse(S, "book the flight through houston") | |
for tree in build_trees(result): | |
tree.print_() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment