FranchuFranchu · July 26, 2020 15:44
diff --git a/earley.gd b/earley.gd
 extends Object

 class_name Parser

 # Based off https://github.com/tomerfiliba/tau/blob/master/earley3.py

 class Test:
 	static func tostring(val):
 		
 		if typeof(val) == TYPE_STRING:
 			return val
 		elif val == null:
 			return "None"
 		else:
 			return val._str()

 class Production:
 	var terms = []
 	func _init(terms):
 		self.terms = terms
 		
 	func get_class():
 		return "Production"
 		
 	func id():
 		var ts = ""
 		for i in self.terms:
 			ts += Test.tostring(i) + ","
 		return ts
 		
 class Rule:
 	var name = ""
 	var productions = []
 	func _init(name, productions = []):
 		self.name = name
 		self.productions = productions
 		
 	func add(productions_):
 		self.productions = self.productions + productions_
 		
 	func get_class():
 		return "Rule"
 	func _str():
 		return self.name
 	
 		
 class State:
 	var name
 	var production
 	var start_column
 	var end_column
 	var dot_index
 	var rules
 	
 	func _init(name, production, dot_index, start_column):
 		self.name = name
 		self.production = production
 		self.dot_index = dot_index
 		self.start_column = start_column
 		self.rules = []
 		for i in production.terms:
 			if typeof(i) == TYPE_STRING:
 				continue
 			
 			if i.get_class() == "Rule":
 				self.rules.append(i)
 		
 	func completed():
 		return self.dot_index >= len(self.production.terms)
 	
 	func id():
 		
 		return self.name + self.production.id()
 	
 	func next_term():
 		if self.completed():
 			return null
 			
 		return self.production.terms[self.dot_index]
 			
 	func get_class():
 		return "State"
 		
 	func _str():
 		var terms = []
 		for p in self.production.terms:
 			if typeof(p) == TYPE_STRING:
 				terms.append(p)
 				continue
 			terms.append(p._str())
 		
 		terms.insert(self.dot_index, "$")
 		return "%-5s -> %-16s [%s-%s]" % [self.name, PoolStringArray(terms).join(" "), self.start_column._str(), self.end_column._str()]

 class Column:
 	var index
 	var token
 	var states
 	var _unique
 	func _init(index, token):
 		self.index = index
 		self.token = token
 		self.states = []
 		self._unique = []
 	func add(state):
 		if not self._unique.has(state.id()):
 			self._unique.append(state.id())
 			state.end_column = self
 			self.states.append(state)
 			return true
 		return false
 	func _str():
 		return str(self.index)
 		
 	func print_():
 		var toks = Test.tostring(self.token)
 			
 		print("[%s] %s" % [self.index, toks])
 		print("=".repeat(35))
 		for s in self.states:
 			print(s._str())
 		print()


 class Node_parser:
 	var value
 	var children
 	func _init(value, children):
 		self.value = value
 		self.children = children
 		
 	func print_(level = 0):
 		print("  ".repeat(level) + Test.tostring(self.value))
 		for child in self.children:
 			child.print_(level + 1)
 		


 func predict(col, rule):
 	for prod in rule.productions:
 		col.add(State.new(rule.name, prod, 0, col))

 func scan(col, state, token):
 	if token != col.token:
 		print("return")
 		return
 	col.add(State.new(state.name, state.production, state.dot_index + 1, state.start_column))

 func complete(col, state):
 	if not state.completed():
 		return
 	for st in state.start_column.states:
 		var term = st.next_term()
 		if typeof(term) == TYPE_STRING:
 			continue
 		elif typeof(term) == TYPE_NIL:
 			continue
 		elif not term.get_class() == "Rule":
 			continue
 		elif term.name == state.name:
 			col.add(State.new(st.name, st.production, st.dot_index + 1, st.start_column))


 const GAMMA_RULE = "GAMMA"

 func parse(rule, text):
 	var table = []
 	
 	var idx = 0
 	for tok in ([null] + Array(text.to_lower().split(" "))):
 		table.append(Column.new(idx, tok))
 		idx += 1
 	
 	
 	table[0].add(State.new(GAMMA_RULE, Production.new([rule]), 0, table[0]))
 	
 	for i in table:
 		i.print_()
 	
 	idx = 0
 	for col in table:
 		print(col._str())
 		
 		for state in col.states:
 			print(" ", state._str())
 			if state.completed():
 				print("complete")
 				complete(col, state)
 			else:
 				var term = state.next_term()
 				
 				print("  ", Test.tostring(term))
 				
 				if not (typeof(term) == TYPE_STRING) and term.get_class() == "Rule":
 					predict(col, term)
 				elif (idx + 1) < len(table):
 					print("end?")
 					print(idx, " ", state._str(), " ")
 					scan(table[idx+1], state, term)
 		
 		idx += 1
 		

 	# find gamma rule in last table column (otherwise fail)
 	for st in table[-1].states:
 		if st.name == GAMMA_RULE and st.completed():
 			return st
 	
 	print("Fail while parsing")
 	
 	return false


 func build_trees(state):
 	return build_trees_helper([], state, len(state.rules) - 1, state.end_column)

 func build_trees_helper(children, state, rule_index, end_column):
 	var start_column
 	if rule_index < 0:
 		return [Node_parser.new(state, children)]
 	elif rule_index == 0:
 		start_column = state.start_column
 	else:
 		start_column = null
 	
 	var rule = state.rules[rule_index]
 	var outputs = []
 	for st in end_column.states:
 		if st == state:
 			break
 		if st == state or not st.completed() or st.name != rule.name:
 			continue
 		if start_column != null and st.start_column != start_column:
 			continue
 		for sub_tree in build_trees(st):
 			for node in build_trees_helper([sub_tree] + children, state, rule_index - 1, st.start_column):
 				outputs.append(node)
 	return outputs

 func test():
 	
 	print("test")
 	var N = Rule.new("N", [Production.new(["time"]), Production.new(["flight"]), Production.new(["banana"]),  Production.new(["flies"]), Production.new(["boy"]), Production.new(["telescope"])])
 	var D = Rule.new("D", [Production.new(["the"]), Production.new(["a"]), Production.new(["an"])])
 	var V = Rule.new("V", [Production.new(["book"]), Production.new(["eat"]), Production.new(["sleep"]), Production.new(["saw"])])
 	var P = Rule.new("P", [Production.new(["with"]), Production.new(["in"]), Production.new(["on"]), Production.new(["at"]), Production.new(["through"])])
 	
 	print("test")
 	var PP = Rule.new("PP")
 	var NP = Rule.new("NP", [Production.new([D, N]), Production.new(["john"]), Production.new(["houston"])])
 	NP.add([Production.new([NP, PP])])
 	PP.add([Production.new([P, NP])])
 	
 	print("test")
 	var VP = Rule.new("VP", [Production.new([V, NP])])
 	VP.add([Production.new([VP, PP])])
 	var S = Rule.new("S", [Production.new([NP, VP]), Production.new([VP])])
 	
 	var result = parse(S, "book the flight through houston")
 	
 	for tree in build_trees(result):
 		tree.print_()
	extends Object

	class_name Parser

	# Based off https://github.com/tomerfiliba/tau/blob/master/earley3.py

	class Test:
	static func tostring(val):

	if typeof(val) == TYPE_STRING:
	return val
	elif val == null:
	return "None"
	else:
	return val._str()

	class Production:
	var terms = []
	func _init(terms):
	self.terms = terms

	func get_class():
	return "Production"

	func id():
	var ts = ""
	for i in self.terms:
	ts += Test.tostring(i) + ","
	return ts

	class Rule:
	var name = ""
	var productions = []
	func _init(name, productions = []):
	self.name = name
	self.productions = productions

	func add(productions_):
	self.productions = self.productions + productions_

	func get_class():
	return "Rule"
	func _str():
	return self.name


	class State:
	var name
	var production
	var start_column
	var end_column
	var dot_index
	var rules

	func _init(name, production, dot_index, start_column):
	self.name = name
	self.production = production
	self.dot_index = dot_index
	self.start_column = start_column
	self.rules = []
	for i in production.terms:
	if typeof(i) == TYPE_STRING:
	continue

	if i.get_class() == "Rule":
	self.rules.append(i)

	func completed():
	return self.dot_index >= len(self.production.terms)

	func id():

	return self.name + self.production.id()

	func next_term():
	if self.completed():
	return null

	return self.production.terms[self.dot_index]

	func get_class():
	return "State"

	func _str():
	var terms = []
	for p in self.production.terms:
	if typeof(p) == TYPE_STRING:
	terms.append(p)
	continue
	terms.append(p._str())

	terms.insert(self.dot_index, "$")
	return "%-5s -> %-16s [%s-%s]" % [self.name, PoolStringArray(terms).join(" "), self.start_column._str(), self.end_column._str()]

	class Column:
	var index
	var token
	var states
	var _unique
	func _init(index, token):
	self.index = index
	self.token = token
	self.states = []
	self._unique = []
	func add(state):
	if not self._unique.has(state.id()):
	self._unique.append(state.id())
	state.end_column = self
	self.states.append(state)
	return true
	return false
	func _str():
	return str(self.index)

	func print_():
	var toks = Test.tostring(self.token)

	print("[%s] %s" % [self.index, toks])
	print("=".repeat(35))
	for s in self.states:
	print(s._str())
	print()


	class Node_parser:
	var value
	var children
	func _init(value, children):
	self.value = value
	self.children = children

	func print_(level = 0):
	print(" ".repeat(level) + Test.tostring(self.value))
	for child in self.children:
	child.print_(level + 1)



	func predict(col, rule):
	for prod in rule.productions:
	col.add(State.new(rule.name, prod, 0, col))

	func scan(col, state, token):
	if token != col.token:
	print("return")
	return
	col.add(State.new(state.name, state.production, state.dot_index + 1, state.start_column))

	func complete(col, state):
	if not state.completed():
	return
	for st in state.start_column.states:
	var term = st.next_term()
	if typeof(term) == TYPE_STRING:
	continue
	elif typeof(term) == TYPE_NIL:
	continue
	elif not term.get_class() == "Rule":
	continue
	elif term.name == state.name:
	col.add(State.new(st.name, st.production, st.dot_index + 1, st.start_column))


	const GAMMA_RULE = "GAMMA"

	func parse(rule, text):
	var table = []

	var idx = 0
	for tok in ([null] + Array(text.to_lower().split(" "))):
	table.append(Column.new(idx, tok))
	idx += 1


	table[0].add(State.new(GAMMA_RULE, Production.new([rule]), 0, table[0]))

	for i in table:
	i.print_()

	idx = 0
	for col in table:
	print(col._str())

	for state in col.states:
	print(" ", state._str())
	if state.completed():
	print("complete")
	complete(col, state)
	else:
	var term = state.next_term()

	print(" ", Test.tostring(term))

	if not (typeof(term) == TYPE_STRING) and term.get_class() == "Rule":
	predict(col, term)
	elif (idx + 1) < len(table):
	print("end?")
	print(idx, " ", state._str(), " ")
	scan(table[idx+1], state, term)

	idx += 1


	# find gamma rule in last table column (otherwise fail)
	for st in table[-1].states:
	if st.name == GAMMA_RULE and st.completed():
	return st

	print("Fail while parsing")

	return false


	func build_trees(state):
	return build_trees_helper([], state, len(state.rules) - 1, state.end_column)

	func build_trees_helper(children, state, rule_index, end_column):
	var start_column
	if rule_index < 0:
	return [Node_parser.new(state, children)]
	elif rule_index == 0:
	start_column = state.start_column
	else:
	start_column = null

	var rule = state.rules[rule_index]
	var outputs = []
	for st in end_column.states:
	if st == state:
	break
	if st == state or not st.completed() or st.name != rule.name:
	continue
	if start_column != null and st.start_column != start_column:
	continue
	for sub_tree in build_trees(st):
	for node in build_trees_helper([sub_tree] + children, state, rule_index - 1, st.start_column):
	outputs.append(node)
	return outputs

	func test():

	print("test")
	var N = Rule.new("N", [Production.new(["time"]), Production.new(["flight"]), Production.new(["banana"]), Production.new(["flies"]), Production.new(["boy"]), Production.new(["telescope"])])
	var D = Rule.new("D", [Production.new(["the"]), Production.new(["a"]), Production.new(["an"])])
	var V = Rule.new("V", [Production.new(["book"]), Production.new(["eat"]), Production.new(["sleep"]), Production.new(["saw"])])
	var P = Rule.new("P", [Production.new(["with"]), Production.new(["in"]), Production.new(["on"]), Production.new(["at"]), Production.new(["through"])])

	print("test")
	var PP = Rule.new("PP")
	var NP = Rule.new("NP", [Production.new([D, N]), Production.new(["john"]), Production.new(["houston"])])
	NP.add([Production.new([NP, PP])])
	PP.add([Production.new([P, NP])])

	print("test")
	var VP = Rule.new("VP", [Production.new([V, NP])])
	VP.add([Production.new([VP, PP])])
	var S = Rule.new("S", [Production.new([NP, VP]), Production.new([VP])])

	var result = parse(S, "book the flight through houston")

	for tree in build_trees(result):
	tree.print_()
No results found