|
# -*- coding: utf-8 -*- |
|
#!/usr/bin/python |
|
|
|
import sys |
|
|
|
class Production: |
|
|
|
def __init__(self, N, RHS): |
|
self.N = N |
|
self.RHS = RHS |
|
|
|
def symbols(self): |
|
return [s for s in self.RHS] |
|
|
|
def __str__(self): |
|
return self.N+" → "+self.RHS |
|
|
|
class State: |
|
|
|
def __init__(self, N, S1, S2, i, ast=None): |
|
self.N = N |
|
self.S1 = S1 |
|
self.S2 = S2 |
|
self.i = i |
|
if not ast: |
|
ast = AST(self.N, []) |
|
self.ast = ast |
|
|
|
def complete(self): |
|
return len(self.S2)==0 |
|
|
|
def nextcat(self): |
|
return self.S2[0] |
|
|
|
def advance(self): |
|
return State(self.N, self.S1+self.S2[0], self.S2[1:], self.i, AST(self.N, self.ast.children[:])) |
|
|
|
def __str__(self): |
|
return "[%s → %s•%s, %d]" % (self.N, self.S1, self.S2, self.i) |
|
|
|
def __eq__(self, s2): |
|
return self.N==s2.N and self.S1==s2.S1 and self.S2==s2.S2 and self.i==s2.i |
|
|
|
class AST: |
|
|
|
def __init__(self, label, children): |
|
self.label = label |
|
self.children = children |
|
|
|
def __str__(self, first=False): |
|
s = "" |
|
if first: |
|
s += "\Tree " |
|
s += "[.%s %s]" % (self.label, " ".join(map(str, self.children))) |
|
return s |
|
|
|
class Grammar: |
|
|
|
def __init__(self, N, T, P, S): |
|
assert(S in N and all(s in T or s in N for p in P for s in p.symbols()) and all(p.N in N for p in P)) |
|
self.N = N |
|
self.T = T |
|
self.P = P |
|
self.S = S |
|
|
|
def __str__(self): |
|
return "\n".join(map(str, self.P))+"\n" |
|
|
|
def parse(self, string): |
|
# Earley |
|
string = string+"$" |
|
i = 0 |
|
n = len(string)+1 |
|
predicted = [[] for i in xrange(n+1)] |
|
predicted[0] = [State("G", "", "S$", 0)] |
|
|
|
for i in xrange(n+1): |
|
j = 0 |
|
while j < len(predicted[i]): |
|
state = predicted[i][j] |
|
if not state.complete(): |
|
c = state.nextcat() |
|
if c in self.N: |
|
self.predict(state, predicted[i], i) |
|
else: |
|
self.scan(state, string[i:], predicted[i+1], i) |
|
else: |
|
self.complete(state, predicted, i) |
|
j += 1 |
|
|
|
expected = State("G","S$","",0) |
|
for p in predicted[n-1]: |
|
if p == expected: |
|
return p.ast |
|
return None |
|
|
|
def predict(self, state, predicted, i): |
|
n = state.nextcat() |
|
for production in self.P: |
|
if production.N == n: |
|
st = State(n, "", production.RHS, i) |
|
if st not in predicted: |
|
predicted.append(State(n, "", production.RHS, i)) |
|
|
|
def scan(self, state, string, predicted, i): |
|
if string[0] == state.nextcat(): |
|
s2 = state.advance() |
|
if s2 not in predicted: |
|
s2.ast.children.append(AST(string[0],[])) |
|
predicted.append(s2) |
|
|
|
def complete(self, completedState, predicted, k): |
|
A = completedState.N |
|
j = completedState.i |
|
# completedState = A -> y•, j |
|
|
|
for state in predicted[j]: |
|
if not state.complete() and state.nextcat() == A: |
|
# state = B -> x•Az, i |
|
s2 = state.advance() |
|
# s2 = B -> xA•z, i |
|
if s2 not in predicted[k]: |
|
s2.ast.children.append(completedState.ast) |
|
predicted[k].append(s2) |
|
|
|
def main(): |
|
# S -> S + M | M |
|
# M = M * T | T |
|
# T = 1 | 2 | 3 | 4 |
|
S0 = Production("G", "S") |
|
S1 = Production("S", "S+M") |
|
S2 = Production("S", "M") |
|
M1 = Production("M", "M*T") |
|
M2 = Production("M", "T") |
|
T1 = Production("T", "1") |
|
T2 = Production("T", "2") |
|
T3 = Production("T", "3") |
|
T4 = Production("T", "4") |
|
G = Grammar("GSMT", "1234*+$", [S0,S1,S2,M1,M2,T1,T2,T3,T4], "G") |
|
# print G |
|
s = "" if len(sys.argv) == 1 else sys.argv[1] |
|
ast = G.parse(s) |
|
|
|
if ast: |
|
print ast.__str__(True).replace("$", "\$") |
|
else: |
|
print >> sys.stderr, "parse error: the string doesn't belong to L(G)" |
|
|
|
if __name__ == "__main__": |
|
main() |