Created
February 8, 2016 15:21
-
-
Save aatishnn/5d613d48aec77a3fdcf1 to your computer and use it in GitHub Desktop.
Top Down Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from prettytable import PrettyTable | |
epsila = '@' | |
''' | |
S -> ABC | |
A -> abA | ab | |
B -> bD | |
D -> CD | @ | |
C -> c | cC | |
E -> TA | |
A -> +TA|@ | |
T -> FB | |
B -> *FB|@ | |
F -> (E)|i | |
''' | |
#G = {'S': ['ABC',], 'A': ['abD',], 'D': ['A', '@'], 'B': ['bE',], 'E': ['CE', '@',], 'C':['cF', ], 'F': ['C', '@',]} | |
G = {'S': ['AC',], 'A': ['abD', 'bD',], 'D': ['A', '@'], 'C':['cF', ], 'F': ['C', '@',]} | |
TERMINALS = 'abc' | |
#NON_TERMINALS = 'ABCDEFS' | |
NON_TERMINALS = 'ACDFS' | |
START_SYM = 'S' | |
RH_MARKER = '$' | |
TEST_INPUT = 'abbcc' | |
# G = {'E':['TA',], 'A':['+TA','@'],'T':['FB',],'B':['*FB','@'],'F':['(E)','i'],} | |
# TERMINALS = '()i+*' | |
# NON_TERMINALS = 'ETABF' | |
# START_SYM = 'E' | |
# RH_MARKER = '$' | |
# TEST_INPUT = 'i+i*i' | |
def has_epsila_derivation(G, alpha): | |
return epsila in G.get(alpha, []) | |
def FIRST(G, alpha): | |
''' alpha may be terminal or non-terminal ''' | |
if alpha in TERMINALS or alpha == epsila: | |
return [alpha,] | |
first_set = set() | |
# In case of non-terminal, see productions to find first | |
productions = G[alpha] | |
for production in productions: | |
for alpha in production: | |
first_value = alpha | |
if first_value in TERMINALS: | |
first_set.add(first_value) | |
break | |
elif first_value == epsila: | |
first_set.add(epsila) | |
break | |
elif first_value in NON_TERMINALS: | |
first_set.update(FIRST(G, first_value)) | |
# But check whether it gives epsila derivation. In that case, | |
# see the first of next literal. | |
if not has_epsila_derivation(G, first_value): | |
break | |
else: | |
first_set.remove(epsila) | |
else: | |
raise ValueError('Invalid literal: no terminal or non-terminal or epsila') | |
return list(first_set) | |
def test_has_epsila_derivation(): | |
assert has_epsila_derivation(G, 'D') == True | |
assert has_epsila_derivation(G, 'A') == False | |
def test_FIRST(): | |
assert FIRST(G, 'a') == ['a'] | |
assert FIRST(G, 'A') == ['a'] | |
assert FIRST(G, 'B') == ['b'] | |
assert FIRST(G, 'D') == ['@', 'c'] | |
assert FIRST(G, 'C') == ['c'] | |
assert FIRST(G, 'S') == ['a'] | |
def FOLLOW(G, alpha, called_by=[]): | |
''' alpha may be terminal or non-terminal ''' | |
follow_set = set() | |
if alpha == START_SYM: | |
follow_set.add(RH_MARKER) | |
for A, productions in G.iteritems(): | |
for production in productions: | |
if alpha in production: | |
#print productions | |
#print "Using production ", production, "for alpha:", alpha | |
#print follow_set | |
alpha_index = production.index(alpha) | |
next_index = alpha_index + 1 | |
while(True): | |
#print "0" | |
if next_index == len(production): | |
#print "1" | |
if not A in called_by and A != alpha: | |
#print "Follow" | |
follow_set.update(FOLLOW(G, A, called_by + [alpha,])) | |
break | |
elif production[next_index] in TERMINALS: | |
#print "2" | |
follow_set.add(production[next_index]) | |
break | |
elif production[next_index] in NON_TERMINALS: | |
#print "3" | |
#print "First" | |
follow_set.update(FIRST(G, production[next_index])) | |
if not has_epsila_derivation(G, production[next_index]): | |
break | |
#print "Epsila derivation", next_index | |
next_index = next_index + 1 | |
try: | |
follow_set.remove(epsila) | |
except KeyError: | |
pass | |
return list(follow_set) | |
def test_FOLLOW(): | |
assert FOLLOW(G, 'S') == ['$'] | |
assert FOLLOW(G, 'A') == ['b'] | |
assert FOLLOW(G, 'B') == ['c'] | |
assert FOLLOW(G, 'C') == ['c', '$'] | |
assert FOLLOW(G, 'D') == ['c'] | |
def print_FIRST_FOLLOW(G): | |
x = PrettyTable(['Production', 'First(alpha)', 'Follow(A)',]) | |
for A, productions in G.iteritems(): | |
for production in productions: | |
prod_str = "%s -> %-3s" %(A, production) | |
x.add_row([prod_str, FIRST(G, production[0]), FOLLOW(G, A)]) | |
print x.get_string() | |
def generate_table(G): | |
M = {} | |
for n in NON_TERMINALS: | |
M[n] = {} | |
for m in TERMINALS + '$': | |
M[n][m] = [] | |
for A, productions in G.iteritems(): | |
for production in productions: | |
first = FIRST(G, production[0]) | |
follow = FOLLOW(G, A) | |
for f in first: | |
if f != epsila: | |
M[A][f].append(production) | |
else: | |
for h in follow: | |
M[A][h].append(production) | |
return M | |
def print_TABLE(G): | |
M = generate_table(G) | |
x = PrettyTable() | |
x.add_column(' ', NON_TERMINALS) | |
for t in TERMINALS + '$': | |
column = [] | |
for n in NON_TERMINALS: | |
production = M[n][t] | |
if len(production) == 0: | |
production = "Error" | |
if len(production) == 1: | |
production = production[0] | |
column.append(production) | |
x.add_column(t, column) | |
print x.get_string() | |
print_FIRST_FOLLOW(G) | |
print_TABLE(G) | |
def test_parse(G, inp): | |
M = generate_table(G) | |
inp = list(inp + RH_MARKER) | |
stack = [RH_MARKER, START_SYM] | |
action = '-' | |
while(True): | |
print "%-30s %-40s %-30s" %(stack, inp, action) | |
X = stack[-1] | |
e = inp[0] | |
if X in TERMINALS or X in '$': | |
if X == e: | |
stack.pop() | |
inp.pop(0) | |
action = "Terminal Match" | |
else: | |
print "Error due to terminal non matching" | |
return | |
else: | |
if len(M[X][e]) != 0: | |
production_to_append = list(M[X][e][0]) | |
production_to_append.reverse() | |
stack.pop() | |
stack.extend(list(production_to_append)) | |
action = "Replace with", M[X][e] | |
else: | |
print "Error" | |
return | |
# Remove epsila from stack | |
try: | |
stack.remove(epsila) | |
except ValueError: | |
pass | |
if X == '$': | |
print "Parsing success" | |
break | |
test_parse(G, TEST_INPUT) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment