Last active
November 25, 2018 16:41
-
-
Save ahmedbilal/da50e987fb7fc5891e1304b5baa92379 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import sys | |
""" | |
|------------------------------------------------------| | |
|Grammar notation | Code representation | | |
|------------------|-----------------------------------| | |
|Terminal | Code to match and consume a token| | |
|Nonterminal | Call to that rule’s function | | |
| | | If or switch statement | | |
| * or + | While or for loop | | |
| ? | If statement | | |
|------------------------------------------------------| | |
""" | |
# Decaf reserved words; the lexer emits a "T_<WORD>" token type for each.
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
                  "extends", "implements", "for", "while", "if", "else", "return", "break", "new",
                  "NewArray", "Print", "ReadInteger", "ReadLine"]
class Token:
    """A lexical token as consumed by the parser.

    Attributes:
        type:     token type tag, e.g. "T_IDENT" or "T_SEMICOLON".
        value:    the matched lexeme for value-bearing tokens, else None.
        priority: disambiguation weight assigned by the lexer.
        line_no:  source line (always 0 on the parser side; the pickled
                  tokens from the lexer carry the real line number).
    """

    def __init__(self, _type, value=None, priority=0):
        self.type = _type
        self.value = value
        self.priority = priority
        self.line_no = 0

    def __repr__(self):
        # Fix: compare against None explicitly so a falsy-but-present
        # lexeme (e.g. the empty string) still displays its VAL field.
        if self.value is None:
            return self.type + " AT Line#" + str(self.line_no)
        return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
def ret_all_tokens():
    """Return one Token instance of every token type this toolchain uses.

    Fix: the identifier token is "T_IDENT" (as used by FirstSet and emitted
    by the lexer), not "T_ID" as the original listed.
    """
    fixed = ["T_IDENT", "T_BOOL", "T_INTCONSTANT", "T_STRINGCONSTANT",
             "T_DOUBLECONSTANT", "T_PLUS", "T_MINUS", "T_MULT", "T_DIV",
             "T_MOD", "T_LT", "T_LEQ", "T_GT", "T_GEQ", "T_ASSIGN", "T_EQ",
             "T_NEQ", "T_AND", "T_OR", "T_NOT", "T_SEMICOLON", "T_COMMA",
             "T_DOT", "T_ARRDECL", "T_LSB", "T_RSB", "T_LPAREN", "T_RPAREN",
             "T_LCB", "T_RCB"]
    tokens = [Token("T_" + resr.upper()) for resr in RESERVED_WORDS]
    tokens.extend(Token(name) for name in fixed)
    return tokens
EOF = Token("T_EOF")  # End-Of-File sentinel appended after the lexed tokens
tokens = None  # token list produced by the lexer stage (pickled to disk)

if len(sys.argv) == 2:
    filename = sys.argv[1]
    # SECURITY NOTE(review): pickle.load executes arbitrary code if the token
    # file is untrusted; acceptable here only because we lex our own input.
    with open(filename, "rb") as f:  # 'with' closes the handle (original leaked it)
        tokens = pickle.load(f)
    tokens.append(EOF)
else:
    print("Unknown # of args")
    sys.exit(-1)

current_index = 0  # parser cursor into tokens[]
def is_at_end():
    """True once the lookahead is the EOF sentinel.

    Identity comparison is safe: EOF is a singleton and Token defines no
    custom __eq__, so `==` would fall back to identity anyway.
    """
    return peek() is EOF
def peek():
    """Return the current lookahead token without consuming it."""
    return tokens[current_index]
def previous():
    """Return the most recently consumed token."""
    return tokens[current_index - 1]
def check(_type):
    """Report whether the lookahead has the given type (False at EOF)."""
    return (not is_at_end()) and peek().type == _type
def advance():
    """Consume the current token and return it.

    At EOF the cursor is not moved, so the EOF sentinel is returned forever.
    """
    global current_index
    if not is_at_end():
        current_index += 1
    return previous()
def match(types):
    """If the lookahead's type is one of *types*, consume it and return True."""
    for candidate in types:
        if check(candidate):
            advance()
            return True
    return False
# FIRST sets for the LL(1) grammar: the token types that can begin each
# nonterminal.  Grammar methods consult these to pick a production.
FirstSet = dict()
FirstSet["Type"] = ["T_INT", "T_DOUBLE", "T_BOOL", "T_STRING", "T_IDENT"]
FirstSet["Decl"] = FirstSet["Type"] + ["T_VOID", "T_CLASS", "T_INTERFACE"]
FirstSet["Actuals"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_READINTEGER", "T_READLINE",
                       "T_BOOLCONSTANT", "T_DOUBLECONSTANT", "T_IDENT", "T_INTCONSTANT", "T_NEW",
                       "T_NULL", "T_STRINGCONSTANT", "T_THIS"]
# Fix: FirstSet["Expr"] is consulted by Grammar.Stmt/E/Pr but was never
# defined, raising KeyError at parse time.  An expression can begin with
# exactly the same tokens as an actual-parameter list.
FirstSet["Expr"] = list(FirstSet["Actuals"])
FirstSet["P"] = FirstSet["Type"] + ["T_VOID"]
FirstSet["Stmt"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_PRINT", "T_READINTEGER", "T_READLINE", "T_BOOLCONSTANT",
                    "T_BREAK", "T_DOUBLECONSTANT", "T_FOR", "T_IDENT", "T_IF",
                    "T_INTCONSTANT", "T_NEW", "T_NULL", "T_RETURN", "T_STRINGCONSTANT", "T_THIS", "T_WHILE", "T_LCB"]
FirstSet["Constant"] = ["T_INTCONSTANT", "T_DOUBLECONSTANT", "T_BOOLCONSTANT", "T_STRINGCONSTANT", "T_NULL"]
class Grammar(object):
    """Recursive-descent (LL(1)) parser for a Decaf-like grammar.

    Each method implements one nonterminal and returns True when its
    production matched; terminals are consumed via match()/check() and the
    *_Prime methods implement right-recursive tails.  Every method prints a
    trace line with the current lookahead token.

    Fixes over the original:
      * Decl no longer consumes T_CLASS / T_INTERFACE itself -- ClassDecl /
        InterfaceDecl match their own keyword, so it was consumed twice.
      * FunctionDecl no longer matches the function name: both of its call
        sites (Decl's void path and VF) reach it with the identifier
        already consumed.
      * Expr's constant branch peeks instead of matching (Constant() does
        the consuming, exactly as Expr13 already did).
      * IfStmt closes with "T_RPAREN" (was the nonexistent "R_PAREN").
      * GC consumes T_ASSIGN and uses "T_LPAREN"/"T_RPAREN" (were the
        nonexistent "L_PAREN"/"R_PAREN").
      * Expr0 recurses via Expr0_Prime, matching the Expr1..Expr12 pattern;
        recursing into itself rejected valid right-hand sides.
      * F peeks at the field's type instead of consuming it (Field matches
        the type itself, like the P/Prototype pair).

    NOTE(review): at top level Expr only accepts operator-free forms after
    an identifier (=, ., [, (); arithmetic such as `a + b` is reachable only
    through Expr0 on the right of an assignment -- confirm against the
    grammar handout whether that is intentional.
    """

    def Program(self):
        print("Program()", tokens[current_index])
        return self.Decl() and self.Program_Prime()

    def Program_Prime(self):
        print("Program`()", tokens[current_index])
        if peek().type in FirstSet["Decl"]:
            return self.Decl() and self.Program_Prime()
        return True

    def Decl(self):
        print("Decl()", tokens[current_index])
        if match(["T_VOID"]):
            # void <ident> FunctionDecl -- the name is consumed here because
            # FunctionDecl (also reached via VF) expects it already eaten.
            return match(["T_IDENT"]) and self.FunctionDecl()
        elif check("T_CLASS"):       # fix: ClassDecl matches the keyword itself
            return self.ClassDecl()
        elif check("T_INTERFACE"):   # fix: InterfaceDecl matches the keyword itself
            return self.InterfaceDecl()
        elif peek().type in FirstSet["Type"]:
            return self.Type() and match(["T_IDENT"]) and self.VF()
        return True

    def VF(self):
        # Disambiguates variable vs. function declaration after "Type ident".
        print("VF()", tokens[current_index])
        if check("T_SEMICOLON"):
            return self.VariableDecl()
        elif check("T_LPAREN"):
            return self.FunctionDecl()
        return False

    def VariableDecl(self):
        print("VariableDecl()", tokens[current_index])
        return match(["T_SEMICOLON"])

    def Variable(self):
        print("Variable()", tokens[current_index])
        return self.Type() and match(["T_IDENT"])

    def Type(self):
        print("Type()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime()

    def Type_Prime(self):
        # Zero or more "[]" array suffixes.
        print("Type_Prime()", tokens[current_index])
        if match(["T_LSB"]):
            return match(["T_RSB"]) and self.Type_Prime()
        return True

    def FunctionDecl(self):
        # Entered with the function name already consumed (by Decl or VF).
        print("FunctionDecl()", tokens[current_index])
        return match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and self.StmtBlock()

    def Formals(self):
        print("Formals()", tokens[current_index])
        if peek().type in FirstSet["Type"]:
            return self.Parameters()
        return True

    def Parameters(self):
        print("Parameters()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.Parameters_Prime()

    def Parameters_Prime(self):
        print("Parameters_Prime()", tokens[current_index])
        if match(["T_COMMA"]):
            return self.Variable() and self.Parameters_Prime()
        return True

    def ClassDecl(self):
        print("ClassDecl()", tokens[current_index])
        return match(["T_CLASS"]) and match(["T_IDENT"]) and self.Extend() and self.Implement() and match(["T_LCB"]) \
            and self.F() and match(["T_RCB"])

    def Extend(self):
        # Optional "extends ident".
        print("Extend()", tokens[current_index])
        if match(["T_EXTENDS"]):
            return match(["T_IDENT"])
        return True

    def Implement(self):
        # Optional "implements ident (, ident)*".
        print("Implements()", tokens[current_index])
        if match(["T_IMPLEMENTS"]):
            return self.K()
        return True

    def F(self):
        # Zero or more class fields; peek only -- Field consumes the type (fix).
        print("F()", tokens[current_index])
        if peek().type in FirstSet["Type"]:
            return self.Field() and self.F()
        return True

    def K(self):
        # Comma-separated interface name list.
        print("K()", tokens[current_index])
        return match(["T_IDENT"]) and self.K_Prime()

    def K_Prime(self):
        print("K`()", tokens[current_index])
        if match(["T_COMMA"]):
            return match(["T_IDENT"]) and self.K_Prime()
        return True

    def Field(self):
        print("Field()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.VF()

    def InterfaceDecl(self):
        print("InterfaceDecl()", tokens[current_index])
        return match(["T_INTERFACE"]) and match(["T_IDENT"]) and match(["T_LCB"]) and self.P() and match(["T_RCB"])

    def P(self):
        # Zero or more prototypes inside an interface body.
        print("P()", tokens[current_index])
        if peek().type in FirstSet["P"]:
            return self.Prototype() and self.P()
        return True

    def Prototype(self):
        print("Prototype()", tokens[current_index])
        return match(FirstSet["P"]) and self.Type_Prime() and match(["T_IDENT"]) and \
            match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and match(["T_SEMICOLON"])

    def StmtBlock(self):
        print("StmtBlock()", tokens[current_index])
        return match(["T_LCB"]) and self.V() and self.S() and match(["T_RCB"])

    def V(self):
        # Zero or more local variable declarations at the top of a block.
        print("V()", tokens[current_index])
        if match(FirstSet["Type"]):
            return self.Type_Prime() and match(["T_IDENT"]) and self.VariableDecl() and self.V()
        return True

    def S(self):
        # Zero or more statements.
        print("S()", tokens[current_index])
        if peek().type in FirstSet["Stmt"]:
            return self.Stmt() and self.S()
        return True

    def Stmt(self):
        print("Stmt()", tokens[current_index])
        if peek().type in FirstSet["Expr"]:
            return self.Expr() and match(["T_SEMICOLON"])
        elif peek().type == "T_IF":
            return self.IfStmt()
        elif peek().type == "T_WHILE":
            return self.WhileStmt()
        elif peek().type == "T_FOR":
            return self.ForStmt()
        elif peek().type == "T_BREAK":
            return self.BreakStmt()
        elif peek().type == "T_RETURN":
            return self.ReturnStmt()
        elif peek().type == "T_PRINT":
            return self.PrintStmt()
        elif match(["T_LCB"]):
            # Nested statement block ('{' already consumed above).
            return self.V() and self.S() and match(["T_RCB"])
        return False

    def E(self):
        # Optional expression (for-loop header slots and return values).
        print("E()", tokens[current_index])  # fix: trace label was "StEmt()"
        if peek().type in FirstSet["Expr"]:
            return self.Expr()
        return True

    def IfStmt(self):
        print("IfStmt()", tokens[current_index])
        # fix: the closing paren token is "T_RPAREN", not "R_PAREN"
        return match(["T_IF"]) and match(["T_LPAREN"]) and self.Expr() and match(["T_RPAREN"]) and \
            self.Stmt() and self.Else()

    def Else(self):
        print("Else()", tokens[current_index])
        if match(["T_ELSE"]):
            return self.Stmt()
        return True

    def WhileStmt(self):
        print("WhileStmt()", tokens[current_index])
        return match(["T_WHILE"]) and match(["T_LPAREN"]) and self.Expr() and match(["T_RPAREN"]) and self.Stmt()

    def ForStmt(self):
        print("ForStmt()", tokens[current_index])
        return match(["T_FOR"]) and match(["T_LPAREN"]) and self.E() and match(["T_SEMICOLON"]) and \
            self.Expr() and match(["T_SEMICOLON"]) and self.E() and match(["T_RPAREN"]) and self.Stmt()

    def ReturnStmt(self):
        print("ReturnStmt()", tokens[current_index])
        return match(["T_RETURN"]) and self.E() and match(["T_SEMICOLON"])

    def BreakStmt(self):
        print("BreakStmt()", tokens[current_index])
        return match(["T_BREAK"]) and match(["T_SEMICOLON"])

    def PrintStmt(self):
        print("PrintStmt()", tokens[current_index])
        return match(["T_PRINT"]) and match(["T_LPAREN"]) and self.Pr() and \
            match(["T_RPAREN"]) and match(["T_SEMICOLON"])

    def Pr(self):
        # One or more comma-separated expressions.
        print("Pr()", tokens[current_index])
        if peek().type in FirstSet["Expr"]:
            return self.Expr() and self.Pr_Prime()
        return False

    def Pr_Prime(self):
        print("Pr_Prime()", tokens[current_index])
        if match(["T_COMMA"]):
            return self.Expr() and self.Pr_Prime()
        return True

    def Expr(self):
        print("Expr()", tokens[current_index])
        if match(["T_IDENT"]):
            return self.LC()
        elif peek().type in FirstSet["Constant"]:  # fix: peek -- Constant() consumes
            return self.Constant()
        elif match(["T_THIS"]):
            return True
        elif match(["T_LPAREN"]):
            return self.Expr() and match(["T_RPAREN"])
        elif match(["T_NOT"]):
            return self.Expr()
        elif match(["T_READINTEGER"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_READLINE"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_NEW"]):
            return match(["T_IDENT"])
        elif match(["T_NEWARRAY"]):
            return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
                self.Type() and match(["T_RPAREN"])
        return False  # fix: was an implicit None

    def LC(self):
        # Continuation after a leading identifier: assignment, member access,
        # array-element assignment, or call.
        print("LC()", tokens[current_index])
        if match(["T_ASSIGN"]):
            return self.Expr0()
        elif match(["T_DOT"]):
            return match(["T_IDENT"]) and self.GC()
        elif match(["T_LSB"]):
            return self.Expr() and match(["T_RSB"]) and match(["T_ASSIGN"]) and self.Expr0()
        elif match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return False

    def GC(self):
        # Continuation after "ident.ident": field assignment or method call.
        print("GC()", tokens[current_index])
        if match(["T_ASSIGN"]):      # fix: consume the '='; original only peeked
            return self.Expr0()
        elif match(["T_LPAREN"]):    # fix: token names were "L_PAREN"/"R_PAREN"
            return self.Actuals() and match(["T_RPAREN"])
        return False

    # Expr0..Expr13 form the operator-precedence ladder (|| lowest, * highest).

    def Expr0(self):
        print("Expr0()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            # fix: tail is Expr0_Prime (the '||' loop); recursing into Expr0
            # itself rejected any right-hand side followed by ';' etc.
            return self.Expr1() and self.Expr0_Prime()
        return False

    def Expr0_Prime(self):
        print("Expr0`()", tokens[current_index])
        if match(["T_OR"]):
            return self.Expr1() and self.Expr0_Prime()
        return True

    def Expr1(self):
        print("Expr1()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            return self.Expr2() and self.Expr1_Prime()
        return False

    def Expr1_Prime(self):
        print("Expr1`()", tokens[current_index])
        if match(["T_AND"]):
            return self.Expr2() and self.Expr1_Prime()
        return True

    def Expr2(self):
        print("Expr2()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            return self.Expr3() and self.Expr2_Prime()
        return False

    def Expr2_Prime(self):
        print("Expr2`()", tokens[current_index])
        if match(["T_NEQ"]):
            return self.Expr3() and self.Expr2_Prime()
        return True

    def Expr3(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr4() and self.Expr3_Prime()
        return False

    def Expr3_Prime(self):
        if match(["T_EQ"]):
            return self.Expr4() and self.Expr3_Prime()
        return True

    def Expr4(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr5() and self.Expr4_Prime()
        return False

    def Expr4_Prime(self):
        if match(["T_GEQ"]):
            return self.Expr5() and self.Expr4_Prime()
        return True

    def Expr5(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr6() and self.Expr5_Prime()
        return False

    def Expr5_Prime(self):
        if match(["T_GT"]):
            return self.Expr6() and self.Expr5_Prime()
        return True

    def Expr6(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr7() and self.Expr6_Prime()
        return False

    def Expr6_Prime(self):
        if match(["T_LEQ"]):
            return self.Expr7() and self.Expr6_Prime()
        return True

    def Expr7(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr8() and self.Expr7_Prime()
        return False

    def Expr7_Prime(self):
        if match(["T_LT"]):
            return self.Expr8() and self.Expr7_Prime()
        return True

    def Expr8(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr9() and self.Expr8_Prime()
        return False

    def Expr8_Prime(self):
        if match(["T_MINUS"]):
            return self.Expr9() and self.Expr8_Prime()
        return True

    def Expr9(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr10() and self.Expr9_Prime()
        return False

    def Expr9_Prime(self):
        if match(["T_PLUS"]):
            return self.Expr10() and self.Expr9_Prime()
        return True

    def Expr10(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr11() and self.Expr10_Prime()
        return False

    def Expr10_Prime(self):
        if match(["T_MOD"]):
            return self.Expr11() and self.Expr10_Prime()
        return True

    def Expr11(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr12() and self.Expr11_Prime()
        return False

    def Expr11_Prime(self):
        if match(["T_DIV"]):
            return self.Expr12() and self.Expr11_Prime()
        return True

    def Expr12(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr13() and self.Expr12_Prime()
        return False

    def Expr12_Prime(self):
        if match(["T_MULT"]):
            return self.Expr13() and self.Expr12_Prime()
        return True

    def Expr13(self):
        # Primary expressions at the top of the precedence ladder.
        if match(["T_IDENT"]):
            return self.FC()
        elif peek().type in FirstSet["Constant"]:
            return self.Constant()
        elif match(["T_THIS"]):
            return True
        elif match(["T_LPAREN"]):
            return self.Expr() and match(["T_RPAREN"])
        elif match(["T_NOT"]):
            return self.Expr()
        elif match(["T_READINTEGER"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_READLINE"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_NEW"]):
            return match(["T_IDENT"])
        elif match(["T_NEWARRAY"]):
            return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
                self.Type() and match(["T_RPAREN"])
        return False  # fix: was an implicit None

    def FC(self):
        # Optional suffix after a primary identifier: member, index, or call.
        if match(["T_DOT"]):
            return match(["T_IDENT"]) and self.FC_Prime()
        elif match(["T_LSB"]):
            return self.Expr() and match(["T_RSB"])
        elif match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return True

    def FC_Prime(self):
        if match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return True

    def Actuals(self):
        # Possibly-empty argument list.
        if peek().type in FirstSet["Actuals"]:
            return self.Pr()
        return True

    def Constant(self):
        return match(FirstSet["Constant"])
def main():
    """Parse the loaded token stream and report the result and final cursor."""
    parser = Grammar()
    result = parser.Program()
    print(result)
    print(current_index)

main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import string | |
import sys | |
import logging | |
import pickle | |
logging.basicConfig(level=logging.CRITICAL)

filename = ""
code = ""
LINE_NO = 1  # current source line; advanced by the scanner loop on each '\n'

if len(sys.argv) == 2:
    filename = sys.argv[1]
    with open(filename, "r") as f:  # 'with' closes the handle (original leaked it)
        code = f.read()
else:
    print("Unknown # of args")
    sys.exit(-1)
def preprocess(_code):
    """Strip /* ... */ block comments and // line comments from source text.

    Line comments keep their trailing newline; block comments are removed
    wholesale (including any newlines they span, so line numbering of later
    text shifts -- same as the original behavior).

    Fixes over the original:
      * an unterminated "/*" no longer raises ValueError (the rest of the
        text is dropped instead);
      * "*/" is searched for *after* the "/*", so a stray "*/" earlier in
        the text can no longer cause duplicated output / an endless loop;
      * a trailing "//" with no newline after it no longer raises ValueError.
    """
    output = _code
    while "/*" in output:
        start = output.index("/*")
        end = output.find("*/", start + 2)
        if end == -1:
            output = output[:start]
            break
        output = output[:start] + output[end + 2:]
    while "//" in output:
        start = output.index("//")
        newline = output.find("\n", start)
        if newline == -1:
            output = output[:start]
            break
        output = output[:start] + output[newline:]
    return output
# Remove comments before scanning.
code = preprocess(code)
# Decaf reserved words; the scanner emits a "T_<WORD>" token type for each.
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
                  "extends", "implements", "for", "while", "if", "else", "return", "break", "new",
                  "NewArray", "Print", "ReadInteger", "ReadLine"]
# In[3]: | |
class Token:
    """A lexical token produced by the scanner.

    Attributes:
        type:     token type tag, e.g. "T_INTCONSTANT".
        value:    the matched lexeme for value-bearing tokens, else None.
        priority: disambiguation weight (higher wins when several token
                  kinds match the same lexeme).
        line_no:  source line, captured from the global LINE_NO counter
                  maintained by the scanner loop.
    """

    def __init__(self, _type, value=None, priority=0):
        self.type = _type
        self.value = value
        self.priority = priority
        global LINE_NO
        self.line_no = LINE_NO

    def __repr__(self):
        # Fix: compare against None explicitly so a falsy-but-present
        # lexeme (e.g. the empty string) still displays its VAL field.
        if self.value is None:
            return self.type + " AT Line#" + str(self.line_no)
        return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
# In[4]: | |
def is_reserve(s):
    """True when *s* is one of the Decaf reserved words."""
    return s in RESERVED_WORDS
def is_identifier(s):
    """Decaf identifier: a letter followed by alphanumerics/underscores,
    at most 31 characters long."""
    if not s or s[0] not in string.ascii_letters or len(s) >= 32:
        return False
    return all(c.isalnum() or c == "_" for c in s[1:])
def is_non_resv_non_ident(s):
    """True when *s* is neither a reserved word nor a valid identifier."""
    return not (is_reserve(s) or is_identifier(s))
def is_white_space(s):
    """True when *s* is non-empty and consists entirely of whitespace."""
    return s.isspace()
def is_integer(s):
    """Decaf integer constant: decimal digits, or 0x/0X followed by at
    least one hex digit.

    Fix: a bare "0x"/"0X" is rejected -- the original's all() over the
    empty suffix was vacuously True.
    """
    if s[:2] in ("0x", "0X"):
        hex_digits = s[2:]
        return bool(hex_digits) and all(c in string.hexdigits for c in hex_digits)
    return s.isdigit()
def is_string(s):
    """Decaf string constant: a double-quoted, single-line lexeme with
    exactly the two delimiting quotes and no embedded quote or newline."""
    return (len(s) >= 2
            and s.startswith('"')
            and s.endswith('"')
            and s.count('"') == 2
            and "\n" not in s)
def is_double(s):
    """Decaf double constant: digit+ '.' digit* ('E' ('+'|'-')? digit+)?

    Fixes over the original:
      * empty input no longer crashes on s[0];
      * a sign with no exponent digits ("1.5E+") is rejected (all() over
        the empty tail was vacuously True);
      * the whole integer part is validated, not just its first character
        ("1x.5" was accepted).
    """
    if s.count(".") != 1:
        return False
    if "E" in s:
        if s.count("E") != 1:
            return False
        mantissa, _, exponent = s.partition("E")
        if exponent[:1] in ("+", "-"):
            exponent = exponent[1:]
        if not exponent or not all(c in string.digits for c in exponent):
            return False
    else:
        mantissa = s
    int_part, dot, frac_part = mantissa.partition(".")
    if dot != ".":  # the single '.' sat in the exponent, not the mantissa
        return False
    return (int_part != ""
            and all(c in string.digits for c in int_part)
            and all(c in string.digits for c in frac_part))
# In[5]: | |
def all_tokens():
    """Return one Token instance of every token type the lexer can emit."""
    fixed = ["T_IDENT", "T_BOOL", "T_INTCONSTANT", "T_STRINGCONSTANT",
             "T_DOUBLECONSTANT", "T_PLUS", "T_MINUS", "T_MULT", "T_DIV",
             "T_MOD", "T_LT", "T_LEQ", "T_GT", "T_GEQ", "T_ASSIGN", "T_EQ",
             "T_NEQ", "T_AND", "T_OR", "T_NOT", "T_SEMICOLON", "T_COMMA",
             "T_DOT", "T_ARRDECL", "T_LSB", "T_RSB", "T_LPAREN", "T_RPAREN",
             "T_LCB", "T_RCB"]
    names = ["T_" + word.upper() for word in RESERVED_WORDS] + fixed
    return [Token(name) for name in names]
def ret_token(s):
    """Return every Token candidate the lexeme *s* could be.

    A lexeme may match several token kinds at once (e.g. "true" is both a
    boolean literal and a well-formed identifier); the scanner loop later
    keeps the highest-priority candidate.  Reserved words carry priority
    999 and boolean literals 998 so both outrank the plain identifier.

    Fix: true/false now produce "T_BOOLCONSTANT" instead of "T_BOOL" --
    "T_BOOL" is the token for the `bool` type keyword, and the parser's
    FirstSet["Constant"] expects "T_BOOLCONSTANT" for the literals.
    """
    matches = []
    if is_reserve(s):
        matches.append(Token("T_" + s.upper(), value=None, priority=999))
    if is_identifier(s):
        matches.append(Token("T_IDENT", s))
    if s in ("true", "false"):
        matches.append(Token("T_BOOLCONSTANT", s, 998))
    if is_integer(s):
        matches.append(Token("T_INTCONSTANT", s))
    if is_string(s):
        matches.append(Token("T_STRINGCONSTANT", s))
    if is_double(s):
        matches.append(Token("T_DOUBLECONSTANT", s))
    # Fixed operators/punctuation: at most one of these can equal s.
    operators = {
        "+": "T_PLUS", "-": "T_MINUS", "*": "T_MULT", "/": "T_DIV",
        "%": "T_MOD", "<": "T_LT", "<=": "T_LEQ", ">": "T_GT",
        ">=": "T_GEQ", "=": "T_ASSIGN", "==": "T_EQ", "!=": "T_NEQ",
        "&&": "T_AND", "||": "T_OR", "!": "T_NOT", ";": "T_SEMICOLON",
        ",": "T_COMMA", ".": "T_DOT", "[]": "T_ARRDECL", "[": "T_LSB",
        "]": "T_RSB", "(": "T_LPAREN", ")": "T_RPAREN", "{": "T_LCB",
        "}": "T_RCB",
    }
    token_type = operators.get(s)
    if token_type is not None:
        matches.append(Token(token_type))
    return matches
# In[ ]: | |
# In[6]: | |
# ---------------------------------------------------------------------------
# Scanner loop (maximal-munch style): grow the lexeme `stack` one character
# at a time; when the extended stack no longer matches any token kind, emit
# the best candidate for the previous stack and restart from the current
# character.
# ---------------------------------------------------------------------------
OUTPUT = []        # emitted token stream, in source order
identified = {}    # token-type -> Token candidates for the current stack
stack = ""         # lexeme accumulated so far
for _cindex, char in enumerate(code):
    # Whitespace ends the current lexeme -- unless a '"' sits in the stack,
    # i.e. we are inside a string literal, where spaces belong to the lexeme.
    if char in string.whitespace and '"' not in stack:
        if char == "\n":
            LINE_NO += 1
        logging.debug("--------- Whitespace occur ----------")
        if identified:
            # Emit the highest-priority candidate (reserved words carry 999
            # and booleans 998, so both beat the plain identifier match).
            OUTPUT.append(
                identified[max(identified, key=lambda k: identified[k].priority)])
            logging.debug("Remaining {}".format(code[_cindex:]))
            identified.clear()
        stack = ""
        logging.debug("Stack = {}".format(stack))
        logging.debug("--------- Whitespace end ----------")
        continue
    stack += char
    logging.debug("\nSTACK {}".format(stack))
    identified_tokens = ret_token(stack)
    logging.debug("LATEST_TOKENS {}".format(identified_tokens))
    # Extended stack matches nothing but the previous stack did: flush the
    # previous best match and restart the lexeme from this character alone.
    if not identified_tokens and identified:
        OUTPUT.append(
            identified[max(identified, key=lambda k: identified[k].priority)])
        logging.debug("OUTPUT {}".format(OUTPUT))
        logging.debug("Remaining: {}".format(code[_cindex:]))
        identified.clear()
        stack = stack[-1]
        logging.debug("Stack {}".format(stack))
        identified_tokens = ret_token(stack)
        logging.debug("LATEST_TOKENS = {}".format(identified_tokens))
    identified.clear()
    # NOTE(review): both branches below are identical -- the if/else is
    # redundant and could be a single assignment.
    for identified_token in identified_tokens:
        if identified_token.type in identified.keys():
            identified[identified_token.type] = identified_token
        else:
            identified[identified_token.type] = identified_token
    logging.debug("IDENTIFIED = {}".format(identified))
    # Flush whatever is pending once the last character has been consumed.
    # NOTE(review): if the source ends in a character that matches nothing,
    # `identified` is empty here and max() raises ValueError -- confirm
    # inputs always end in a valid lexeme or whitespace.
    if _cindex == len(code) - 1:
        OUTPUT.append(
            identified[max(identified, key=lambda k: identified[k].priority)])
logging.debug("STACK {}".format(stack))
print("OUTPUT {}".format(OUTPUT))
# Persist the token stream for the parser script (<input>.tokens).
# NOTE(review): the file object is never closed explicitly; the write relies
# on interpreter shutdown to flush.
output_file = open(sys.argv[1] + ".tokens", "wb")
pickle.dump(OUTPUT, output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment