Last active
November 25, 2018 16:41
-
-
Save ahmedbilal/da50e987fb7fc5891e1304b5baa92379 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import sys | |
""" | |
|------------------------------------------------------| | |
|Grammar notation | Code representation | | |
|------------------|-----------------------------------| | |
|Terminal | Code to match and consume a token| | |
|Nonterminal | Call to that rule’s function | | |
| | | If or switch statement | | |
| * or + | While or for loop | | |
| ? | If statement | | |
|------------------------------------------------------| | |
""" | |
# Decaf reserved words; the lexer emits a "T_<WORD>" token type for each.
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
                  "extends", "implements", "for", "while", "if", "else", "return", "break", "new",
                  "NewArray", "Print", "ReadInteger", "ReadLine"]
class Token:
    """A lexical token as consumed by the parser.

    Attributes:
        type:     token type tag, e.g. "T_IDENT" or "T_SEMICOLON".
        value:    the matched lexeme for value-bearing tokens, else None.
        priority: disambiguation weight assigned by the lexer.
        line_no:  source line (always 0 on the parser side; the pickled
                  tokens from the lexer carry the real line number).
    """

    def __init__(self, _type, value=None, priority=0):
        self.type = _type
        self.value = value
        self.priority = priority
        self.line_no = 0

    def __repr__(self):
        # Fix: compare against None explicitly so a falsy-but-present
        # lexeme (e.g. the empty string) still displays its VAL field.
        if self.value is None:
            return self.type + " AT Line#" + str(self.line_no)
        return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
def ret_all_tokens():
    """Return one Token instance of every token type this toolchain uses.

    Fix: the identifier token is "T_IDENT" (as used by FirstSet and emitted
    by the lexer), not "T_ID" as the original listed.
    """
    fixed = ["T_IDENT", "T_BOOL", "T_INTCONSTANT", "T_STRINGCONSTANT",
             "T_DOUBLECONSTANT", "T_PLUS", "T_MINUS", "T_MULT", "T_DIV",
             "T_MOD", "T_LT", "T_LEQ", "T_GT", "T_GEQ", "T_ASSIGN", "T_EQ",
             "T_NEQ", "T_AND", "T_OR", "T_NOT", "T_SEMICOLON", "T_COMMA",
             "T_DOT", "T_ARRDECL", "T_LSB", "T_RSB", "T_LPAREN", "T_RPAREN",
             "T_LCB", "T_RCB"]
    tokens = [Token("T_" + resr.upper()) for resr in RESERVED_WORDS]
    tokens.extend(Token(name) for name in fixed)
    return tokens
EOF = Token("T_EOF")  # End-Of-File sentinel appended after the lexed tokens
tokens = None  # token list produced by the lexer stage (pickled to disk)

if len(sys.argv) == 2:
    filename = sys.argv[1]
    # SECURITY NOTE(review): pickle.load executes arbitrary code if the token
    # file is untrusted; acceptable here only because we lex our own input.
    with open(filename, "rb") as f:  # 'with' closes the handle (original leaked it)
        tokens = pickle.load(f)
    tokens.append(EOF)
else:
    print("Unknown # of args")
    sys.exit(-1)

current_index = 0  # parser cursor into tokens[]
def is_at_end():
    """True once the lookahead is the EOF sentinel.

    Identity comparison is safe: EOF is a singleton and Token defines no
    custom __eq__, so `==` would fall back to identity anyway.
    """
    return peek() is EOF
def peek():
    """Return the current lookahead token without consuming it."""
    return tokens[current_index]
def previous():
    """Return the most recently consumed token."""
    return tokens[current_index - 1]
def check(_type):
    """Report whether the lookahead has the given type (False at EOF)."""
    return (not is_at_end()) and peek().type == _type
def advance():
    """Consume the current token and return it.

    At EOF the cursor is not moved, so the EOF sentinel is returned forever.
    """
    global current_index
    if not is_at_end():
        current_index += 1
    return previous()
def match(types):
    """If the lookahead's type is one of *types*, consume it and return True."""
    for candidate in types:
        if check(candidate):
            advance()
            return True
    return False
# FIRST sets for the LL(1) grammar: the token types that can begin each
# nonterminal.  Grammar methods consult these to pick a production.
FirstSet = dict()
FirstSet["Type"] = ["T_INT", "T_DOUBLE", "T_BOOL", "T_STRING", "T_IDENT"]
FirstSet["Decl"] = FirstSet["Type"] + ["T_VOID", "T_CLASS", "T_INTERFACE"]
FirstSet["Actuals"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_READINTEGER", "T_READLINE",
                       "T_BOOLCONSTANT", "T_DOUBLECONSTANT", "T_IDENT", "T_INTCONSTANT", "T_NEW",
                       "T_NULL", "T_STRINGCONSTANT", "T_THIS"]
# Fix: FirstSet["Expr"] is consulted by Grammar.Stmt/E/Pr but was never
# defined, raising KeyError at parse time.  An expression can begin with
# exactly the same tokens as an actual-parameter list.
FirstSet["Expr"] = list(FirstSet["Actuals"])
FirstSet["P"] = FirstSet["Type"] + ["T_VOID"]
FirstSet["Stmt"] = ["T_NOT", "T_LPAREN", "T_NEWARRAY", "T_PRINT", "T_READINTEGER", "T_READLINE", "T_BOOLCONSTANT",
                    "T_BREAK", "T_DOUBLECONSTANT", "T_FOR", "T_IDENT", "T_IF",
                    "T_INTCONSTANT", "T_NEW", "T_NULL", "T_RETURN", "T_STRINGCONSTANT", "T_THIS", "T_WHILE", "T_LCB"]
FirstSet["Constant"] = ["T_INTCONSTANT", "T_DOUBLECONSTANT", "T_BOOLCONSTANT", "T_STRINGCONSTANT", "T_NULL"]
class Grammar(object):
    """Recursive-descent (LL(1)) parser for a Decaf-like grammar.

    Each method implements one nonterminal and returns True when its
    production matched; terminals are consumed via match()/check() and the
    *_Prime methods implement right-recursive tails.  Every method prints a
    trace line with the current lookahead token.

    Fixes over the original:
      * Decl no longer consumes T_CLASS / T_INTERFACE itself -- ClassDecl /
        InterfaceDecl match their own keyword, so it was consumed twice.
      * FunctionDecl no longer matches the function name: both of its call
        sites (Decl's void path and VF) reach it with the identifier
        already consumed.
      * Expr's constant branch peeks instead of matching (Constant() does
        the consuming, exactly as Expr13 already did).
      * IfStmt closes with "T_RPAREN" (was the nonexistent "R_PAREN").
      * GC consumes T_ASSIGN and uses "T_LPAREN"/"T_RPAREN" (were the
        nonexistent "L_PAREN"/"R_PAREN").
      * Expr0 recurses via Expr0_Prime, matching the Expr1..Expr12 pattern;
        recursing into itself rejected valid right-hand sides.
      * F peeks at the field's type instead of consuming it (Field matches
        the type itself, like the P/Prototype pair).

    NOTE(review): at top level Expr only accepts operator-free forms after
    an identifier (=, ., [, (); arithmetic such as `a + b` is reachable only
    through Expr0 on the right of an assignment -- confirm against the
    grammar handout whether that is intentional.
    """

    def Program(self):
        print("Program()", tokens[current_index])
        return self.Decl() and self.Program_Prime()

    def Program_Prime(self):
        print("Program`()", tokens[current_index])
        if peek().type in FirstSet["Decl"]:
            return self.Decl() and self.Program_Prime()
        return True

    def Decl(self):
        print("Decl()", tokens[current_index])
        if match(["T_VOID"]):
            # void <ident> FunctionDecl -- the name is consumed here because
            # FunctionDecl (also reached via VF) expects it already eaten.
            return match(["T_IDENT"]) and self.FunctionDecl()
        elif check("T_CLASS"):       # fix: ClassDecl matches the keyword itself
            return self.ClassDecl()
        elif check("T_INTERFACE"):   # fix: InterfaceDecl matches the keyword itself
            return self.InterfaceDecl()
        elif peek().type in FirstSet["Type"]:
            return self.Type() and match(["T_IDENT"]) and self.VF()
        return True

    def VF(self):
        # Disambiguates variable vs. function declaration after "Type ident".
        print("VF()", tokens[current_index])
        if check("T_SEMICOLON"):
            return self.VariableDecl()
        elif check("T_LPAREN"):
            return self.FunctionDecl()
        return False

    def VariableDecl(self):
        print("VariableDecl()", tokens[current_index])
        return match(["T_SEMICOLON"])

    def Variable(self):
        print("Variable()", tokens[current_index])
        return self.Type() and match(["T_IDENT"])

    def Type(self):
        print("Type()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime()

    def Type_Prime(self):
        # Zero or more "[]" array suffixes.
        print("Type_Prime()", tokens[current_index])
        if match(["T_LSB"]):
            return match(["T_RSB"]) and self.Type_Prime()
        return True

    def FunctionDecl(self):
        # Entered with the function name already consumed (by Decl or VF).
        print("FunctionDecl()", tokens[current_index])
        return match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and self.StmtBlock()

    def Formals(self):
        print("Formals()", tokens[current_index])
        if peek().type in FirstSet["Type"]:
            return self.Parameters()
        return True

    def Parameters(self):
        print("Parameters()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.Parameters_Prime()

    def Parameters_Prime(self):
        print("Parameters_Prime()", tokens[current_index])
        if match(["T_COMMA"]):
            return self.Variable() and self.Parameters_Prime()
        return True

    def ClassDecl(self):
        print("ClassDecl()", tokens[current_index])
        return match(["T_CLASS"]) and match(["T_IDENT"]) and self.Extend() and self.Implement() and match(["T_LCB"]) \
            and self.F() and match(["T_RCB"])

    def Extend(self):
        # Optional "extends ident".
        print("Extend()", tokens[current_index])
        if match(["T_EXTENDS"]):
            return match(["T_IDENT"])
        return True

    def Implement(self):
        # Optional "implements ident (, ident)*".
        print("Implements()", tokens[current_index])
        if match(["T_IMPLEMENTS"]):
            return self.K()
        return True

    def F(self):
        # Zero or more class fields; peek only -- Field consumes the type (fix).
        print("F()", tokens[current_index])
        if peek().type in FirstSet["Type"]:
            return self.Field() and self.F()
        return True

    def K(self):
        # Comma-separated interface name list.
        print("K()", tokens[current_index])
        return match(["T_IDENT"]) and self.K_Prime()

    def K_Prime(self):
        print("K`()", tokens[current_index])
        if match(["T_COMMA"]):
            return match(["T_IDENT"]) and self.K_Prime()
        return True

    def Field(self):
        print("Field()", tokens[current_index])
        return match(FirstSet["Type"]) and self.Type_Prime() and match(["T_IDENT"]) and self.VF()

    def InterfaceDecl(self):
        print("InterfaceDecl()", tokens[current_index])
        return match(["T_INTERFACE"]) and match(["T_IDENT"]) and match(["T_LCB"]) and self.P() and match(["T_RCB"])

    def P(self):
        # Zero or more prototypes inside an interface body.
        print("P()", tokens[current_index])
        if peek().type in FirstSet["P"]:
            return self.Prototype() and self.P()
        return True

    def Prototype(self):
        print("Prototype()", tokens[current_index])
        return match(FirstSet["P"]) and self.Type_Prime() and match(["T_IDENT"]) and \
            match(["T_LPAREN"]) and self.Formals() and match(["T_RPAREN"]) and match(["T_SEMICOLON"])

    def StmtBlock(self):
        print("StmtBlock()", tokens[current_index])
        return match(["T_LCB"]) and self.V() and self.S() and match(["T_RCB"])

    def V(self):
        # Zero or more local variable declarations at the top of a block.
        print("V()", tokens[current_index])
        if match(FirstSet["Type"]):
            return self.Type_Prime() and match(["T_IDENT"]) and self.VariableDecl() and self.V()
        return True

    def S(self):
        # Zero or more statements.
        print("S()", tokens[current_index])
        if peek().type in FirstSet["Stmt"]:
            return self.Stmt() and self.S()
        return True

    def Stmt(self):
        print("Stmt()", tokens[current_index])
        if peek().type in FirstSet["Expr"]:
            return self.Expr() and match(["T_SEMICOLON"])
        elif peek().type == "T_IF":
            return self.IfStmt()
        elif peek().type == "T_WHILE":
            return self.WhileStmt()
        elif peek().type == "T_FOR":
            return self.ForStmt()
        elif peek().type == "T_BREAK":
            return self.BreakStmt()
        elif peek().type == "T_RETURN":
            return self.ReturnStmt()
        elif peek().type == "T_PRINT":
            return self.PrintStmt()
        elif match(["T_LCB"]):
            # Nested statement block ('{' already consumed above).
            return self.V() and self.S() and match(["T_RCB"])
        return False

    def E(self):
        # Optional expression (for-loop header slots and return values).
        print("E()", tokens[current_index])  # fix: trace label was "StEmt()"
        if peek().type in FirstSet["Expr"]:
            return self.Expr()
        return True

    def IfStmt(self):
        print("IfStmt()", tokens[current_index])
        # fix: the closing paren token is "T_RPAREN", not "R_PAREN"
        return match(["T_IF"]) and match(["T_LPAREN"]) and self.Expr() and match(["T_RPAREN"]) and \
            self.Stmt() and self.Else()

    def Else(self):
        print("Else()", tokens[current_index])
        if match(["T_ELSE"]):
            return self.Stmt()
        return True

    def WhileStmt(self):
        print("WhileStmt()", tokens[current_index])
        return match(["T_WHILE"]) and match(["T_LPAREN"]) and self.Expr() and match(["T_RPAREN"]) and self.Stmt()

    def ForStmt(self):
        print("ForStmt()", tokens[current_index])
        return match(["T_FOR"]) and match(["T_LPAREN"]) and self.E() and match(["T_SEMICOLON"]) and \
            self.Expr() and match(["T_SEMICOLON"]) and self.E() and match(["T_RPAREN"]) and self.Stmt()

    def ReturnStmt(self):
        print("ReturnStmt()", tokens[current_index])
        return match(["T_RETURN"]) and self.E() and match(["T_SEMICOLON"])

    def BreakStmt(self):
        print("BreakStmt()", tokens[current_index])
        return match(["T_BREAK"]) and match(["T_SEMICOLON"])

    def PrintStmt(self):
        print("PrintStmt()", tokens[current_index])
        return match(["T_PRINT"]) and match(["T_LPAREN"]) and self.Pr() and \
            match(["T_RPAREN"]) and match(["T_SEMICOLON"])

    def Pr(self):
        # One or more comma-separated expressions.
        print("Pr()", tokens[current_index])
        if peek().type in FirstSet["Expr"]:
            return self.Expr() and self.Pr_Prime()
        return False

    def Pr_Prime(self):
        print("Pr_Prime()", tokens[current_index])
        if match(["T_COMMA"]):
            return self.Expr() and self.Pr_Prime()
        return True

    def Expr(self):
        print("Expr()", tokens[current_index])
        if match(["T_IDENT"]):
            return self.LC()
        elif peek().type in FirstSet["Constant"]:  # fix: peek -- Constant() consumes
            return self.Constant()
        elif match(["T_THIS"]):
            return True
        elif match(["T_LPAREN"]):
            return self.Expr() and match(["T_RPAREN"])
        elif match(["T_NOT"]):
            return self.Expr()
        elif match(["T_READINTEGER"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_READLINE"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_NEW"]):
            return match(["T_IDENT"])
        elif match(["T_NEWARRAY"]):
            return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
                self.Type() and match(["T_RPAREN"])
        return False  # fix: was an implicit None

    def LC(self):
        # Continuation after a leading identifier: assignment, member access,
        # array-element assignment, or call.
        print("LC()", tokens[current_index])
        if match(["T_ASSIGN"]):
            return self.Expr0()
        elif match(["T_DOT"]):
            return match(["T_IDENT"]) and self.GC()
        elif match(["T_LSB"]):
            return self.Expr() and match(["T_RSB"]) and match(["T_ASSIGN"]) and self.Expr0()
        elif match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return False

    def GC(self):
        # Continuation after "ident.ident": field assignment or method call.
        print("GC()", tokens[current_index])
        if match(["T_ASSIGN"]):      # fix: consume the '='; original only peeked
            return self.Expr0()
        elif match(["T_LPAREN"]):    # fix: token names were "L_PAREN"/"R_PAREN"
            return self.Actuals() and match(["T_RPAREN"])
        return False

    # Expr0..Expr13 form the operator-precedence ladder (|| lowest, * highest).

    def Expr0(self):
        print("Expr0()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            # fix: tail is Expr0_Prime (the '||' loop); recursing into Expr0
            # itself rejected any right-hand side followed by ';' etc.
            return self.Expr1() and self.Expr0_Prime()
        return False

    def Expr0_Prime(self):
        print("Expr0`()", tokens[current_index])
        if match(["T_OR"]):
            return self.Expr1() and self.Expr0_Prime()
        return True

    def Expr1(self):
        print("Expr1()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            return self.Expr2() and self.Expr1_Prime()
        return False

    def Expr1_Prime(self):
        print("Expr1`()", tokens[current_index])
        if match(["T_AND"]):
            return self.Expr2() and self.Expr1_Prime()
        return True

    def Expr2(self):
        print("Expr2()", tokens[current_index])
        if peek().type in FirstSet["Actuals"]:
            return self.Expr3() and self.Expr2_Prime()
        return False

    def Expr2_Prime(self):
        print("Expr2`()", tokens[current_index])
        if match(["T_NEQ"]):
            return self.Expr3() and self.Expr2_Prime()
        return True

    def Expr3(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr4() and self.Expr3_Prime()
        return False

    def Expr3_Prime(self):
        if match(["T_EQ"]):
            return self.Expr4() and self.Expr3_Prime()
        return True

    def Expr4(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr5() and self.Expr4_Prime()
        return False

    def Expr4_Prime(self):
        if match(["T_GEQ"]):
            return self.Expr5() and self.Expr4_Prime()
        return True

    def Expr5(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr6() and self.Expr5_Prime()
        return False

    def Expr5_Prime(self):
        if match(["T_GT"]):
            return self.Expr6() and self.Expr5_Prime()
        return True

    def Expr6(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr7() and self.Expr6_Prime()
        return False

    def Expr6_Prime(self):
        if match(["T_LEQ"]):
            return self.Expr7() and self.Expr6_Prime()
        return True

    def Expr7(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr8() and self.Expr7_Prime()
        return False

    def Expr7_Prime(self):
        if match(["T_LT"]):
            return self.Expr8() and self.Expr7_Prime()
        return True

    def Expr8(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr9() and self.Expr8_Prime()
        return False

    def Expr8_Prime(self):
        if match(["T_MINUS"]):
            return self.Expr9() and self.Expr8_Prime()
        return True

    def Expr9(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr10() and self.Expr9_Prime()
        return False

    def Expr9_Prime(self):
        if match(["T_PLUS"]):
            return self.Expr10() and self.Expr9_Prime()
        return True

    def Expr10(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr11() and self.Expr10_Prime()
        return False

    def Expr10_Prime(self):
        if match(["T_MOD"]):
            return self.Expr11() and self.Expr10_Prime()
        return True

    def Expr11(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr12() and self.Expr11_Prime()
        return False

    def Expr11_Prime(self):
        if match(["T_DIV"]):
            return self.Expr12() and self.Expr11_Prime()
        return True

    def Expr12(self):
        if peek().type in FirstSet["Actuals"]:
            return self.Expr13() and self.Expr12_Prime()
        return False

    def Expr12_Prime(self):
        if match(["T_MULT"]):
            return self.Expr13() and self.Expr12_Prime()
        return True

    def Expr13(self):
        # Primary expressions at the top of the precedence ladder.
        if match(["T_IDENT"]):
            return self.FC()
        elif peek().type in FirstSet["Constant"]:
            return self.Constant()
        elif match(["T_THIS"]):
            return True
        elif match(["T_LPAREN"]):
            return self.Expr() and match(["T_RPAREN"])
        elif match(["T_NOT"]):
            return self.Expr()
        elif match(["T_READINTEGER"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_READLINE"]):
            return match(["T_LPAREN"]) and match(["T_RPAREN"])
        elif match(["T_NEW"]):
            return match(["T_IDENT"])
        elif match(["T_NEWARRAY"]):
            return match(["T_LPAREN"]) and self.Expr() and match(["T_COMMA"]) and \
                self.Type() and match(["T_RPAREN"])
        return False  # fix: was an implicit None

    def FC(self):
        # Optional suffix after a primary identifier: member, index, or call.
        if match(["T_DOT"]):
            return match(["T_IDENT"]) and self.FC_Prime()
        elif match(["T_LSB"]):
            return self.Expr() and match(["T_RSB"])
        elif match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return True

    def FC_Prime(self):
        if match(["T_LPAREN"]):
            return self.Actuals() and match(["T_RPAREN"])
        return True

    def Actuals(self):
        # Possibly-empty argument list.
        if peek().type in FirstSet["Actuals"]:
            return self.Pr()
        return True

    def Constant(self):
        return match(FirstSet["Constant"])
def main():
    """Parse the loaded token stream and report the result and final cursor."""
    parser = Grammar()
    result = parser.Program()
    print(result)
    print(current_index)

main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import string | |
import sys | |
import logging | |
import pickle | |
logging.basicConfig(level=logging.CRITICAL)

filename = ""
code = ""
LINE_NO = 1  # current source line; advanced by the scanner loop on each '\n'

if len(sys.argv) == 2:
    filename = sys.argv[1]
    with open(filename, "r") as f:  # 'with' closes the handle (original leaked it)
        code = f.read()
else:
    print("Unknown # of args")
    sys.exit(-1)
def preprocess(_code):
    """Strip /* ... */ block comments and // line comments from source text.

    Line comments keep their trailing newline; block comments are removed
    wholesale (including any newlines they span, so line numbering of later
    text shifts -- same as the original behavior).

    Fixes over the original:
      * an unterminated "/*" no longer raises ValueError (the rest of the
        text is dropped instead);
      * "*/" is searched for *after* the "/*", so a stray "*/" earlier in
        the text can no longer cause duplicated output / an endless loop;
      * a trailing "//" with no newline after it no longer raises ValueError.
    """
    output = _code
    while "/*" in output:
        start = output.index("/*")
        end = output.find("*/", start + 2)
        if end == -1:
            output = output[:start]
            break
        output = output[:start] + output[end + 2:]
    while "//" in output:
        start = output.index("//")
        newline = output.find("\n", start)
        if newline == -1:
            output = output[:start]
            break
        output = output[:start] + output[newline:]
    return output
# Remove comments before scanning.
code = preprocess(code)
# Decaf reserved words; the scanner emits a "T_<WORD>" token type for each.
RESERVED_WORDS = ["void", "int", "double", "bool", "string", "class", "interface", "null", "this",
                  "extends", "implements", "for", "while", "if", "else", "return", "break", "new",
                  "NewArray", "Print", "ReadInteger", "ReadLine"]
# In[3]: | |
class Token:
    """A lexical token produced by the scanner.

    Attributes:
        type:     token type tag, e.g. "T_INTCONSTANT".
        value:    the matched lexeme for value-bearing tokens, else None.
        priority: disambiguation weight (higher wins when several token
                  kinds match the same lexeme).
        line_no:  source line, captured from the global LINE_NO counter
                  maintained by the scanner loop.
    """

    def __init__(self, _type, value=None, priority=0):
        self.type = _type
        self.value = value
        self.priority = priority
        global LINE_NO
        self.line_no = LINE_NO

    def __repr__(self):
        # Fix: compare against None explicitly so a falsy-but-present
        # lexeme (e.g. the empty string) still displays its VAL field.
        if self.value is None:
            return self.type + " AT Line#" + str(self.line_no)
        return self.type + " VAL=" + self.value + " AT Line#" + str(self.line_no)
# In[4]: | |
def is_reserve(s):
    """True when *s* is one of the Decaf reserved words."""
    return s in RESERVED_WORDS
def is_identifier(s):
    """Decaf identifier: a letter followed by alphanumerics/underscores,
    at most 31 characters long."""
    if not s or s[0] not in string.ascii_letters or len(s) >= 32:
        return False
    return all(c.isalnum() or c == "_" for c in s[1:])
def is_non_resv_non_ident(s):
    """True when *s* is neither a reserved word nor a valid identifier."""
    return not (is_reserve(s) or is_identifier(s))
def is_white_space(s):
    """True when *s* is non-empty and consists entirely of whitespace."""
    return s.isspace()
def is_integer(s):
    """Decaf integer constant: decimal digits, or 0x/0X followed by at
    least one hex digit.

    Fix: a bare "0x"/"0X" is rejected -- the original's all() over the
    empty suffix was vacuously True.
    """
    if s[:2] in ("0x", "0X"):
        hex_digits = s[2:]
        return bool(hex_digits) and all(c in string.hexdigits for c in hex_digits)
    return s.isdigit()
def is_string(s):
    """Decaf string constant: a double-quoted, single-line lexeme with
    exactly the two delimiting quotes and no embedded quote or newline."""
    return (len(s) >= 2
            and s.startswith('"')
            and s.endswith('"')
            and s.count('"') == 2
            and "\n" not in s)
def is_double(s):
    """Decaf double constant: digit+ '.' digit* ('E' ('+'|'-')? digit+)?

    Fixes over the original:
      * empty input no longer crashes on s[0];
      * a sign with no exponent digits ("1.5E+") is rejected (all() over
        the empty tail was vacuously True);
      * the whole integer part is validated, not just its first character
        ("1x.5" was accepted).
    """
    if s.count(".") != 1:
        return False
    if "E" in s:
        if s.count("E") != 1:
            return False
        mantissa, _, exponent = s.partition("E")
        if exponent[:1] in ("+", "-"):
            exponent = exponent[1:]
        if not exponent or not all(c in string.digits for c in exponent):
            return False
    else:
        mantissa = s
    int_part, dot, frac_part = mantissa.partition(".")
    if dot != ".":  # the single '.' sat in the exponent, not the mantissa
        return False
    return (int_part != ""
            and all(c in string.digits for c in int_part)
            and all(c in string.digits for c in frac_part))
# In[5]: | |
def all_tokens():
    """Return one Token instance of every token type the lexer can emit."""
    fixed = ["T_IDENT", "T_BOOL", "T_INTCONSTANT", "T_STRINGCONSTANT",
             "T_DOUBLECONSTANT", "T_PLUS", "T_MINUS", "T_MULT", "T_DIV",
             "T_MOD", "T_LT", "T_LEQ", "T_GT", "T_GEQ", "T_ASSIGN", "T_EQ",
             "T_NEQ", "T_AND", "T_OR", "T_NOT", "T_SEMICOLON", "T_COMMA",
             "T_DOT", "T_ARRDECL", "T_LSB", "T_RSB", "T_LPAREN", "T_RPAREN",
             "T_LCB", "T_RCB"]
    names = ["T_" + word.upper() for word in RESERVED_WORDS] + fixed
    return [Token(name) for name in names]
def ret_token(s):
    """Return every Token candidate the lexeme *s* could be.

    A lexeme may match several token kinds at once (e.g. "true" is both a
    boolean literal and a well-formed identifier); the scanner loop later
    keeps the highest-priority candidate.  Reserved words carry priority
    999 and boolean literals 998 so both outrank the plain identifier.

    Fix: true/false now produce "T_BOOLCONSTANT" instead of "T_BOOL" --
    "T_BOOL" is the token for the `bool` type keyword, and the parser's
    FirstSet["Constant"] expects "T_BOOLCONSTANT" for the literals.
    """
    matches = []
    if is_reserve(s):
        matches.append(Token("T_" + s.upper(), value=None, priority=999))
    if is_identifier(s):
        matches.append(Token("T_IDENT", s))
    if s in ("true", "false"):
        matches.append(Token("T_BOOLCONSTANT", s, 998))
    if is_integer(s):
        matches.append(Token("T_INTCONSTANT", s))
    if is_string(s):
        matches.append(Token("T_STRINGCONSTANT", s))
    if is_double(s):
        matches.append(Token("T_DOUBLECONSTANT", s))
    # Fixed operators/punctuation: at most one of these can equal s.
    operators = {
        "+": "T_PLUS", "-": "T_MINUS", "*": "T_MULT", "/": "T_DIV",
        "%": "T_MOD", "<": "T_LT", "<=": "T_LEQ", ">": "T_GT",
        ">=": "T_GEQ", "=": "T_ASSIGN", "==": "T_EQ", "!=": "T_NEQ",
        "&&": "T_AND", "||": "T_OR", "!": "T_NOT", ";": "T_SEMICOLON",
        ",": "T_COMMA", ".": "T_DOT", "[]": "T_ARRDECL", "[": "T_LSB",
        "]": "T_RSB", "(": "T_LPAREN", ")": "T_RPAREN", "{": "T_LCB",
        "}": "T_RCB",
    }
    token_type = operators.get(s)
    if token_type is not None:
        matches.append(Token(token_type))
    return matches
# In[ ]: | |
# In[6]: | |
# ---------------------------------------------------------------------------
# Scanner loop (maximal-munch style): grow the lexeme `stack` one character
# at a time; when the extended stack no longer matches any token kind, emit
# the best candidate for the previous stack and restart from the current
# character.
# ---------------------------------------------------------------------------
OUTPUT = []        # emitted token stream, in source order
identified = {}    # token-type -> Token candidates for the current stack
stack = ""         # lexeme accumulated so far
for _cindex, char in enumerate(code):
    # Whitespace ends the current lexeme -- unless a '"' sits in the stack,
    # i.e. we are inside a string literal, where spaces belong to the lexeme.
    if char in string.whitespace and '"' not in stack:
        if char == "\n":
            LINE_NO += 1
        logging.debug("--------- Whitespace occur ----------")
        if identified:
            # Emit the highest-priority candidate (reserved words carry 999
            # and booleans 998, so both beat the plain identifier match).
            OUTPUT.append(
                identified[max(identified, key=lambda k: identified[k].priority)])
            logging.debug("Remaining {}".format(code[_cindex:]))
            identified.clear()
        stack = ""
        logging.debug("Stack = {}".format(stack))
        logging.debug("--------- Whitespace end ----------")
        continue
    stack += char
    logging.debug("\nSTACK {}".format(stack))
    identified_tokens = ret_token(stack)
    logging.debug("LATEST_TOKENS {}".format(identified_tokens))
    # Extended stack matches nothing but the previous stack did: flush the
    # previous best match and restart the lexeme from this character alone.
    if not identified_tokens and identified:
        OUTPUT.append(
            identified[max(identified, key=lambda k: identified[k].priority)])
        logging.debug("OUTPUT {}".format(OUTPUT))
        logging.debug("Remaining: {}".format(code[_cindex:]))
        identified.clear()
        stack = stack[-1]
        logging.debug("Stack {}".format(stack))
        identified_tokens = ret_token(stack)
        logging.debug("LATEST_TOKENS = {}".format(identified_tokens))
    identified.clear()
    # NOTE(review): both branches below are identical -- the if/else is
    # redundant and could be a single assignment.
    for identified_token in identified_tokens:
        if identified_token.type in identified.keys():
            identified[identified_token.type] = identified_token
        else:
            identified[identified_token.type] = identified_token
    logging.debug("IDENTIFIED = {}".format(identified))
    # Flush whatever is pending once the last character has been consumed.
    # NOTE(review): if the source ends in a character that matches nothing,
    # `identified` is empty here and max() raises ValueError -- confirm
    # inputs always end in a valid lexeme or whitespace.
    if _cindex == len(code) - 1:
        OUTPUT.append(
            identified[max(identified, key=lambda k: identified[k].priority)])
logging.debug("STACK {}".format(stack))
print("OUTPUT {}".format(OUTPUT))
# Persist the token stream for the parser script (<input>.tokens).
# NOTE(review): the file object is never closed explicitly; the write relies
# on interpreter shutdown to flush.
output_file = open(sys.argv[1] + ".tokens", "wb")
pickle.dump(OUTPUT, output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment