Skip to content

Instantly share code, notes, and snippets.

@stravant
Last active December 12, 2015 12:19
Show Gist options
  • Select an option

  • Save stravant/4771642 to your computer and use it in GitHub Desktop.

Select an option

Save stravant/4771642 to your computer and use it in GitHub Desktop.
Python Parser
import builtins
###############################################################################
#From SO: http://stackoverflow.com/questions/1988804/
class Memoize:
def __init__(self, f):
self.f = f
self.memo = {}
def __call__(self, *args):
if not args in self.memo:
self.memo[args] = self.f(*args)
return self.memo[args]
###############################################################################
class Token:
TokenTypes = {'Ident', 'Number', 'String', 'Symbol', 'Keyword'}
def __init__(self, tt, data):
self.TokenType = tt
self.Data = data
def __repr__(self):
return "Token(%s, %s)" % (repr(self.TokenType), repr(self.Data))
class LexException(Exception):
def __init__(self, string):
self.Message = string
def __str__(self):
return self.Message
class BreakException(Exception):
def __init__(self):
pass
class Lexer:
DefaultSymbolList = ['+', '-', '/', '*', '=', '(', ')', ';', '{', '}', '[', ']',
'<=', '>=', '==', '!=', '>', '<', '.', ':']
DefaultKeywordList = ['if', 'then', 'else', 'while', 'break', 'True', 'False', 'var']
EscapeCharMap = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\', '"': '"', "'": "'"}
def __init__(self, symbol_list = DefaultSymbolList,
keyword_list = DefaultKeywordList):
self.TokenList = []
self.SymbolList = symbol_list
self.KeywordList = keyword_list
self.__source = ''
self.__ptr = ''
def __eof(self):
return self.__ptr >= len(self.__source)
def __peek(self):
if self.__eof():
return ''
return self.__source[self.__ptr]
def __get(self):
if self.__eof():
return ''
val = self.__source[self.__ptr]
self.__ptr += 1
return val
def __is(self, c):
return self.__peek() == c
def __consume(self, c):
if self.__peek() == c:
self.__get()
return True
else:
return False
def lex(self, source):
self.__source = source
self.__ptr = 0
self.TokenList = []
while not self.__eof():
c = self.__peek()
#skip spaces
if c.isspace():
self.__get()
#parse ident
elif c.isalpha():
data = self.__get()
while self.__peek().isalpha() or self.__peek().isalpha():
data += self.__get()
if data in self.KeywordList:
self.TokenList.append(Token('Keyword', data))
else:
self.TokenList.append(Token('Ident', data))
#parse number
elif c.isdigit():
data = self.__get()
while self.__peek().isdigit():
data += self.__get()
if self.__consume('.'):
data += '.'
while self.__peek().isdigit():
data += self.__get()
self.TokenList.append(Token('Number', data))
#parse string
elif self.__is("'") or self.__is('"'):
delim = self.__get()
content = '' #the string constant in the source
content_str = '' #the actual string for this constant
while True:
c = self.__peek()
if c == '\\':
#handle escape char
self.__get()
content += '\\'
if self.__peek() in self.EscapeCharMap:
content += self.__peek()
content_str += self.EscapeCharMap[self.__peek()]
self.__get()
else:
raise LexException("Bad escape sequence: `%s`" % self.__peek())
elif c == delim:
#end of string
self.__get()
break
else:
#normal character
content += self.__peek()
content_str += self.__peek()
self.__get()
self.TokenList.append(Token('String', content_str))
#parse symbol
else:
if c in self.SymbolList:
self.__get()
if c+self.__peek() in self.SymbolList:
self.TokenList.append(Token('Symbol', c+self.__get()))
else:
self.TokenList.append(Token('Symbol', c))
else:
raise LexException("Bad character in source: `%s`" % c)
#done, have token list
self.TokenList.append(Token('Eof', ''))
###############################################################################
class TokenStream:
def __init__(self, tokList):
self.__token_list = [x for x in tokList]
self.__ptr = 0
def __is(self, ty, dat):
v = self.peek()
return v.TokenType == ty and v.Data == dat
def mark(self):
return self.__ptr
def reset(self, mark = 0):
self.__ptr = mark
def eof(self):
return self.__token_list[self.__ptr].TokenType == 'Eof'
def peek(self):
return self.__token_list[self.__ptr]
def get(self):
v = self.peek()
if not self.eof():
self.__ptr += 1
return v
def is_symbol(self, symb):
return self.__is('Symbol', symb)
def consume_symbol(self, symb):
if self.is_symbol(symb):
self.get()
return True
else:
return False
def is_keyword(self, kw):
return self.__is('Keyword', kw)
def consume_keyword(self, kw):
if self.is_keyword(kw):
self.get()
return True
else:
return False
def is_ident(self):
return self.peek().TokenType == 'Ident'
def consume_ident(self):
if self.is_ident():
return self.get()
else:
return False
def is_string(self):
return self.peek().TokenType == 'String'
def is_number(self):
return self.peek().TokenType == 'Number'
###############################################################################
class AstNode_ExprCall:
def __init__(self, basenode):
self.Type = 'ExprCall'
self.BaseNode = basenode
self.ArgList = []
def __str__(self):
return "CALL( %s, %s )" % (self.BaseNode, ', '.join([ str(x) for x in self.ArgList]))
class AstNode_ExprIndex:
def __init__(self, basenode, arg):
self.Type = 'ExprIndex'
self.BaseNode = basenode
self.Arg = arg
def __str__(self):
return "INDEX( %s, %s )" % (self.BaseNode, self.Arg)
class AstNode_LiteralNumber:
def __init__(self, number):
self.Type = 'LiteralNumber'
self.Data = number
def __str__(self):
return "NUMBER<%s>" % self.Data
class AstNode_LiteralString:
def __init__(self, string):
self.Type = 'LiteralString'
self.Data = string
def __str__(self):
return "STRING<'%s'>" % self.Data
class AstNode_LiteralBool:
def __init__(self, val):
self.Type = 'LiteralBool'
self.Data = val
def __str__(self):
return "TRUE<>" if self.Data else "FALSE<>"
class AstNode_LiteralVar:
def __init__(self, var):
self.Type = 'LiteralVar'
self.Data = var
def __str__(self):
return "VAR<%s>" % self.Data
class AstNode_ExprBinop:
def __init__(self, lhs, oper, rhs):
self.Type = 'ExprBinop'
self.Lhs = lhs
self.Oper = oper
self.Rhs = rhs
def __str__(self):
return "%s %s %s" % (self.Lhs, self.Oper, self.Rhs)
class AstNode_ExprUnop:
def __init__(self, oper, rhs):
self.Type = 'ExprUnop'
self.Oper = oper
self.Rhs = rhs
def __str__(self):
return "%s %s" % (self.Oper, self.Rhs)
class AstNode_ExprEquality:
def __init__(self, oper):
self.Type = 'ExprEquality'
self.Oper = oper
self.ArgList = []
def __str__(self):
if len(self.ArgList) == 2:
return "%s == %s" % tuple(self.ArgList)
else:
return "EQUAL( %s )"
class AstNode_StatIf:
def __init__(self, cond, body, _else = None):
self.Type = 'StatIf'
self.Cond = cond
self.Body = body
self.Else = _else
def __str__(self):
return "IF( %s, %s, %s )" % (self.Cond, self.Body, self.Else)
class AstNode_StatWhile:
def __init__(self, cond, body):
self.Type = 'StatWhile'
self.Cond = cond
self.Body = body
class AstNode_StatBreak:
def __init__(self):
self.Type = 'StatBreak'
class AstNode_StatVarDef:
def __init__(self, name, typ, init):
self.Type = 'StatVarDef'
self.Name = name
self.ValueType = typ
self.Init = init
class AstNode_StatExpr:
def __init__(self, expr):
self.Type = 'StatExpr'
self.Expr = expr
class AstNode_StatList:
def __init__(self):
self.Type = 'StatList'
self.List = []
###############################################################################
class AstNode_Type:
def __init__(self, qualname):
self.Type = 'Type'
self.QualName = qualname #array of strings
class AstNode_StatClassDef:
def __init__(self):
self.Type = 'StatClassDef'
self.MemberList = []
self.MethodList = []
class AstNode_StatClassMember:
def __init__(self, name, typ):
self.Type = 'StatClassMember'
self.Name = name
self.ValueType = typ
class AstNode_StatClassMethod:
def __init__(self, name, ret, args):
self.Type = 'StatClassMethod'
self.Name = name
self.ReturnType = ret
self.ArgType = args
###############################################################################
class RTMethod:
def __init__(self, name, types, impl):
self.Name = name
self.TypeList = types
self.Impl = impl
class RTMember:
def __init__(self, name, type):
self.Name = name
self.Type = type
class RTType:
def __init__(self, name):
self.MethodList = []
self.MemberList = []
RTT_Num = RTType("Num")
RTT_Str = RTType("Str")
RTT_Bool = RTType("Bool")
RTT_Void = RTType("Void")
RTT_Class = RTType("Class")
def get_RTT_ArrayOf(ty):
arrt = RTType(ty.Name + "[]")
def getitem_impl(arr,inx):
return arr[inx]
def setitem_impl(arr,inx,val):
arr[inx] = val
getitem = RTMethod("__getitem__", [ty, arrt, RTT_Num], getitem_impl)
setitem = RTMethod("__setitem__", [RTT_Void, arrt, RTT_Num], setitem_impl)
arrt.MethodList.append(getitem)
arrt.MethodList.append(setitem)
get_RTT_ArrayOf = Memoize(get_RTT_ArrayOf)
###############################################################################
class Parser:
def __init__(self, tok):
self.__tok = tok
def parse_expr_termchain(self, basenode):
while True:
if self.__tok.consume_symbol('('):
node = AstNode_ExprCall(basenode)
if not self.__tok.consume_symbol(')'):
while True:
arg = self.parse_expr()
node.ArgList.append(arg)
if not self.__tok.consume_symbol(','):
break
if not self.__tok.consume_symbol(')'):
raise LexException("Expected `)` after arg list")
basenode = node
elif self.__tok.consume_symbol('['):
node = AstNode_ExprIndex(basenode, self.parse_expr())
if not self.__tok.consume_symbol(']'):
raise LexException("Expected `]` after indexing expr")
basenode = node
elif self.__tok.consume_symbol('.'):
ident = self.__tok.consume_ident()
if not ident:
raise LexException("Expected ident after `.`")
node = AstNode_ExprIndex(basenode, \
AstNode_LiteralString(ident.Data))
basenode = node
else:
break
return basenode
def parse_expr_termbase(self):
node = None
if self.__tok.is_number():
data = self.__tok.get().Data
if float(data) == int(data):
data = int(data)
else:
data = float(data)
node = AstNode_LiteralNumber(data)
elif self.__tok.is_ident():
node = AstNode_LiteralVar(self.__tok.get().Data)
elif self.__tok.is_string():
node = AstNode_LiteralString(self.__tok.get().Data)
elif self.__tok.consume_keyword('True'):
node = AstNode_LiteralBool(True)
elif self.__tok.consume_keyword('False'):
node = AstNode_LiteralBool(False)
elif self.__tok.consume_symbol('('):
node = self.parse_expr()
if not self.__tok.consume_symbol(')'):
raise LexException("Expected `)`")
else:
raise LexException("Expected literal, got %s" % self.__tok.peek())
return self.parse_expr_termchain(node)
def parse_expr_unop(self):
if self.__tok.consume_symbol('-'):
return AstNode_ExprUnop('-', self.parse_expr_termbase())
else:
return self.parse_expr_termbase()
def parse_expr_factor(self):
base = self.parse_expr_unop()
while self.__tok.consume_symbol('*'):
expr = AstNode_ExprBinop(base, '*', self.parse_expr_unop())
base = expr
return base
def parse_expr_term(self):
base = self.parse_expr_factor()
while self.__tok.consume_symbol('+'):
expr = AstNode_ExprBinop(base, '+', self.parse_expr_factor())
base = expr
return base
def parse_expr_compare(self):
base = self.parse_expr_term()
if self.__tok.is_symbol('>=') or self.__tok.is_symbol('<=') or \
self.__tok.is_symbol('>') or self.__tok.is_symbol('<'):
oper = self.__tok.get().Data
expr = AstNode_ExprBinop(base, oper, self.parse_expr_term())
base = expr
return base
def parse_expr_equality(self):
base = self.parse_expr_compare()
if self.__tok.is_symbol('==') or self.__tok.is_symbol('!='):
node = AstNode_ExprEquality(self.__tok.peek().Data)
while self.__tok.consume_symbol(oper):
node.ArgList.append(self.parse_expr_compare())
base = node
return base
def parse_expr_assign(self):
base = self.parse_expr_equality()
if self.__tok.consume_symbol('='):
expr = AstNode_ExprBinop(base, '=', self.parse_expr_assign())
base = expr
return base
def parse_expr(self):
return self.parse_expr_assign()
def parse_type(self):
types = []
if not self.__tok.is_ident():
raise LexException("Type expected")
types.append(self.__tok.get().Data)
while self.__tok.consume_symbol('.'):
if not self.__tok.is_ident():
raise LexException("ident expected in type")
types.append(self.__tok.get().Data)
#####
return AstNode_Type(types)
def parse_statement(self):
node = None
if self.__tok.consume_keyword('if'):
cond = self.parse_expr()
body = self.parse_statement()
node = AstNode_StatIf(cond, body)
if self.__tok.consume_keyword('else'):
node.Else = self.parse_statement()
elif self.__tok.consume_keyword('while'):
cond = self.parse_expr()
body = self.parse_statement()
node = AstNode_StatWhile(cond, body)
elif self.__tok.consume_keyword('break'):
node = AstNode_StatBreak()
if not self.__tok.consume_symbol(';'):
raise LexException("Expected `;` after break statement")
elif self.__tok.consume_keyword('var'):
if not self.__tok.is_ident():
raise LexException("Expected var name after `var`")
name = self.__tok.get().Data
if not self.__tok.consume_symbol(':'):
raise LexException("Expected `:` after var name")
typ = self.parse_type()
init = None
if self.__tok.consume_symbol('='):
init = self.parse_expr()
if not self.__tok.consume_symbol(';'):
raise LexException("Expected `;` after var definition")
return AstNode_StatVarDef(name, typ, init)
elif self.__tok.consume_symbol('{'):
node = self.parse_statlist()
if not self.__tok.consume_symbol('}'):
raise LexException("Expected `}`")
else:
#expression-statement
node = AstNode_StatExpr(self.parse_expr())
if not self.__tok.consume_symbol(';'):
raise LexException("Expected `;` after statement")
return node
def parse_statlist(self):
node = AstNode_StatList()
#
while not self.__tok.eof() and not self.__tok.is_symbol('}'):
node.List.append(self.parse_statement())
return node
###############################################################################
class LexicalScope:
def __init__(self, parent = None):
self.Parent = parent
self.Data = {}
self.VarSet = set()
def declare(self, key):
self.VarSet.add(key)
def get(self, key):
if key in self.VarSet:
return self.Data[key]
elif self.Parent:
return self.Parent.get(key)
else:
raise ValueError("Undefined variable `%s`" % key)
def set(self, key, value):
self.Data[key] = value
def leave(self):
self.Data = {}
def enter(self):
self.Data = {}
def add(self, key, value):
self.declare(key)
self.set(key, value)
class LexicalScopeAssign:
def __init__(self):
self.__scope_stack = []
self.__scope_stack.append(LexicalScope())
def walk(self, ast_stat):
self.walk_stat(ast_stat)
def enter_scope(self):
newscope = LexicalScope(self.__scope_stack[-1])
self.__scope_stack.append(newscope)
return newscope
def leave_scope(self):
self.__scope_stack.pop()
def current_scope(self):
return self.__scope_stack[-1]
def global_scope(self):
return self.__scope_stack[0]
def walk_stat(self, ast_stat):
ast_stat.Scope = self.current_scope()
#
if ast_stat.Type == 'StatList':
ast_stat.Scope = self.enter_scope()
for stat in ast_stat.List:
self.walk_stat(stat)
self.leave_scope()
elif ast_stat.Type == 'StatIf':
self.walk_expr(ast_stat.Cond)
self.walk_stat(ast_stat.Body)
if ast_stat.Else:
self.walk_stat(ast_stat.Else)
elif ast_stat.Type == 'StatWhile':
self.walk_expr(ast_stat.Cond)
self.walk_stat(ast_stat.Body)
elif ast_stat.Type == 'StatBreak':
pass
elif ast_stat.Type == 'StatVarDef':
self.current_scope().declare(ast_stat.Name)
self.walk_expr(ast_stat.Init)
elif ast_stat.Type == 'StatExpr':
self.walk_expr(ast_stat.Expr)
else:
raise LexException("Not implemented `%s`" % ast_stat.Type)
def walk_expr(self, ast_expr):
ast_expr.Scope = self.current_scope()
#
if ast_expr.Type == 'ExprBinop':
self.walk_expr(ast_expr.Lhs)
self.walk_expr(ast_expr.Rhs)
elif ast_expr.Type == 'ExprUnop':
self.walk_expr(ast_expr.Rhs)
elif ast_expr.Type == 'LiteralNumber':
pass
elif ast_expr.Type == 'LiteralString':
pass
elif ast_expr.Type == 'LiteralBool':
pass
elif ast_expr.Type == 'LiteralVar':
pass
elif ast_expr.Type == 'ExprCall':
self.walk_expr(ast_expr.BaseNode)
for arg in ast_expr.ArgList:
self.walk_expr(arg)
elif ast_expr.Type == 'ExprIndex':
self.walk_expr(ast_expr.BaseNode)
self.walk_expr(ast_expr.Arg)
elif ast_expr.Type == 'ExprEquality':
for arg in ast_expr.ArgList:
self.walk_expr(arg)
else:
raise LexException("Not implemented, expression `%s`" % ast_expr.Type)
class TypeAssign:
def __init__(self):
pass
def walk_stat(self, ast_stat):
if ast_stat.Type == 'StatList':
for stat in ast_stat.List:
self.walk_stat(stat)
elif ast_stat.Type == 'StatIf':
self.walk_expr(ast_stat.Cond)
self.walk_stat(ast_stat.Body)
if ast_stat.Else:
self.walk_stat(ast_stat.Else)
elif ast_stat.Type == 'StatWhile':
self.walk_expr(ast_stat.Cond)
self.walk_stat(ast_stat.Body)
elif ast_stat.Type == 'StatBreak':
pass
elif ast_stat.Type == 'StatVarDef':
if ast_stat.Init:
self.walk_expr(ast_stat.Init)
elif ast_stat.Type == 'StatExpr':
self.walk_expr(ast_stat.Expr)
else:
raise LexException("Not implemented `%s`" % ast_stat.Type)
def walk_expr(self, ast_expr):
if ast_expr.Type == 'ExprBinop':
self.walk_expr(ast_expr.Lhs)
self.walk_expr(ast_expr.Rhs)
elif ast_expr.Type == 'ExprUnop':
self.walk_expr(ast_expr.Rhs)
elif ast_expr.Type == 'LiteralNumber':
pass
elif ast_expr.Type == 'LiteralString':
pass
elif ast_expr.Type == 'LiteralBool':
pass
elif ast_expr.Type == 'LiteralVar':
pass
elif ast_expr.Type == 'ExprCall':
self.walk_expr(ast_expr.BaseNode)
for arg in ast_expr.ArgList:
self.walk_expr(arg)
elif ast_expr.Type == 'ExprIndex':
self.walk_expr(ast_expr.BaseNode)
self.walk_expr(ast_expr.Arg)
elif ast_expr.Type == 'ExprEquality':
for arg in ast_expr.ArgList:
self.walk_expr(arg)
else:
raise LexException("Not implemented, expression `%s`" % ast_expr.Type)
#returns (value, type)
def eval_expr(ast_expr):
if ast_expr.Type == 'ExprBinop':
if ast_expr.Oper == '+':
return (eval_expr(ast_expr.Lhs)[0] + eval_expr(ast_expr.Rhs)[0], RTT_Void)
elif ast_expr.Oper == '*':
return (eval_expr(ast_expr.Lhs)[0] * eval_expr(ast_expr.Rhs)[0], RTT_Void)
elif ast_expr.Oper == '=':
if not ast_expr.Lhs.Type == 'LiteralVar':
raise LexException("Lhs of assignment must be a variable")
(val, ty) = eval_expr(ast_expr.Rhs)
ast_expr.Lhs.Scope.set(ast_expr.Lhs.Data, val)
return (val, ty)
elif ast_expr.Oper == '<=':
return (eval_expr(ast_expr.Lhs)[0] <= eval_expr(ast_expr.Rhs)[0], RTT_Void)
elif ast_expr.Oper == '>=':
return (eval_expr(ast_expr.Lhs)[0] >= eval_expr(ast_expr.Rhs)[0], RTT_Void)
elif ast_expr.Oper == '>':
return (eval_expr(ast_expr.Lhs)[0] > eval_expr(ast_expr.Rhs)[0], RTT_Void)
elif ast_expr.Oper == '<':
return (eval_expr(ast_expr.Lhs)[0] < eval_expr(ast_expr.Rhs)[0], RTT_Void)
else:
raise LexException("Not implemented")
elif ast_expr.Type == 'ExprUnop':
if ast_expr.Oper == '-':
return (-eval_expr(ast_expr.Rhs), RTT_Void)
else:
raise LexException("Not implemented")
elif ast_expr.Type == 'LiteralNumber':
return (ast_expr.Data, RTT_Num)
elif ast_expr.Type == 'LiteralString':
return (ast_expr.Data, RTT_Str)
elif ast_expr.Type == 'LiteralBool':
return (ast_expr.Data, RTT_Bool)
elif ast_expr.Type == 'LiteralVar':
return ast_expr.Scope.get(ast_expr.Data)
elif ast_expr.Type == 'ExprCall':
base = eval_expr(ast_expr.BaseNode)[0]
args = [eval_expr(arg)[0] for arg in ast_expr.ArgList]
if callable(base):
return (base(*args), RTT_Void)
else:
raise LexException("Can't call value `%s`" % base)
elif ast_expr.Type == 'ExprIndex':
base = eval_expr(ast_expr.BaseNode)[0]
arg = eval_expr(ast_expr.Arg)[0]
return (base[arg], RTT_Void)
elif ast_expr.Type == 'ExprEquality':
base = eval_expr(ast_expr.ArgList[0])
for arg in ast_expr.ArgList[1:]:
if (eval_expr(arg) == base) == (ast_expr.Oper == '=='):
return (False, RTT_Bool)
else:
return (True, RTT_Bool)
else:
raise LexException("Not implemented, expression `%s`" % ast_expr.Type)
def eval(ast_stat):
if ast_stat.Type == 'StatList':
ast_stat.Scope.enter()
for stat in ast_stat.List:
eval(stat)
ast_stat.Scope.leave()
elif ast_stat.Type == 'StatIf':
if eval_expr(ast_stat.Cond):
eval(ast_stat.Body)
elif ast_stat.Else:
eval(ast_stat.Else)
elif ast_stat.Type == 'StatWhile':
try:
while eval_expr(ast_stat.Cond):
eval(ast_stat.Body)
except BreakException:
pass
elif ast_stat.Type == 'StatVarDef':
if ast_stat.Init:
ast_stat.Scope.set(ast_stat.Name, eval_expr(ast_stat.Init))
elif ast_stat.Type == 'StatBreak':
raise BreakException()
elif ast_stat.Type == 'StatExpr':
eval_expr(ast_stat.Expr)
else:
raise LexException("Not implemented `%s`" % ast_stat.Type)
lex = Lexer()
# lex.lex("""
# data = 1;
# while (data) {
# data = input(">> ");
# if (len(data) > 0) {
# sum = 0;
# ptr = 0;
# while (ptr < len(data)) {
# sum = sum + ord(data[ptr]);
# ptr = ptr + 1;
# }
# print("Total: ");
# print(sum);
# }
# }
# print(testmap.Data);
# print(1 <= 8);
# """)
lex.lex("""
var a: String = "testing";
print(a[0] + a[-1]);
""")
parse = Parser(TokenStream(lex.TokenList))
ast = parse.parse_statlist()
scoper = LexicalScopeAssign()
scoper.walk(ast)
#set up environ
environ = scoper.global_scope()
environ.add('print', print)
environ.add('input', input)
environ.add('max', max)
environ.add('min', min)
environ.add('len', len)
environ.add('ord', ord)
environ.add('chr', chr)
environ.add('testmap', {"Data": 42})
#try:
eval(ast)
#except LexException as e:
# print("Failed: %s" % e.Message)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment