Skip to content

Instantly share code, notes, and snippets.

@evanxg852000
Last active August 3, 2017 12:31
Show Gist options
  • Save evanxg852000/96750c219dd1552d774075d9424617a0 to your computer and use it in GitHub Desktop.
Save evanxg852000/96750c219dd1552d774075d9424617a0 to your computer and use it in GitHub Desktop.
Ultra Tiny Compiler in python
# inspired by https://the-super-tiny-compiler.glitch.me/
'''
(print 12)
(add 2 2)
(subtract 4 2)
(add 2 (subtract 4 2))
'''
import re
import sys
class Token(object):
def __init__(self, type, value):
self.type= type
self.value = value
def __repr__(self):
return '<{}:{}>'.format(self.type, self.value)
class Node(Token):
def __init__(self, type, value):
Token.__init__(self, type, value)
self.children = []
def __repr__(self):
cstr =''
for child in self.children :
cstr += child.__repr__() + '\n'
str = '''{}:{} \n{}'''
return str.format(self.type, self.value, cstr)
# Tokenizer -> tokens (lexems)
def lexer(input):
tokens = []
pos = 0 #position
char = None
WHITESPACE = re.compile('\s')
FIGURE = re.compile('[0-9]')
INDENTIFIER = re.compile('[a-zA-Z]')
while (pos < len(input)):
char = input[pos]
if WHITESPACE.match(char):
pos +=1
continue
if char == '(':
tokens.append(Token('lparen', char))
pos += 1
continue
if char == ')':
tokens.append(Token('rparen', char))
pos += 1
continue
if FIGURE.match(char):
num = ''
while FIGURE.match(char):
num += char
pos +=1 # consume the character
char = input[pos]
tokens.append(Token('number', num))
continue
if char == '"':
str = ''
pos += 1 # skip the opening "
char = input[pos]
while char !='"': # error checking needed here (let's keep it simple)
str += char
pos += 1
char = input[pos]
pos += 1 # skip the closing "
tokens.append(Token('string', str))
continue
if INDENTIFIER.match(char):
idf = ''
while INDENTIFIER.match(char):
idf += char
pos +=1 # consume the character
char = input[pos]
tokens.append(Token('identifier', idf))
continue
raise 'Unknown character !'
return tokens
# Parser -> ast
def parser (tokens):
pos = 0
def walk():
nonlocal pos
tk = tokens[pos]
if tk.type == 'number':
pos += 1
return Node('NumberLiteral', tk.value)
if tk.type == 'string':
pos += 1
return Node('StringLiteral', tk.value)
if tk.type == 'lparen' :
# skip lparen
pos += 1
tk = tokens[pos]
callExpr = Node('CallExpression', tk.value)
pos += 1
tk = tokens[pos]
while tk.type != 'rparen': # look for closing paren
callExpr.children.append(walk())
tk = tokens[pos]
pos += 1
return callExpr
#raise 'Unknown token!'
ast = Node('Program', None)
while pos < len(tokens):
ast.children.append(walk())
return ast
# Interpreter -> run
def interpreter(ast):
pass # please fill this in as a code kata
# code generation
def generator (ast):
if ast.type == 'Program':
template ='''
(function(add, sub, mul, div, print){
_code_
})(function (a, b){return a+b;},
function (a, b){return a-b;},
function (a, b){return a*b;},
function (a, b){return a/b;},
console.log)
'''
code = ''
for n in ast.children:
code += generator(n) +'; '
return template.replace('_code_', code)
if ast.type == 'StringLiteral':
return "'{}'".format(ast.value)
if ast.type == 'NumberLiteral':
return ast.value
if ast.type == 'CallExpression':
args = ''
for n in ast.children:
args += generator(n) +', '
args = args.strip(', ')
return '{}({})'.format(ast.value, args)
return template.replace('_code_', code)
def main(argv):
# REPL interpreter
# if len(argv) == 1 and argv[0] == '-i':
# while True:
# input = input()
# if(input == 'exit'):
# break
# tokens = lexer(prog)
# ast = parser(tokens)
# print(interpreter(ast))
prog = '''
(print 12)
(print "alex")
(print (add 12 (sub 7 3)))
'''
tokens = lexer(prog)
ast = parser(tokens)
code = generator(ast)
print(code)
if __name__ == '__main__':
main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment