evanxg852000 · August 3, 2017 12:31
diff --git a/utc.py b/utc.py
 # inspired by https://the-super-tiny-compiler.glitch.me/
 '''
 (print 12)
 (add 2 2)
 (subtract 4 2)
 (add 2 (subtract 4 2))
 '''

 import re
 import sys

 class Token(object):
  def __init__(self, type, value):
    self.type= type
    self.value = value

  def __repr__(self):
    return '<{}:{}>'.format(self.type, self.value)

 class Node(Token):
  def __init__(self, type, value):
    Token.__init__(self, type, value)
    self.children = []

  def __repr__(self):
    cstr =''
    for child in self.children :
      cstr +=  child.__repr__() + '\n'
    str = '''{}:{} \n{}'''
    return str.format(self.type, self.value, cstr)

 # Tokenizer -> tokens (lexems)
 def lexer(input):
  tokens = []
  pos = 0 #position
  char = None
  
  WHITESPACE = re.compile('\s')
  FIGURE = re.compile('[0-9]')
  INDENTIFIER = re.compile('[a-zA-Z]')

  while (pos < len(input)):
    char = input[pos]

    if WHITESPACE.match(char):
      pos +=1
      continue

    if char == '(':
      tokens.append(Token('lparen', char))
      pos += 1
      continue

    if char == ')':
      tokens.append(Token('rparen', char))
      pos += 1
      continue

    if FIGURE.match(char):
      num = ''
      while FIGURE.match(char):
        num += char
        pos +=1  # consume the character
        char = input[pos]
      
      tokens.append(Token('number', num))
      continue

    if char == '"':
      str = ''
      pos += 1 # skip the opening "
      char = input[pos]
      while char !='"': # error checking needed here (let's keep it simple)
        str += char
        pos += 1
        char = input[pos]
      
      pos += 1 # skip the closing "
      tokens.append(Token('string', str))
      continue

    if INDENTIFIER.match(char):
      idf = ''
      while INDENTIFIER.match(char):
        idf += char
        pos +=1  # consume the character
        char = input[pos]
      
      tokens.append(Token('identifier', idf))
      continue

    raise 'Unknown character !'
  
  return tokens

 # Parser -> ast
 def parser (tokens):
  pos = 0 
  def walk():
    nonlocal pos
    tk = tokens[pos]
    if tk.type == 'number':
      pos += 1
      return Node('NumberLiteral', tk.value)
      
    if tk.type == 'string':
      pos += 1
      return Node('StringLiteral', tk.value)

    if tk.type == 'lparen' :
      # skip lparen
      pos += 1
      tk = tokens[pos] 
      callExpr = Node('CallExpression', tk.value)
      
      pos += 1
      tk = tokens[pos]
      while tk.type != 'rparen': # look for closing paren
        callExpr.children.append(walk())
        tk = tokens[pos]
        
      pos += 1
      return callExpr
    
    #raise 'Unknown token!'

  ast = Node('Program', None)
  while pos < len(tokens):
    ast.children.append(walk())
  
  return ast

 # Interpreter -> run
 def interpreter(ast):
  pass # please fill this in as a code kata

 # code generation
 def generator (ast):
  if ast.type == 'Program':
    template ='''
    (function(add, sub, mul, div, print){
      _code_
    })(function (a, b){return a+b;},
      function (a, b){return a-b;},
      function (a, b){return a*b;},
      function (a, b){return a/b;},
      console.log)
    '''
    code = ''
    for n in ast.children:
      code += generator(n) +'; '
    return template.replace('_code_', code)

  if ast.type == 'StringLiteral':
    return "'{}'".format(ast.value)

  if ast.type == 'NumberLiteral':
     return ast.value
  
  if ast.type == 'CallExpression':
    args = ''
    for n in ast.children:
      args += generator(n) +', '
    args = args.strip(', ')
    return '{}({})'.format(ast.value, args)

  return template.replace('_code_', code)

 def main(argv):
  # REPL interpreter
  # if len(argv) == 1 and argv[0] == '-i':
  #   while True:
  #     input = input()
  #     if(input == 'exit'):
  #       break
  #   tokens = lexer(prog)
  #   ast = parser(tokens)
  #   print(interpreter(ast))

  prog = '''
  (print 12)
  (print "alex")
  (print (add 12 (sub 7 3)))
  '''
  tokens = lexer(prog)
  ast = parser(tokens)
  code = generator(ast)
  print(code)

 if __name__ == '__main__':
  main(sys.argv[1:])
	# inspired by https://the-super-tiny-compiler.glitch.me/
	'''
	(print 12)
	(add 2 2)
	(subtract 4 2)
	(add 2 (subtract 4 2))
	'''

	import re
	import sys

	class Token(object):
	def __init__(self, type, value):
	self.type= type
	self.value = value

	def __repr__(self):
	return '<{}:{}>'.format(self.type, self.value)

	class Node(Token):
	def __init__(self, type, value):
	Token.__init__(self, type, value)
	self.children = []

	def __repr__(self):
	cstr =''
	for child in self.children :
	cstr += child.__repr__() + '\n'
	str = '''{}:{} \n{}'''
	return str.format(self.type, self.value, cstr)

	# Tokenizer -> tokens (lexems)
	def lexer(input):
	tokens = []
	pos = 0 #position
	char = None

	WHITESPACE = re.compile('\s')
	FIGURE = re.compile('[0-9]')
	INDENTIFIER = re.compile('[a-zA-Z]')

	while (pos < len(input)):
	char = input[pos]

	if WHITESPACE.match(char):
	pos +=1
	continue

	if char == '(':
	tokens.append(Token('lparen', char))
	pos += 1
	continue

	if char == ')':
	tokens.append(Token('rparen', char))
	pos += 1
	continue

	if FIGURE.match(char):
	num = ''
	while FIGURE.match(char):
	num += char
	pos +=1 # consume the character
	char = input[pos]

	tokens.append(Token('number', num))
	continue

	if char == '"':
	str = ''
	pos += 1 # skip the opening "
	char = input[pos]
	while char !='"': # error checking needed here (let's keep it simple)
	str += char
	pos += 1
	char = input[pos]

	pos += 1 # skip the closing "
	tokens.append(Token('string', str))
	continue

	if INDENTIFIER.match(char):
	idf = ''
	while INDENTIFIER.match(char):
	idf += char
	pos +=1 # consume the character
	char = input[pos]

	tokens.append(Token('identifier', idf))
	continue

	raise 'Unknown character !'

	return tokens

	# Parser -> ast
	def parser (tokens):
	pos = 0
	def walk():
	nonlocal pos
	tk = tokens[pos]
	if tk.type == 'number':
	pos += 1
	return Node('NumberLiteral', tk.value)

	if tk.type == 'string':
	pos += 1
	return Node('StringLiteral', tk.value)

	if tk.type == 'lparen' :
	# skip lparen
	pos += 1
	tk = tokens[pos]
	callExpr = Node('CallExpression', tk.value)

	pos += 1
	tk = tokens[pos]
	while tk.type != 'rparen': # look for closing paren
	callExpr.children.append(walk())
	tk = tokens[pos]

	pos += 1
	return callExpr

	#raise 'Unknown token!'

	ast = Node('Program', None)
	while pos < len(tokens):
	ast.children.append(walk())

	return ast

	# Interpreter -> run
	def interpreter(ast):
	pass # please fill this in as a code kata

	# code generation
	def generator (ast):
	if ast.type == 'Program':
	template ='''
	(function(add, sub, mul, div, print){
	_code_
	})(function (a, b){return a+b;},
	function (a, b){return a-b;},
	function (a, b){return a*b;},
	function (a, b){return a/b;},
	console.log)
	'''
	code = ''
	for n in ast.children:
	code += generator(n) +'; '
	return template.replace('_code_', code)

	if ast.type == 'StringLiteral':
	return "'{}'".format(ast.value)

	if ast.type == 'NumberLiteral':
	return ast.value

	if ast.type == 'CallExpression':
	args = ''
	for n in ast.children:
	args += generator(n) +', '
	args = args.strip(', ')
	return '{}({})'.format(ast.value, args)

	return template.replace('_code_', code)

	def main(argv):
	# REPL interpreter
	# if len(argv) == 1 and argv[0] == '-i':
	# while True:
	# input = input()
	# if(input == 'exit'):
	# break
	# tokens = lexer(prog)
	# ast = parser(tokens)
	# print(interpreter(ast))

	prog = '''
	(print 12)
	(print "alex")
	(print (add 12 (sub 7 3)))
	'''
	tokens = lexer(prog)
	ast = parser(tokens)
	code = generator(ast)
	print(code)

	if __name__ == '__main__':
	main(sys.argv[1:])