Skip to content

Instantly share code, notes, and snippets.

@zelark
Last active August 29, 2015 14:26
Show Gist options
  • Save zelark/7124a70ecf67e6549635 to your computer and use it in GitHub Desktop.
Save zelark/7124a70ecf67e6549635 to your computer and use it in GitHub Desktop.
Let’s Build A Simple Interpreter
# Token types
# EOF (end-of-file) token is used to indicate that
# there is no more input left for lexical analysis
INTEGER, PLUS, MINUS, MUL, DIV, EOF = 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', 'EOF'
class Token(object):
def __init__(self, type, value):
# token type: INTEGER, PLUS, MINUS, or EOF
self.type = type
# token value: non-negative integer value, '+', '-', or None
self.value = value
def __str__(self):
"""String representation of the class instance.
Examples:
Token(INTEGER, 3)
Token(PLUS '+')
"""
return 'Token({type}, {value})'.format(
type=self.type,
value=repr(self.value)
)
def __repr__(self):
return self.__str__()
class Interpreter(object):
def __init__(self, text):
# client string input, e.g. "3 + 5", "12 - 5", etc
self.text = text
# self.pos is an index into self.text
self.pos = 0
# current token instance
self.current_token = None
self.current_char = self.text[self.pos]
def error(self):
raise Exception('Error parsing input')
def advance(self):
"""Advance the 'pos' pointer and set the 'current_char' variable."""
self.pos += 1
if self.pos > len(self.text) - 1:
self.current_char = None # Indicates end of input
else:
self.current_char = self.text[self.pos]
def skip_whitespace(self):
while self.current_char is not None and self.current_char.isspace():
self.advance()
def integer(self):
"""Return a (multidigit) integer consumed from the input."""
result = ''
while self.current_char is not None and self.current_char.isdigit():
result += self.current_char
self.advance()
return int(result)
def get_next_token(self):
"""Lexical analyzer (also known as scanner or tokenizer)
This method is responsible for breaking a sentence
apart into tokens.
"""
while self.current_char is not None:
if self.current_char.isspace():
self.skip_whitespace()
continue
if self.current_char.isdigit():
return Token(INTEGER, self.integer())
if self.current_char == '+':
self.advance()
return Token(PLUS, '+')
if self.current_char == '-':
self.advance()
return Token(MINUS, '-')
if self.current_char == '*':
self.advance()
return Token(MUL, '*')
if self.current_char == '/':
self.advance()
return Token(DIV, '/')
self.error()
return Token(EOF, None)
def eat(self, token_types):
# check the current token type is contaned in the passed token
# types and if it's true then "eat" the current token
# and assign the next token to the self.current_token,
# otherwise raise an exception.
if self.current_token.type in token_types:
self.current_token = self.get_next_token()
else:
self.error()
def expr(self):
"""Parser / Interpreter
expr -> INTEGER PLUS INTEGER
expr -> INTEGER MINUS INTEGER
"""
# set current token to the first token taken from the input
self.current_token = self.get_next_token()
# we expect the current token to be an integer
left = self.current_token
self.eat((INTEGER))
# we expect the current token to be either a '+' or '-' or '*' or '/'
op = self.current_token
self.eat((PLUS, MINUS, MUL, DIV))
# we expect the current token to be an integer
right = self.current_token
self.eat((INTEGER))
# after the above call the self.current_token is set to
# EOF token
# at this point either the INTEGER PLUS INTEGER or
# the INTEGER MINUS INTEGER sequence of tokens
# has been successfully found and the method can just
# return the result of adding or subtracting two integers,
# thus effectively interpreting client input
if op.type == PLUS:
result = left.value + right.value
elif op.type == MINUS:
result = left.value - right.value
elif op.type == MUL:
result = left.value * right.value
elif op.type == DIV:
result = left.value / right.value
return result
def expr_arbitrary(self):
"""Parser / Interpreter
expr -> INTEGER PLUS INTEGER MINUS INTEGER
"""
self.current_token = self.get_next_token()
left = self.current_token
self.eat((INTEGER))
while (self.current_token.value):
op = self.current_token
self.eat((PLUS, MINUS))
right = self.current_token
self.eat((INTEGER))
if op.type == PLUS:
left = Token(INTEGER, left.value + right.value)
elif op.type == MINUS:
left = Token(INTEGER, left.value - right.value)
result = left.value
return result
def main():
while True:
try:
text = input('calc> ')
except EOFError:
break
if not text:
continue
interpreter = Interpreter(text)
result = interpreter.expr_arbitrary()
print(result)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment