Last active
August 29, 2015 14:26
-
-
Save zelark/7124a70ecf67e6549635 to your computer and use it in GitHub Desktop.
Let’s Build A Simple Interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Token types | |
# EOF (end-of-file) token is used to indicate that | |
# there is no more input left for lexical analysis | |
INTEGER, PLUS, MINUS, MUL, DIV, EOF = 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', 'EOF' | |
class Token(object): | |
def __init__(self, type, value): | |
# token type: INTEGER, PLUS, MINUS, or EOF | |
self.type = type | |
# token value: non-negative integer value, '+', '-', or None | |
self.value = value | |
def __str__(self): | |
"""String representation of the class instance. | |
Examples: | |
Token(INTEGER, 3) | |
Token(PLUS '+') | |
""" | |
return 'Token({type}, {value})'.format( | |
type=self.type, | |
value=repr(self.value) | |
) | |
def __repr__(self): | |
return self.__str__() | |
class Interpreter(object): | |
def __init__(self, text): | |
# client string input, e.g. "3 + 5", "12 - 5", etc | |
self.text = text | |
# self.pos is an index into self.text | |
self.pos = 0 | |
# current token instance | |
self.current_token = None | |
self.current_char = self.text[self.pos] | |
def error(self): | |
raise Exception('Error parsing input') | |
def advance(self): | |
"""Advance the 'pos' pointer and set the 'current_char' variable.""" | |
self.pos += 1 | |
if self.pos > len(self.text) - 1: | |
self.current_char = None # Indicates end of input | |
else: | |
self.current_char = self.text[self.pos] | |
def skip_whitespace(self): | |
while self.current_char is not None and self.current_char.isspace(): | |
self.advance() | |
def integer(self): | |
"""Return a (multidigit) integer consumed from the input.""" | |
result = '' | |
while self.current_char is not None and self.current_char.isdigit(): | |
result += self.current_char | |
self.advance() | |
return int(result) | |
def get_next_token(self): | |
"""Lexical analyzer (also known as scanner or tokenizer) | |
This method is responsible for breaking a sentence | |
apart into tokens. | |
""" | |
while self.current_char is not None: | |
if self.current_char.isspace(): | |
self.skip_whitespace() | |
continue | |
if self.current_char.isdigit(): | |
return Token(INTEGER, self.integer()) | |
if self.current_char == '+': | |
self.advance() | |
return Token(PLUS, '+') | |
if self.current_char == '-': | |
self.advance() | |
return Token(MINUS, '-') | |
if self.current_char == '*': | |
self.advance() | |
return Token(MUL, '*') | |
if self.current_char == '/': | |
self.advance() | |
return Token(DIV, '/') | |
self.error() | |
return Token(EOF, None) | |
def eat(self, token_types): | |
# check the current token type is contaned in the passed token | |
# types and if it's true then "eat" the current token | |
# and assign the next token to the self.current_token, | |
# otherwise raise an exception. | |
if self.current_token.type in token_types: | |
self.current_token = self.get_next_token() | |
else: | |
self.error() | |
def expr(self): | |
"""Parser / Interpreter | |
expr -> INTEGER PLUS INTEGER | |
expr -> INTEGER MINUS INTEGER | |
""" | |
# set current token to the first token taken from the input | |
self.current_token = self.get_next_token() | |
# we expect the current token to be an integer | |
left = self.current_token | |
self.eat((INTEGER)) | |
# we expect the current token to be either a '+' or '-' or '*' or '/' | |
op = self.current_token | |
self.eat((PLUS, MINUS, MUL, DIV)) | |
# we expect the current token to be an integer | |
right = self.current_token | |
self.eat((INTEGER)) | |
# after the above call the self.current_token is set to | |
# EOF token | |
# at this point either the INTEGER PLUS INTEGER or | |
# the INTEGER MINUS INTEGER sequence of tokens | |
# has been successfully found and the method can just | |
# return the result of adding or subtracting two integers, | |
# thus effectively interpreting client input | |
if op.type == PLUS: | |
result = left.value + right.value | |
elif op.type == MINUS: | |
result = left.value - right.value | |
elif op.type == MUL: | |
result = left.value * right.value | |
elif op.type == DIV: | |
result = left.value / right.value | |
return result | |
def expr_arbitrary(self): | |
"""Parser / Interpreter | |
expr -> INTEGER PLUS INTEGER MINUS INTEGER | |
""" | |
self.current_token = self.get_next_token() | |
left = self.current_token | |
self.eat((INTEGER)) | |
while (self.current_token.value): | |
op = self.current_token | |
self.eat((PLUS, MINUS)) | |
right = self.current_token | |
self.eat((INTEGER)) | |
if op.type == PLUS: | |
left = Token(INTEGER, left.value + right.value) | |
elif op.type == MINUS: | |
left = Token(INTEGER, left.value - right.value) | |
result = left.value | |
return result | |
def main(): | |
while True: | |
try: | |
text = input('calc> ') | |
except EOFError: | |
break | |
if not text: | |
continue | |
interpreter = Interpreter(text) | |
result = interpreter.expr_arbitrary() | |
print(result) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment