# c64list clone in python 2.7
# heavily based on https://ruslanspivak.com/lsbasi-part6/
# printable PETSCII characters, indexed by (code - 0x20); '~', '^' and '_'
# stand in for the pound-sign, up-arrow and left-arrow glyphs
petscii = """ !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[~]^_ ABCDEFGHIJKLMNOPQRSTUVWXYZ"""
# C64 BASIC V2 keywords and their token bytes, as parallel tuples
# (INPUT# is 0x84; 0x83 is DATA):
keywords = (' ', 'end', 'for', 'next', 'input#', 'print')
tokens = (0x20, 0x80, 0x81, 0x82, 0x84, 0x99)
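# Illustrative lookup (not part of the original gist): the two tuples are
# parallel, so a keyword's token byte is found by position, e.g.
#   tokens[keywords.index('print')]  # -> 0x99, C64 BASIC's PRINT token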
# when True, whitespace is dropped; when False, spaces tokenize as 0x20
ignore_whitespace = False
""" | |
I've decided to leave " out of tokens, since quoted strings | |
seem like a whole class unto themselves, needs additional | |
processing | |
""" | |
def parse_line(data):
    # empty line
    if not data:
        print "Empty line"
        return None
    # tokenize via a Lexer instance (get_next_token is an instance method)
    return Lexer(data).get_next_token()
class Lexer(object):
    def __init__(self, text):
        # client string input, e.g. 'print "hello"'
        self.text = text
        # self.pos is an index into self.text
        self.pos = 0
        # guard against empty input
        self.current_char = self.text[self.pos] if self.text else None
        # current list of tokenized bytes:
        self.token_output = []
    def error(self):
        raise Exception('Invalid character')
    def advance(self):
        """Advance the 'pos' pointer and set the 'current_char' variable."""
        self.pos += 1
        if self.pos > len(self.text) - 1:
            self.current_char = None  # Indicates end of input
        else:
            self.current_char = self.text[self.pos]
    def skip_whitespace(self):
        # drop whitespace entirely (called only when ignore_whitespace is set)
        while self.current_char is not None and self.current_char.isspace():
            self.advance()
    def get_next_token(self):
        """Lexical analyzer (also known as scanner or tokenizer)

        This method is responsible for breaking a sentence
        apart into tokens, one token at a time.
        """
        while self.current_char is not None:
            if ignore_whitespace and self.current_char.isspace():
                self.skip_whitespace()
                continue
            # search for a defined keyword at the current position
            # (a bare space matches the ' ' entry and tokenizes as 0x20):
            for kw, tok in zip(keywords, tokens):
                if self.text.startswith(kw, self.pos):
                    # token found: add it to the output list and
                    # advance 'pos' by as many chars as the keyword has
                    self.token_output.append(tok)
                    for _ in range(len(kw)):
                        self.advance()
                    return tok
            self.error()
        return None
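
# Quick check of the lexer in isolation (illustrative, assuming the fixed
# keyword search above): 'fornext' tokenizes to [0x81, 0x82].
#   lexer = Lexer('fornext')
#   while lexer.get_next_token() is not None:
#       pass
#   print lexer.token_output  # [129, 130]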
class Interpreter(object):
    def __init__(self, lexer):
        self.lexer = lexer
        # set current token to the first token taken from the input
        self.current_token = self.lexer.get_next_token()

    def error(self):
        raise Exception('Invalid syntax')

    def eat(self, token_type):
        # compare the current token with the expected token byte and,
        # if they match, "eat" the current token and assign the next
        # token to self.current_token; otherwise raise an exception.
        # (tokens here are plain byte values, not Token objects, so
        # compare directly rather than via a .type attribute)
        if self.current_token == token_type:
            self.current_token = self.lexer.get_next_token()
        else:
            self.error()
def main():
    while True:
        try:
            # To run under Python 3, replace 'raw_input' with 'input'
            text = raw_input('pyc64list> ')
        except EOFError:
            break
        if not text:
            continue
        lexer = Lexer(text)
        interpreter = Interpreter(lexer)
        # consume tokens until the input is exhausted, then show the
        # byte values the lexer collected
        while interpreter.current_token is not None:
            interpreter.eat(interpreter.current_token)
        print lexer.token_output

if __name__ == '__main__':
    main()
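
# Example session (with the fixes above; 153 and 32 are 0x99 and 0x20,
# the token bytes for PRINT and space):
#   pyc64list> print print
#   [153, 32, 153]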