Created
April 28, 2014 05:41
-
-
Save jasonjohnson/11362596 to your computer and use it in GitHub Desktop.
Scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Scanner(object):
    """Split a stream of characters into ``(kind, chars)`` tokens.

    The scanner keeps a cursor (``offset``) into ``stream`` and groups
    consecutive characters that belong to the same character class into
    a single token.  ``chars`` is a list of the individual characters
    that were consumed for that token.
    """

    def __init__(self, stream):
        """Create a scanner positioned at the start of ``stream``.

        ``stream`` must support indexing and ``len()`` (e.g. a ``str``).
        """
        self.offset = 0
        self.stream = stream

    def current(self):
        """Return the item at the cursor without advancing.

        For a ``str`` or ``list`` stream this raises ``IndexError`` when
        the cursor is past the end; check ``available()`` first.
        """
        return self.stream[self.offset]

    def step(self):
        """Advance the cursor by one position."""
        self.offset += 1

    def available(self):
        """Return True while the cursor is still inside the stream."""
        return self.offset < len(self.stream)

    def consume(self, token=None, valid=None):
        """Consume the longest run of items found in ``valid``.

        Starting at the cursor, every item that is contained in ``valid``
        is appended to ``token`` and the cursor is advanced past it.
        Consumption stops at the first item not in ``valid`` or at the
        end of the stream.

        ``token`` -- optional list to extend; a falsy value (``None`` or
        an empty list) is replaced by a fresh list.
        ``valid`` -- container of acceptable items (string or sequence);
        a falsy value means nothing is accepted.

        Returns the list of consumed items (empty if nothing matched).
        """
        token = token if token else []
        valid = valid if valid else []
        # Iterative on purpose: recursing once per character overflows the
        # interpreter's recursion limit on long runs (e.g. a long name).
        while self.available() and self.current() in valid:
            token.append(self.current())
            self.step()
        return token

    def scan(self):
        """Tokenize the rest of the stream.

        Returns a list of ``(kind, chars)`` tuples in stream order, where
        ``kind`` is one of the names in the table below and ``chars`` is
        the list of characters making up the token.

        Raises ``Exception("Unknown token:", char)`` when the character
        at the cursor does not belong to any known kind.
        """
        tokens = []
        kinds = [
            ('name', "abcdefghijklmnopqrstuvwxyz"),
            ('number', "0123456789"),
            ('assign', "="),
            ('space', " "),
            ('newline', "\r\n"),
            ('paren', "()"),
            ('colon', ":"),
            ('comma', ","),
            ('curly', "{}"),
            ('add', "+"),
            ('terminate', ";"),
        ]
        while True:
            consumed = False
            for kind, valid in kinds:
                # Validity could be checked with a string of
                # chars or a function which would test the
                # structure of the stream.
                token = self.consume(valid=valid)
                if token:
                    tokens.append((kind, token))
                    consumed = True
            if consumed:
                # Something matched during this pass; try every kind again.
                continue
            if not self.available():
                # Nothing matched because the stream is exhausted: done.
                break
            # Nothing matched but input remains: unrecognized character.
            raise Exception("Unknown token:", self.current())
        return tokens
# Demo: scan a small sample program and print the resulting token list.
# print(...) with a single argument gives the same output on Python 2 and 3,
# whereas the bare `print expr` statement is a syntax error on Python 3.
print(Scanner("""
var thing = 1;
def hello(a:int, b:int):int {
return a + b;
}
print(thing);
print(hello(2, 5));
""").scan())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment