Skip to content

Instantly share code, notes, and snippets.

@animatedlew
Created October 30, 2017 02:11
Show Gist options
  • Save animatedlew/7ce3135ceeb132ddc41f289c41b5a09a to your computer and use it in GitHub Desktop.
Save animatedlew/7ce3135ceeb132ddc41f289c41b5a09a to your computer and use it in GitHub Desktop.
class MatchError(Exception):
    """Raised when the lexer's next character is not the expected one.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it and it is not grouped with
    interpreter-exit signals such as ``KeyboardInterrupt``.
    """
class Lexer:
    """Character-level lexer that skips whitespace and collects ``#`` comments.

    Attributes:
        index: Offset of the next unread character in ``source``.
        lineno: Number of newline characters consumed so far.
        source: The full text being lexed.
        tokens: Comment strings collected by ``tokenize`` (one per comment).
    """

    def __init__(self, source):
        """Create a lexer positioned at the start of *source* (a string)."""
        self.index = 0
        self.lineno = 0
        self.source = source
        # Per-instance list: a class-level list would be shared (and keep
        # growing) across every Lexer created in the process.
        self.tokens = []

    # predicate helpers
    def is_digit(self, s):
        """Return True if *s* compares between '0' and '9' inclusive."""
        return '0' <= s <= '9'

    def is_ws(self, s):
        """Return True if *s* is a newline, carriage return, tab or form feed.

        This is a pure predicate; line counting happens in ``ws`` when a
        newline is actually consumed, so testing the same character more
        than once cannot inflate ``lineno``.
        """
        return s in ['\n', '\r', '\t', '\f']

    def is_comment(self, s):
        """Return True if *s* is the comment marker ``#``."""
        return s == '#'

    def comment(self):
        """Consume the rest of the current line and return it.

        The text from the current position up to (not including) the next
        newline is returned; a final line without a trailing newline is read
        up to the end of the input. Whitespace following the comment is
        consumed as well, so the lexer is left on the next meaningful
        character.
        """
        start = self.index
        end = self.source.find('\n', start)
        if end == -1:
            # No newline left: the comment runs to the end of the input.
            end = len(self.source)
        self.index = end
        self.ws()  # important: also swallows the terminating newline
        return self.source[start:end]

    def ws(self):
        """Advance past consecutive whitespace, counting consumed newlines."""
        source = self.source
        end = len(source)
        while self.index < end and self.is_ws(source[self.index]):
            if source[self.index] == '\n':
                self.lineno += 1
            self.index += 1

    def peek(self):
        """Return the next character without consuming it.

        Returns the empty string when the whole input has been consumed.
        """
        if self.index < len(self.source):
            return self.source[self.index]
        return ''

    def tokenize(self):
        """Scan the source, appending each comment's text to ``self.tokens``.

        Scanning stops at the end of the input, or at the first character
        that is neither whitespace nor the start of a comment (a message is
        printed and ``index`` is left pointing at that character).
        """
        while self.index < len(self.source):
            if self.is_ws(self.peek()):
                print('consuming whitespace...')
                self.ws()
            elif self.is_comment(self.peek()):
                self.tokens.append(self.comment())
            else:
                print("Unrecognized input.")
                return

    def match(self, s):
        """Consume the next character if it equals *s*.

        Raises:
            MatchError: If the next character differs from *s*, or the
                input is exhausted.
        """
        found = self.peek()
        # `found` is '' at end of input; never advance past the end.
        if found and s == found:
            self.index += 1
        else:
            raise MatchError("Expected: {}, Encountered: {}".format(
                s, found or 'end of input'))

    def __str__(self):
        """Return a short summary of the current position."""
        return "<index: {}, lineno: {}>".format(self.index, self.lineno)
# Driver: read the requirements file from the working directory, run the
# lexer over its full contents, then print where the lexer stopped.
with open("requirements.txt") as source:
    text = source.read()

lexer = Lexer(text)
lexer.tokenize()
print(lexer)
consuming whitespace...
Unrecognized input.
<index: 20, lineno: 4>
# comment section
click>=5.7
first<2.0.1
pip-tools==1.10.1
six<=1.11.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment