Created
December 6, 2017 00:08
-
-
Save NQNStudios/0a799a76c04d075d14f7e334b8d8feba to your computer and use it in GitHub Desktop.
Simple LISP parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def properType(token):
    """Return the given token as the most specific type it can represent.

    The token is tried as an ``int`` first and then as a ``float``. If it
    parses as neither, it is returned unchanged as a string.

    Note: ``float()`` also accepts spellings such as ``nan`` and ``inf``, so
    tokens with those names are returned as floats.
    """
    # Each conversion is attempted on its own so that a failure simply falls
    # through to the next candidate. No ``finally`` clause is used here: a
    # ``return`` inside ``finally`` would override the converted value and
    # always hand back the raw string.
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            continue
    # Neither numeric type matched, so it's just a string.
    return token
def getString(lisp):
    """Return the contents of the quoted string that starts at ``lisp[0]``.

    ``lisp`` must be non-empty and its first character is taken to be the
    opening quote (the caller passes either ``"`` or ``'``). The string ends
    at the next occurrence of that same character.

    Returns a ``(contents, consumed)`` tuple, where ``consumed`` is the
    number of characters used up including both quote characters. If the
    closing quote is missing, the rest of the input is taken as the string
    contents and ``consumed`` equals ``len(lisp)``.
    """
    # TODO allow backslash to escape quotes inside of strings
    quote = lisp[0]
    closing = lisp.find(quote, 1)
    if closing == -1:
        # Unterminated string: keep every remaining character instead of
        # letting the -1 from find() slice off the last one.
        return lisp[1:], len(lisp)
    return lisp[1:closing], closing + 1
def getElements(lisp):
    """Recursively parse Lisp source text into nested Python lists.

    Raw tokens are separated by whitespace, parentheses and quotes, and are
    passed through ``properType``. Quoted strings are read with
    ``getString`` and stored as plain strings. Each parenthesised group
    becomes a nested list.

    Returns an ``(elements, index)`` tuple. ``elements`` is always a list,
    even at root level. ``index`` is the position in ``lisp`` of the ``)``
    that terminated this list, or the position at which scanning stopped
    when the input ran out first. A recursive caller uses it to skip over
    the text the child consumed.
    """
    elements = []
    # Just tokenize until we encounter '(', ')', quotes, or the end of the
    # string.
    current_token = ''
    current_index = 0
    while current_index < len(lisp):
        char = lisp[current_index]
        is_quote = char == '"' or char == "'"
        if char.isspace() or char == '(' or char == ')' or is_quote:
            # Every delimiter terminates a pending raw token. It is a minor
            # syntax error for '(' or a quote to occur in the middle of a
            # token, but we cover for the user by treating it as that
            # token's termination. Checking for a non-empty token also means
            # multiple spaces in a row won't yield empty tokens.
            if current_token:
                elements.append(properType(current_token))
                current_token = ''
            if char == ')':
                # The termination of a list: hand back what we collected and
                # where the ')' was found.
                return elements, current_index
            if char == '(':
                # Recursively parse the child list. chars_consumed is the
                # offset of the child's ')' within the slice, so this moves
                # us onto that ')'; the shared increment below steps past it.
                child_elements, chars_consumed = getElements(
                    lisp[current_index + 1:])
                elements.append(child_elements)
                current_index += chars_consumed + 1
            elif is_quote:
                string_contents, chars_consumed = getString(
                    lisp[current_index:])
                elements.append(string_contents)
                # chars_consumed includes both quotes. Stop ON the closing
                # quote so the shared increment below lands on the character
                # right after it instead of skipping that character.
                current_index += chars_consumed - 1
        else:
            # Normal characters just get added to the current token
            current_token += char
        # Keep track of our position in the string
        current_index += 1
    # If reaching the end of the string, add the last token unless it's empty
    if current_token:
        elements.append(properType(current_token))
    return elements, current_index
if __name__ == "__main__":
    # Lisp REPL
    # raw_input only exists on Python 2. On Python 3 the equivalent function
    # is input(), so pick whichever one this interpreter provides.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    while True:
        # User enters Lisp code in the console -- we assume the Lisp to be
        # valid
        try:
            lispCode = read_line('> ')
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D) or Ctrl-C: leave the REPL without a
            # traceback.
            print('')
            break
        # getElements() always returns a list, even at root level
        rootElements = getElements(lispCode)[0]
        if not rootElements:
            # Blank line: there is nothing to print, prompt again.
            continue
        # Take the first element of the root because the input must be a
        # valid lisp expression, not multiple.
        astRoot = rootElements[0]
        print(astRoot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment