Created
October 28, 2012 21:27
-
-
Save agrif/3969986 to your computer and use it in GitHub Desktop.
S-expression parser in Python (minus value classes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from string import whitespace, digits | |
from values import Cell, Symbol, Integer | |
# every character that ends a token: the list punctuation '(', '.', ')'
# followed by all whitespace characters
token_boundaries = ''.join(['(.)', whitespace])
# parser functions return (result, leftover) tuples | |
# parse a single token, respecting token boundaries
def parse_token(s):
    """Split the first token off the front of `s`.

    Returns a `(token, leftover)` tuple. The token runs up to (but not
    including) the earliest boundary character found; the leftover starts
    at that boundary with any leading whitespace stripped. When no usable
    boundary exists, the whole string is the token and the leftover is ''.

    Only the first occurrence of each boundary character is examined, and
    an occurrence at index 0 is ignored. As a result the token is never
    empty for non-empty input, but a boundary character sitting at index 0
    also hides any later occurrence of that same character.
    """
    cut = None
    for boundary in token_boundaries:
        pos = s.find(boundary)
        # skip "not found" (-1) and a boundary at the very start (0)
        if pos > 0 and (cut is None or pos < cut):
            cut = pos

    # no boundary at all: everything is the token
    if cut is None:
        return (s, '')

    token, tail = s[:cut], s[cut:]
    return (token, tail.lstrip(whitespace))
def parse_int(s):
    """Parse the leading token of `s` as an integer.

    Returns `(Integer, leftover)`. The token is converted with `int()`,
    so a token that is not a valid integer literal raises ValueError.
    """
    token, leftover = parse_token(s)
    number = Integer(int(token))
    return (number, leftover)
def parse_symbol(s):
    """Parse the leading token of `s` as a symbol.

    Returns `(Symbol, leftover)`, except that the token "nil" is special:
    it yields `None` rather than a Symbol.
    """
    name, leftover = parse_token(s)
    # "nil" is represented by None instead of a Symbol instance
    result = None if name == "nil" else Symbol(name)
    return (result, leftover)
# accepts a string *without* the leading open paren
# like so: `first second third . cdr) ...`
def parse_cell(s):
    """Parse the remainder of a list whose opening paren was already removed.

    Returns `(cell, leftover)`, where `cell` is a chain of `Cell(car, cdr)`
    objects, or `None` for the empty list / end of list. The leftover is
    whatever follows the list's closing paren, with leading whitespace
    stripped.

    A dot before the final value makes an improper list: the value after
    the dot becomes the cdr of the last cell, and it must be followed
    directly by the closing paren.

    Raises ValueError if the input ends before the closing paren, or if
    something other than ')' follows a dotted cdr.
    """
    if not s:
        raise ValueError("unterminated list: expected ')' before end of input")

    if s[0] == ')':
        # end of list. None ~= empty list
        return (None, s[1:].lstrip(whitespace))

    # get the car from the first value
    car, rest = parse_value(s)

    # a dot at the beginning of rest indicates an improper list
    if rest[:1] == '.':
        # the value after the dot can be any sort of value
        cdr, rest = parse_value(rest[1:].lstrip(whitespace))
        # The dotted cdr must be the last thing in the list. Consume the
        # closing paren here; otherwise it leaks into the leftover and an
        # enclosing list would mistake it for its own terminator.
        if rest[:1] != ')':
            raise ValueError(
                "expected ')' after dotted cdr, found %r" % rest[:16])
        rest = rest[1:].lstrip(whitespace)
    else:
        # no dot found yet: parse the rest of the list into cdr
        cdr, rest = parse_cell(rest)

    return (Cell(car, cdr), rest)
def parse_value(s):
    """Parse one value from the front of `s`.

    Returns a `(result, leftover)` tuple. Leading whitespace is skipped,
    then the kind of value is chosen from the first character:

    * a digit      -> integer, via `parse_int`
    * '('          -> list, via `parse_cell` (the paren is removed first)
    * anything else -> symbol, via `parse_symbol`

    Raises ValueError if `s` is empty or contains only whitespace.
    """
    # Skip leading whitespace so that input such as " 52" or " (a b)" is
    # dispatched on its first real character rather than falling through
    # to the symbol branch.
    s = s.lstrip(whitespace)
    if not s:
        raise ValueError("expected a value but reached end of input")

    first = s[0]
    if first in digits:
        return parse_int(s)
    if first == '(':
        # shave off this open paren, since parse_cell expects it to be gone
        return parse_cell(s[1:].lstrip(whitespace))
    # if nothing else matched, it must be a symbol
    return parse_symbol(s)
# demo: parse a handful of sample inputs and print each (result, leftover)
for sample in (
    "52 a b",
    "apples oranges",
    "(apples oranges pear)",
    "(apples oranges . pear)",
    "(apples oranges . nil)",
    "(oranges (this . that))",
):
    print(parse_value(sample))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment