Skip to content

Instantly share code, notes, and snippets.

@agrif
Created October 28, 2012 21:27
Show Gist options
  • Save agrif/3969986 to your computer and use it in GitHub Desktop.
Save agrif/3969986 to your computer and use it in GitHub Desktop.
S-expression parser in python (minus value classes)
from string import whitespace, digits
from values import Cell, Symbol, Integer
# characters that end a token: parens, the dot used for dotted pairs,
# and any whitespace
token_boundaries = '(.)' + whitespace

# parser functions return (result, leftover) tuples


# parse a single token, respecting token boundaries
def parse_token(s):
    """Split the first token off the front of ``s``.

    The token runs from the start of ``s`` up to (not including) the first
    character found in ``token_boundaries``.  The character at index 0 is
    always treated as part of the token, even if it is a boundary
    character, so a non-empty input never yields an empty token.

    Returns a ``(token, leftover)`` tuple, where ``leftover`` is the rest of
    the string with leading whitespace removed.  If no boundary is found the
    whole string is the token and ``leftover`` is ``''``.
    """
    for index, char in enumerate(s):
        # Skip index 0 (see docstring).  Scanning character by character
        # stops at the first boundary instead of searching the whole string
        # once per boundary character, and it still notices a boundary
        # character that also happens to appear at index 0.
        if index > 0 and char in token_boundaries:
            # split the token out and strip off whitespace from the rest
            return (s[:index], s[index:].lstrip(whitespace))
    # no boundary: return the whole string with no leftover
    return (s, '')
def parse_int(s):
    """Read the leading token of ``s`` as an integer.

    Returns ``(Integer, leftover)``; ``int()`` raises ``ValueError`` if the
    token is not a valid integer literal.
    """
    token, leftover = parse_token(s)
    number = Integer(int(token))
    return (number, leftover)
def parse_symbol(s):
    """Read the leading token of ``s`` as a symbol.

    Returns ``(Symbol, leftover)``, except that the token ``nil`` is
    returned as ``None`` instead of a ``Symbol``.
    """
    name, leftover = parse_token(s)
    # "nil" is special: it is represented by None rather than a Symbol
    symbol = None if name == "nil" else Symbol(name)
    return (symbol, leftover)
# accepts a string *without* the leading open paren
# like so: `first second third . cdr) ...`
def parse_cell(s):
    """Parse the inside of a list whose open paren was already removed.

    ``s`` looks like ``first second third . cdr) ...``.  Returns a
    ``(value, leftover)`` tuple: ``value`` is ``None`` for the empty list,
    otherwise a chain of ``Cell(car, cdr)``; ``leftover`` is the text after
    the closing paren with leading whitespace removed.

    Raises ``IndexError`` if the input ends before the list is closed, and
    ``ValueError`` if the cdr of a dotted pair is not followed by ``)``.
    """
    if not s:
        raise IndexError("unterminated list: input ended before ')'")

    if s[0] == ')':
        # end of list. None ~= empty list
        return (None, s[1:].lstrip(whitespace))

    # get the car from the first value
    car, rest = parse_value(s)
    if not rest:
        raise IndexError("unterminated list: input ended before ')'")

    if rest[0] != '.':
        # no dot found yet: parse the rest of the list into cdr
        cdr, rest = parse_cell(rest)
        return (Cell(car, cdr), rest)

    # improper list! the value after the dot can be any sort of value
    cdr, rest = parse_value(rest[1:].lstrip(whitespace))

    # The dotted cdr must be the last thing in the list, so the closing
    # paren has to be consumed here.  Leaving it in the leftover would let
    # it terminate the *enclosing* list early.
    if rest[:1] != ')':
        raise ValueError(
            "expected ')' after the cdr of a dotted pair, got %r" % rest[:16])
    return (Cell(car, cdr), rest[1:].lstrip(whitespace))
def parse_value(s):
    """Parse one value from the front of ``s``.

    Leading whitespace is skipped, then the first character decides what to
    parse: a digit starts an integer, ``(`` starts a list, and anything else
    is read as a symbol.

    Returns a ``(value, leftover)`` tuple.  Raises ``IndexError`` if ``s`` is
    empty or contains only whitespace.
    """
    # Without this, input such as " 52" would be read as a symbol whose
    # name contains the whitespace.
    s = s.lstrip(whitespace)
    if not s:
        # same exception type the bare s[0] lookup raises, clearer message
        raise IndexError("unexpected end of input while parsing a value")

    # figure out what sort of thing to parse based on the first character
    first = s[0]
    if first in digits:
        return parse_int(s)
    if first == '(':
        # shave off this open paren, since parse_cell expects it to be gone
        return parse_cell(s[1:].lstrip(whitespace))
    # if nothing else matched, it must be a symbol
    return parse_symbol(s)
# demo: parse a handful of sample expressions and print each
# (result, leftover) tuple
for _sample in (
    "52 a b",
    "apples oranges",
    "(apples oranges pear)",
    "(apples oranges . pear)",
    "(apples oranges . nil)",
    "(oranges (this . that))",
):
    print(parse_value(_sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment