matthieubulte · December 26, 2014 23:52
diff --git a/lisp_combinator.py b/lisp_combinator.py
 # flatten :: [[a]] -> [a]
 def flatten(l):
    return [x for lx in l for x in lx]

 # first :: [a] -> a
 def first(l):
    return [l[0]] if l else []
    
 # toString :: [Char] -> String
 def toString(l):
    return ''.join(l)

 # type Parser a = String -> [(a, String)]
 # the parser is a function returning:
 # + the empty list if it failed
 # + a list of tuples of the successful parsed
 # value, and the unconsumed rest of the string

 # result is a parser that alwys return the same
 # output without consuming the input.
 #
 # result :: a -> Parser a
 def result(x):
    return lambda input: [(x, input)]

 # zero is the parser that always fails.
 #
 # zero :: Parser a
 zero = lambda input: []

 # item consumes one character or fails if the input is
 # empty.
 #
 # id :: Parser Char
 def id(input):
    if not input:
        return []
    else:
        return [(input[0], input[1:])]

 # the seq parser combinator represents the juxtaposition
 # of two parsers, or the AND operator.
 # 
 # seq :: Parser a -> Parser b -> Parser (a, b)
 def seq(p, q):
    return bind(p, lambda first:
           bind(q, lambda second:
               result((first, second))))

 # apply a transformformation to the result of a `seq`.
 #
 # mapSeq :: Parser a -> Parser b -> (a -> b -> c) -> Parser c
 def mapSeq(p, q, f):
    return bind(seq(p, q), lambda res:
        result(f(res[0], res[1])))

 # fmap :: (a -> b) -> Parser a -> Parser b
 def fmap(f, parser):
    return bind(parser, lambda x: result(f(x)))

 # seq can bring problems when composing several parsers
 # by having nested tuple as results, thus we introduce the
 # bind combinator to solve this problem.
 #
 # bind :: Parser a -> (a -> Parser b) -> Parser b
 def bind(p, f):
    def _parser(input):
        return flatten([
            f(p_res[0])(p_res[1])
            for p_res in p(input)
        ])

    return _parser

 # sat creates a parser consuming one character
 # satisfying the given property.
 #
 # sat :: (Char -> Bool) -> Parser Char
 def sat(p):
    return bind(id, lambda x:
        result([x]) if p(x) else zero)
        
 # plus combines the result of two parsers on the same input.
 # It can be thought as the OR operator.
 #
 # plus :: Parser a -> Parser a -> Parser a
 def plus(p, q):
    return lambda input: p(input) + q(input)

 # many consumes the input until the given parser fails.
 # Can be thought as the * operator.
 #
 # many :: Parser a -> Parser[a]
 def many(p):
    _many = bind(p, lambda x:
            bind(many(p), lambda xs:
                result(x+xs)))
    
    _parser = plus(_many, result([]))

    return lambda input: first(_parser(input))

 # many1 consumes one or more char from the input until
 # the given parser fails.
 # Can be thought as the + operator.
 #
 # many1 :: Parser a -> Parser [a]
 def many1(p):
    return bind(p, lambda x:
           bind(many(p), lambda xs:
               result(x + xs)))

 # char creates a parser that consumes one char if the first
 # char of the parser'S input is the same than the char's 
 # given parameter.
 #
 # char :: Char -> Parser Char
 def char(c):
    return sat(lambda x: x == c)

 # digit is a parser consuming one char of the input if it's
 # a digit.
 #
 # digit :: Parser Char
 digit = sat(lambda x: x.isdigit())

 # lower is a parser consuming one char of the input if it's
 # a lower case letter.
 #
 # lower :: Parser Char
 lower = sat(lambda x: 'a' <= x <= 'z')

 # upper is a parser consuming one char of the input if it's
 # an upper case letter.
 #
 # upper :: Parser Char
 upper = sat(lambda x: 'A' <= x <= 'Z')

 # upper is a parser consuming one char of the input if it's
 # an upper or lower case letter.
 #
 # letter :: Parser Char
 letter = plus(lower, upper)

 # alphaNum is a parser consuming one char if it's a number
 # or a letter.
 #
 # alphaNum :: Parser Char
 alphaNum = plus(letter, digit)

 # junk :: Parser String
 junk = many(sat(lambda c: c == ' ' or c == '\n' or c == '\t'))

 # token transform a parser to skip junk before applying
 # the given parser.
 #
 # token :: Parser a -> Parser a
 def token(p):
    return mapSeq(junk, p, lambda _, x: toString(x))

 # nat parses a natural number.
 #
 # natural :: Parser Nat
 natural = bind(token(many1(digit)), lambda x:
    result(int(x)))

 # identifier is a parser for an identifier, starting with
 # a letter, followed by zero or more letters or digits.
 #
 # identifier :: Parser String
 identifier = fmap(toString, token(mapSeq(letter, many(alphaNum), lambda x,y: x+y)))


 def lazy(p_factory):
    return lambda input: p_factory()(input)
        

 ### LISP

 lpar = token(char('('))
 rpar = token(char(')'))

 number = fmap(lambda x: [('NUMBER', x)], natural)
 symbol = fmap(lambda x: [('SYMBOL', x)], identifier)

 atom = fmap(lambda x: x, plus(symbol, number))

 def in_parenthesis(parser):
    return bind(lpar, lambda _:
           bind(parser, lambda res:
           bind(rpar, lambda _:
               result(res))))

 def _list():
    _parser = in_parenthesis(many1(lazy(_s_expr)))
    return fmap(lambda x: [('LIST', x)], _parser)

 def _s_expr():
    s_expr = lazy(_s_expr)
    list = lazy(_list)
    
    return plus(
        atom,
        plus(list,
        in_parenthesis(
            bind(s_expr, lambda s1:
            bind(token(char('.')), lambda _:
            bind(s_expr, lambda s2:
                result([('CONS', s1 + s2)]))))
        ))
    )

 s_expr = _s_expr()

 print s_expr('(abc def (123 h) (abc . def))')
	# flatten :: [[a]] -> [a]
	def flatten(l):
	return [x for lx in l for x in lx]

	# first :: [a] -> a
	def first(l):
	return [l[0]] if l else []

	# toString :: [Char] -> String
	def toString(l):
	return ''.join(l)

	# type Parser a = String -> [(a, String)]
	# the parser is a function returning:
	# + the empty list if it failed
	# + a list of tuples of the successful parsed
	# value, and the unconsumed rest of the string

	# result is a parser that alwys return the same
	# output without consuming the input.
	#
	# result :: a -> Parser a
	def result(x):
	return lambda input: [(x, input)]

	# zero is the parser that always fails.
	#
	# zero :: Parser a
	zero = lambda input: []

	# item consumes one character or fails if the input is
	# empty.
	#
	# id :: Parser Char
	def id(input):
	if not input:
	return []
	else:
	return [(input[0], input[1:])]

	# the seq parser combinator represents the juxtaposition
	# of two parsers, or the AND operator.
	#
	# seq :: Parser a -> Parser b -> Parser (a, b)
	def seq(p, q):
	return bind(p, lambda first:
	bind(q, lambda second:
	result((first, second))))

	# apply a transformformation to the result of a `seq`.
	#
	# mapSeq :: Parser a -> Parser b -> (a -> b -> c) -> Parser c
	def mapSeq(p, q, f):
	return bind(seq(p, q), lambda res:
	result(f(res[0], res[1])))

	# fmap :: (a -> b) -> Parser a -> Parser b
	def fmap(f, parser):
	return bind(parser, lambda x: result(f(x)))

	# seq can bring problems when composing several parsers
	# by having nested tuple as results, thus we introduce the
	# bind combinator to solve this problem.
	#
	# bind :: Parser a -> (a -> Parser b) -> Parser b
	def bind(p, f):
	def _parser(input):
	return flatten([
	f(p_res[0])(p_res[1])
	for p_res in p(input)
	])

	return _parser

	# sat creates a parser consuming one character
	# satisfying the given property.
	#
	# sat :: (Char -> Bool) -> Parser Char
	def sat(p):
	return bind(id, lambda x:
	result([x]) if p(x) else zero)

	# plus combines the result of two parsers on the same input.
	# It can be thought as the OR operator.
	#
	# plus :: Parser a -> Parser a -> Parser a
	def plus(p, q):
	return lambda input: p(input) + q(input)

	# many consumes the input until the given parser fails.
	# Can be thought as the * operator.
	#
	# many :: Parser a -> Parser[a]
	def many(p):
	_many = bind(p, lambda x:
	bind(many(p), lambda xs:
	result(x+xs)))

	_parser = plus(_many, result([]))

	return lambda input: first(_parser(input))

	# many1 consumes one or more char from the input until
	# the given parser fails.
	# Can be thought as the + operator.
	#
	# many1 :: Parser a -> Parser [a]
	def many1(p):
	return bind(p, lambda x:
	bind(many(p), lambda xs:
	result(x + xs)))

	# char creates a parser that consumes one char if the first
	# char of the parser'S input is the same than the char's
	# given parameter.
	#
	# char :: Char -> Parser Char
	def char(c):
	return sat(lambda x: x == c)

	# digit is a parser consuming one char of the input if it's
	# a digit.
	#
	# digit :: Parser Char
	digit = sat(lambda x: x.isdigit())

	# lower is a parser consuming one char of the input if it's
	# a lower case letter.
	#
	# lower :: Parser Char
	lower = sat(lambda x: 'a' <= x <= 'z')

	# upper is a parser consuming one char of the input if it's
	# an upper case letter.
	#
	# upper :: Parser Char
	upper = sat(lambda x: 'A' <= x <= 'Z')

	# upper is a parser consuming one char of the input if it's
	# an upper or lower case letter.
	#
	# letter :: Parser Char
	letter = plus(lower, upper)

	# alphaNum is a parser consuming one char if it's a number
	# or a letter.
	#
	# alphaNum :: Parser Char
	alphaNum = plus(letter, digit)

	# junk :: Parser String
	junk = many(sat(lambda c: c == ' ' or c == '\n' or c == '\t'))

	# token transform a parser to skip junk before applying
	# the given parser.
	#
	# token :: Parser a -> Parser a
	def token(p):
	return mapSeq(junk, p, lambda _, x: toString(x))

	# nat parses a natural number.
	#
	# natural :: Parser Nat
	natural = bind(token(many1(digit)), lambda x:
	result(int(x)))

	# identifier is a parser for an identifier, starting with
	# a letter, followed by zero or more letters or digits.
	#
	# identifier :: Parser String
	identifier = fmap(toString, token(mapSeq(letter, many(alphaNum), lambda x,y: x+y)))


	def lazy(p_factory):
	return lambda input: p_factory()(input)


	### LISP

	lpar = token(char('('))
	rpar = token(char(')'))

	number = fmap(lambda x: [('NUMBER', x)], natural)
	symbol = fmap(lambda x: [('SYMBOL', x)], identifier)

	atom = fmap(lambda x: x, plus(symbol, number))

	def in_parenthesis(parser):
	return bind(lpar, lambda _:
	bind(parser, lambda res:
	bind(rpar, lambda _:
	result(res))))

	def _list():
	_parser = in_parenthesis(many1(lazy(_s_expr)))
	return fmap(lambda x: [('LIST', x)], _parser)

	def _s_expr():
	s_expr = lazy(_s_expr)
	list = lazy(_list)

	return plus(
	atom,
	plus(list,
	in_parenthesis(
	bind(s_expr, lambda s1:
	bind(token(char('.')), lambda _:
	bind(s_expr, lambda s2:
	result([('CONS', s1 + s2)]))))
	))
	)

	s_expr = _s_expr()

	print s_expr('(abc def (123 h) (abc . def))')