Created
December 26, 2014 23:52
-
-
Save matthieubulte/089c384d2112e1df9965 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# flatten :: [[a]] -> [a] | |
def flatten(l): | |
return [x for lx in l for x in lx] | |
# first :: [a] -> a | |
def first(l): | |
return [l[0]] if l else [] | |
# toString :: [Char] -> String | |
def toString(l): | |
return ''.join(l) | |
# type Parser a = String -> [(a, String)] | |
# the parser is a function returning: | |
# + the empty list if it failed | |
# + a list of tuples of the successful parsed | |
# value, and the unconsumed rest of the string | |
# result is a parser that alwys return the same | |
# output without consuming the input. | |
# | |
# result :: a -> Parser a | |
def result(x): | |
return lambda input: [(x, input)] | |
# zero is the parser that always fails. | |
# | |
# zero :: Parser a | |
zero = lambda input: [] | |
# item consumes one character or fails if the input is | |
# empty. | |
# | |
# id :: Parser Char | |
def id(input): | |
if not input: | |
return [] | |
else: | |
return [(input[0], input[1:])] | |
# the seq parser combinator represents the juxtaposition | |
# of two parsers, or the AND operator. | |
# | |
# seq :: Parser a -> Parser b -> Parser (a, b) | |
def seq(p, q): | |
return bind(p, lambda first: | |
bind(q, lambda second: | |
result((first, second)))) | |
# apply a transformformation to the result of a `seq`. | |
# | |
# mapSeq :: Parser a -> Parser b -> (a -> b -> c) -> Parser c | |
def mapSeq(p, q, f): | |
return bind(seq(p, q), lambda res: | |
result(f(res[0], res[1]))) | |
# fmap :: (a -> b) -> Parser a -> Parser b | |
def fmap(f, parser): | |
return bind(parser, lambda x: result(f(x))) | |
# seq can bring problems when composing several parsers | |
# by having nested tuple as results, thus we introduce the | |
# bind combinator to solve this problem. | |
# | |
# bind :: Parser a -> (a -> Parser b) -> Parser b | |
def bind(p, f): | |
def _parser(input): | |
return flatten([ | |
f(p_res[0])(p_res[1]) | |
for p_res in p(input) | |
]) | |
return _parser | |
# sat creates a parser consuming one character | |
# satisfying the given property. | |
# | |
# sat :: (Char -> Bool) -> Parser Char | |
def sat(p): | |
return bind(id, lambda x: | |
result([x]) if p(x) else zero) | |
# plus combines the result of two parsers on the same input. | |
# It can be thought as the OR operator. | |
# | |
# plus :: Parser a -> Parser a -> Parser a | |
def plus(p, q): | |
return lambda input: p(input) + q(input) | |
# many consumes the input until the given parser fails. | |
# Can be thought as the * operator. | |
# | |
# many :: Parser a -> Parser[a] | |
def many(p): | |
_many = bind(p, lambda x: | |
bind(many(p), lambda xs: | |
result(x+xs))) | |
_parser = plus(_many, result([])) | |
return lambda input: first(_parser(input)) | |
# many1 consumes one or more char from the input until | |
# the given parser fails. | |
# Can be thought as the + operator. | |
# | |
# many1 :: Parser a -> Parser [a] | |
def many1(p): | |
return bind(p, lambda x: | |
bind(many(p), lambda xs: | |
result(x + xs))) | |
# char creates a parser that consumes one char if the first | |
# char of the parser'S input is the same than the char's | |
# given parameter. | |
# | |
# char :: Char -> Parser Char | |
def char(c): | |
return sat(lambda x: x == c) | |
# digit is a parser consuming one char of the input if it's | |
# a digit. | |
# | |
# digit :: Parser Char | |
digit = sat(lambda x: x.isdigit()) | |
# lower is a parser consuming one char of the input if it's | |
# a lower case letter. | |
# | |
# lower :: Parser Char | |
lower = sat(lambda x: 'a' <= x <= 'z') | |
# upper is a parser consuming one char of the input if it's | |
# an upper case letter. | |
# | |
# upper :: Parser Char | |
upper = sat(lambda x: 'A' <= x <= 'Z') | |
# upper is a parser consuming one char of the input if it's | |
# an upper or lower case letter. | |
# | |
# letter :: Parser Char | |
letter = plus(lower, upper) | |
# alphaNum is a parser consuming one char if it's a number | |
# or a letter. | |
# | |
# alphaNum :: Parser Char | |
alphaNum = plus(letter, digit) | |
# junk :: Parser String | |
junk = many(sat(lambda c: c == ' ' or c == '\n' or c == '\t')) | |
# token transform a parser to skip junk before applying | |
# the given parser. | |
# | |
# token :: Parser a -> Parser a | |
def token(p): | |
return mapSeq(junk, p, lambda _, x: toString(x)) | |
# nat parses a natural number. | |
# | |
# natural :: Parser Nat | |
natural = bind(token(many1(digit)), lambda x: | |
result(int(x))) | |
# identifier is a parser for an identifier, starting with | |
# a letter, followed by zero or more letters or digits. | |
# | |
# identifier :: Parser String | |
identifier = fmap(toString, token(mapSeq(letter, many(alphaNum), lambda x,y: x+y))) | |
def lazy(p_factory): | |
return lambda input: p_factory()(input) | |
### LISP | |
lpar = token(char('(')) | |
rpar = token(char(')')) | |
number = fmap(lambda x: [('NUMBER', x)], natural) | |
symbol = fmap(lambda x: [('SYMBOL', x)], identifier) | |
atom = fmap(lambda x: x, plus(symbol, number)) | |
def in_parenthesis(parser): | |
return bind(lpar, lambda _: | |
bind(parser, lambda res: | |
bind(rpar, lambda _: | |
result(res)))) | |
def _list(): | |
_parser = in_parenthesis(many1(lazy(_s_expr))) | |
return fmap(lambda x: [('LIST', x)], _parser) | |
def _s_expr(): | |
s_expr = lazy(_s_expr) | |
list = lazy(_list) | |
return plus( | |
atom, | |
plus(list, | |
in_parenthesis( | |
bind(s_expr, lambda s1: | |
bind(token(char('.')), lambda _: | |
bind(s_expr, lambda s2: | |
result([('CONS', s1 + s2)])))) | |
)) | |
) | |
s_expr = _s_expr() | |
print s_expr('(abc def (123 h) (abc . def))') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment