Created
October 31, 2011 16:32
-
-
Save joshcough/1327907 to your computer and use it in GitHub Desktop.
Parser Combinator in F#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type ParseResult<'a> = | |
| Success of 'a * string | |
| Failure of string | |
type Parser<'a> = | |
abstract Parse: string -> ParseResult<'a> | |
type MappedParser<'a, 'b>(f: 'a -> 'b, m:Parser<'a>) = | |
interface Parser<'b> with | |
member p.Parse(s) = match m.Parse(s) with | |
| Success (a, rest) -> Success(f(a), rest) | |
| Failure (message) -> Failure(message) | |
type DiscardingParser<'a, 'b>(o:Parser<'a>, v:Lazy<'b>) = | |
interface Parser<'b> with | |
member p.Parse(s) = match o.Parse(s) with | |
| Success (_, rest) -> Success(v.Force(), rest) | |
| Failure (message) -> Failure(message) | |
type OptionalParser<'a>(o:Parser<'a>) = | |
interface Parser<Option<'a>> with | |
member p.Parse(s) = match o.Parse(s) with | |
| Success (v, rest) -> Success(Some(v), rest) | |
| Failure (message) -> Success(None, s) | |
type WordParser(findMe:string) = | |
interface Parser<string> with | |
member p.Parse(s) = | |
if findMe.Length <= s.Length && s.Substring(0, findMe.Length).Equals(findMe) | |
then Success(findMe, s.Substring(findMe.Length)) | |
else Failure("didn't find: " + findMe) | |
type AndParser<'a, 'b>(left:Lazy<Parser<'a>>, right:Lazy<Parser<'b>>) = | |
interface Parser<'a * 'b> with | |
member p.Parse(s) = match left.Force().Parse(s) with | |
| Success (al, rest) -> match right.Force().Parse(rest) with | |
| Success (ar, rest) -> Success ((al, ar), rest) | |
| Failure (message) -> Failure (message) | |
| Failure (message) -> Failure (message) | |
type OrParser<'a>(left:Parser<'a>, right:Parser<'a>) = | |
interface Parser<'a> with | |
member p.Parse(s) = match left.Parse(s) with | |
| Success (al, rest) -> Success(al, rest) | |
| Failure (leftMessage) -> match right.Parse(s) with | |
| Success (ar, rest) -> Success (ar, rest) | |
| Failure (rightMessage) -> Failure (leftMessage + " and " + rightMessage) | |
type NeverMatch<'a>() = | |
interface Parser<'a> with | |
member p.Parse(s) = Failure("never") | |
let inline (^^) (p:Parser<'a>)(f: 'a -> 'b) = MappedParser<'a, 'b>(f, p) :> Parser<'b> | |
let inline (^^^) (p:Parser<'a>)(v:Lazy<'b>) = DiscardingParser<'a, 'b>(p, v) :> Parser<'b> | |
let inline (|||) (l:Parser<'a>)(r:Parser<'b>) = OrParser<'a>(l, r) :> Parser<'a> /// hmm, b is weird here | |
let inline (++) (l:Parser<'a>)(r:Parser<'b>) = AndParser(lazy(l), lazy(r)) | |
let inline (+++) (l:Parser<'a>)(r:Lazy<Parser<'b>>) = AndParser(lazy(l),r) | |
let opt<'a>(p:Parser<'a>) = OptionalParser(p) :> Parser<Option<'a>> | |
let never<'a> () = NeverMatch<'a>() :> Parser<'a> | |
let rec oneOf<'a> (parsers: List<Parser<'a>>) : Parser<'a> = | |
match parsers with | |
| p :: ps -> p ||| oneOf<'a>(ps) | |
| _ -> never() | |
let cons (x, xs) = x :: xs | |
let matchChar (c:char) = WordParser(string c) ^^ (fun s -> s.[0]) | |
let emptyString = WordParser("") :> Parser<string> | |
let rec zeroOrMore<'a> (p:Parser<'a>): Parser<List<'a>> = | |
(p +++ lazy(zeroOrMore<'a>(p)) ^^ cons) ||| (emptyString ^^^ lazy([])) | |
let oneOrMore<'a> (p:Parser<'a>): Parser<List<'a>> = p ++ zeroOrMore<'a>(p) ^^ cons | |
let repsep<'a, 'b> (pa:Parser<'a>, pb:Parser<'b>) : Parser<List<'a>> = | |
let absParser = pa ++ pb | |
let asParser = absParser ^^ fst | |
let manyAsParser = zeroOrMore(asParser) | |
let optAParser = opt(pa) ^^ Option.toList | |
manyAsParser ++ optAParser ^^ (fun (l, r) -> l @ r) | |
let oneOfChars (cs:List<char>) : Parser<char> = oneOf (cs |> List.map matchChar) | |
let oneToNine = oneOfChars ['1'..'9'] | |
let one = matchChar '1' | |
let zeroToNine = oneOfChars ['0'..'9'] | |
let digit = zeroToNine | |
let charListToString (cs: List<char>) : string = cs |> List.map string |> List.reduce (+) | |
let charListToInt (cs: List<char>) : int = cs |> charListToString |> int | |
let number: Parser<int> = oneOrMore(digit) ^^ charListToInt | |
let space = oneOfChars [' '; '\n'; '\t'] | |
let spaces = zeroOrMore space | |
let numbers = repsep(number, spaces) | |
let letter = (oneOfChars ['a'..'z']) ||| (oneOfChars ['a'..'z']) | |
let underscore = matchChar '_' | |
let idBody = zeroOrMore(oneOf([letter; digit; underscore])) | |
let id = letter ++ idBody ^^ (fun (x, xs) -> x :: xs |> charListToString) | |
type SExpr = | |
| Number of int | |
| Atom of string | |
| SList of List<SExpr> | |
let rec sexpr: Parser<SExpr> = oneOf [number ^^ Number; id ^^ Atom; list ^^ SList] | |
and listStart<'a> = matchChar('(') ^^^ lazy([]) | |
and listEnd<'a> = matchChar(')') ^^^ lazy([]) | |
and list = listStart +++ lazy(repsep(sexpr, spaces)) +++ lazy(listEnd) ^^ (fun ((_, l), _) -> l) | |
printfn "x in xxx: %A" (matchChar('x').Parse("xxx")) | |
printfn "x or y in yaa: %A" ((oneOf [(matchChar 'x'); (matchChar 'y')]).Parse("yaa")) | |
printfn "x in xxx: %A" (matchChar('x').Parse("xxx")) | |
printfn "digit? 234: %A" (digit.Parse("234")) | |
printfn "zero or more 1's in 1222: %A" ((zeroOrMore(one)).Parse("1222")) | |
printfn "zero or more 1's in 111222: %A" ((zeroOrMore(one)).Parse("111222")) | |
printfn "zero or more 1's in 222: %A" ((zeroOrMore(one)).Parse("222")) | |
printfn "zero or more 1's in 111: %A" ((zeroOrMore(one)).Parse("111")) | |
printfn "zero or more 1's in empty string: %A" ((zeroOrMore(one)).Parse("")) | |
printfn "one or more 1's in 1222: %A" ((oneOrMore(one)).Parse("1222")) | |
printfn "one or more 1's in 111222: %A" ((oneOrMore(one)).Parse("111222")) | |
printfn "one or more 1's in 222: %A" ((oneOrMore(one)).Parse("222")) | |
printfn "one or more 1's in 111: %A" ((oneOrMore(one)).Parse("111")) | |
printfn "one or more 1's in empty string: %A" ((oneOrMore(one)).Parse("")) | |
printfn "a or b in accc: %A" ((oneOfChars ['a';'b']).Parse("accc")) | |
printfn "number in 1234: %A" (number.Parse("1234")) | |
printfn "spaces in ' ' : %A" (spaces.Parse(" ")) | |
printfn "numbers in '1 2 3 4 5' : %A" (numbers.Parse("1 2 3 4 5")) | |
printfn "numbers in '123 2 3673 4 5' : %A" (numbers.Parse("123 2 3673 4 5")) | |
printfn "id in 'x5' : %A" (id.Parse("x5")) | |
printfn "id in 'x_123' : %A" (id.Parse("x_123")) | |
printfn "id in 'x__1__z' : %A" (id.Parse("x__1__z")) | |
printfn "id in 'x__' : %A" (id.Parse("x__")) | |
printfn "sexpr in 'x__' : %A" (sexpr.Parse("x__")) | |
printfn "sexpr in 'x5' : %A" (sexpr.Parse("x5")) | |
printfn "sexpr in '5' : %A" (sexpr.Parse("5")) | |
printfn "sexpr in '(5)' : %A" (sexpr.Parse("(5)")) | |
printfn "sexpr in '(5 x)' : %A" (sexpr.Parse("(5 x)")) | |
printfn "sexpr in '(5 x x 5)' : %A" (sexpr.Parse("(5 x x 5)")) | |
printfn "sexpr in '(5 (x x) 5)' : %A" (sexpr.Parse("(5 (x x) 5)")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment