Last active
March 10, 2019 19:48
-
-
Save ekozhura/469615963a949f0e17123e5fadd992af to your computer and use it in GitHub Desktop.
Parser Combinators
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Printf | |
type ('a, 'b) result = | |
| Success of 'a * 'b | |
| Failure of string | |
type 'a parser = Parser of (string -> ('a, string) result) | |
let run_parser p i = match p with | |
| Parser fn -> fn i;; | |
let empty_parser = | |
let inner str = Failure "" in Parser inner | |
let some_parser = | |
let inner str = Success ('0', str) in Parser inner | |
let char_parser charTo = | |
let inner str = | |
if (String.length str) < 1 then | |
Failure "No more input" | |
else if str.[0] == charTo then | |
let remaining = | |
String.sub str 1 ((String.length str) - 1) in | |
Success (str.[0], remaining) | |
else Failure (sprintf "Expecting '%c'. Got '%c'\n" charTo str.[0]) in | |
Parser inner | |
let and_then p1 p2 = | |
let inner str = | |
let res1 = run_parser p1 str in | |
match res1 with | |
| Failure err -> Failure err | |
| Success (val1, rem1) -> | |
let res2 = run_parser p2 rem1 in | |
match res2 with | |
| Failure err -> Failure err | |
| Success (val2, rem2) -> | |
Success ((val1, val2), rem2) in | |
Parser inner | |
let or_else p1 p2 = | |
let inner str = | |
let res1 = run_parser p1 str in | |
match res1 with | |
| Failure err -> run_parser p2 str | |
| _ -> res1 in | |
Parser inner | |
let mapP f parser = | |
let inner str = | |
let result = run_parser parser str in | |
match result with | |
| Success (value, remaining) -> Success (f value, remaining) | |
| Failure err -> Failure err in | |
Parser inner | |
let (<&>) = and_then | |
let (<|>) = or_else | |
let (<!>) = mapP | |
let (|>>) x f = mapP f x | |
let choice parsers = List.fold_left (<|>) empty_parser parsers | |
let any_of chars = chars |> List.map char_parser |> choice | |
let to_chars str = | |
let rec inner str ls = | |
match (String.length str) with | |
| 0 -> ls | |
| _ -> (str.[0] :: (inner | |
(String.sub str 1 ((String.length str) - 1))) ls | |
) in | |
inner str [] | |
let returnP x = | |
let inner str = | |
Success (x, str) in | |
Parser inner | |
let applyP fP xP = (fP <&> xP) |> mapP (fun (f, x) -> f x) | |
let (<*>) = applyP | |
let lift2 f xP yP = returnP f <*> xP <*> yP | |
let rec sequence parser_list = | |
let cons head tail = head :: tail in | |
let consP = lift2 cons in | |
match parser_list with | |
| [] -> returnP [] | |
| head::tail -> consP head (sequence tail) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* basic implementation, taken from the talk | |
"Understanding parser combinators: a deep dive" by Scott Wlaschin | |
video: https://youtu.be/RDalzi7mhdY | |
*/ | |
type result('a, 'b) = | |
| Ok('a, 'b) | |
| Error(string); | |
type parser('a) = | |
| Parser(string => result('a, string)); | |
let runParser = (parser, input) => { | |
switch(parser) { | |
| Parser(innerFn) => innerFn(input); | |
}; | |
}; | |
let parseChar = charToMatch => { | |
let innerFn = input => | |
if (String.length(input) < 1) { | |
Error("String is empty"); | |
} else { | |
let first = input.[0]; | |
if (first === charToMatch) { | |
let remaining = String.sub(input, 1, String.length(input) - 1); | |
Ok(charToMatch, remaining); | |
} else { | |
Error( | |
"Expecting " | |
++ Char.escaped(charToMatch) | |
++ ", but got " | |
++ Char.escaped(first) | |
++ " instead", | |
); | |
}; | |
}; | |
Parser(innerFn); | |
}; | |
/* basic combinators */ | |
let andThen = (parser1, parser2) => { | |
let innerFn = input => { | |
let result1 = runParser(parser1, input); | |
switch (result1) { | |
| Error(err) => Error(err) | |
| Ok(value1, remaining1) => | |
let result2 = runParser(parser2, remaining1); | |
switch (result2) { | |
| Error(err) => Error(err) | |
| Ok(value2, remaining2) => Ok((value1, value2), remaining2) | |
}; | |
}; | |
}; | |
Parser(innerFn); | |
}; | |
let orElse = (parser1, parser2) => { | |
let innerFn = input => { | |
let result1 = runParser(parser1, input); | |
switch (result1) { | |
| Ok(value1, remaining1) => result1 | |
| Error(err) => | |
let result2 = runParser(parser2, input); | |
result2; | |
}; | |
}; | |
Parser(innerFn); | |
}; | |
let mapP = (fn, parser) => { | |
let innerFn = input => { | |
let result = runParser(parser, input); | |
switch (result) { | |
| Ok(value, remaining) => | |
let newValue = fn(value); | |
Ok(newValue, remaining); | |
| Error(err) => Error(err) | |
}; | |
}; | |
Parser(innerFn); | |
}; | |
/* infix versions */ | |
let (+&&+) = andThen; | |
let (+||+) = orElse; | |
let (+$+) = (x, f) => mapP(f, x); | |
/* examples: */ | |
let parserA: parser(char) = parseChar('a'); | |
let parserB: parser(char) = parseChar('b'); | |
let parserC: parser(char) = parseChar('c'); | |
let parserBorC: parser(char) = parserB +||+ parserC; | |
let parserAthenB = parserA +&&+ parserB; | |
let mapParser = parserA +$+ (x => Char.escaped(x)); | |
let parserAthenBorC = parserA +&&+ parserBorC; | |
Js.log(runParser(parserAthenB, "abc")); /* [[97,98],"c"] */ | |
Js.log(runParser(mapParser, "abcd")); /* ["a","bcd"] */ | |
Js.log(runParser(parserAthenBorC, "abcd")); /* [[97,98],"cd"] */ | |
let combinedOr = List.fold_left((+||+), parserB, [parserA]); | |
Js.log(runParser(combinedOr, "ab")); /* [97,"b"] */ | |
Js.log(runParser(parserA +&&+ parserB +&&+ parserC +&&+ parserBorC)("abcc")); /* [[[[97,98],99],99],""] */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment