Created
November 5, 2019 02:16
-
-
Save JasonKleban/c5d610c8aafd9b9ea2efff63a2e0b9b4 to your computer and use it in GitHub Desktop.
Minimal monadic parsing combinator, example grammar, and test cases
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#nowarn "40" | |
open System | |
// Complete Monadic Parsing "Library" | |
type Parser<'r> = Parser of (char list -> ('r*char list) list) | |
let parse (Parser p) = p | |
let (>>=) p f = Parser(fun cs -> | |
List.concat [for (r,cs') in parse p cs -> parse (f r) cs']) | |
let (>>) p q = p >>= fun _ -> q | |
let (<|>) p q = Parser(fun cs -> | |
match parse p cs with | |
| [] -> parse q cs | |
| rs -> rs) | |
let mreturn r = Parser(fun cs -> [(r,cs)]) | |
let lambda = Parser(fun _ -> []) | |
let item = Parser(fun cs -> | |
match cs with [] -> [] | c::cs' -> [(c,cs')]) | |
let sat cond = | |
item >>= fun c -> if cond c then mreturn c else lambda | |
let char c = sat ((=)c) | |
let rec many0 p = many1 p <|> mreturn [] | |
and many1 p = p >>= fun r -> many0 p >>= fun rs -> mreturn (r::rs) | |
// Command-line style Argument tokenizer (sort of) | |
let rec parseLine s = // parse a `line` from a sequence of chars that make up the string | |
fst <| Seq.head (parse line <| Seq.toList s) // parse returns the parsed construction (multiple, if ambiguous, so we take the `Seq.head`) AND the remaining unmatched characters | |
// in a tuple, but we just want the former of those two components (`fst`) | |
and line = | |
many0 (char ' ') // eat any leading space characters | |
>> many0 value // expect zero or more `value`s | |
>>= fun values -> mreturn values | |
and value = | |
(quotedValue <|> unquotedValue) // a value can be a quoted or unquoted value | |
>>= fun v -> many0 (char ' ') >> mreturn v | |
and quotedValue = | |
char '\"' // starts with a " | |
>> many0 ((char '\\' >> char '\"') <|> sat (fun c -> c <> '\"')) // anything until a ", unless it is escaped | |
>>= fun v -> char '\"' >> mreturn (String.Concat(v)) // ends with a " | |
and unquotedValue = | |
many1 ((char '\\' >> char '\"') <|> sat (fun c -> c <> '\"' && not <| Char.IsWhiteSpace(c))) // anything until whitespace or " unless it is escaped | |
>>= fun v -> mreturn (String.Concat(v)) | |
// Try: (parse line <| Seq.toList "\"One A\" \"Two B\" \"???") |> printfn "%A" // Incomplete match example | |
parseLine "" |> printfn "%A" // [] | |
parseLine " " |> printfn "%A" // [] | |
parseLine "One" |> printfn "%A" // ["One"] | |
parseLine "One Two" |> printfn "%A" // ["One"; "Two"] | |
parseLine "\"One A\"" |> printfn "%A" // ["One A"] | |
parseLine "\"One A\" \"Two B\"" |> printfn "%A" // ["One A"; "Two B"] | |
parseLine " Zero \"One A\" Two\"Thre\\\"e C\"Four " |> printfn "%A" // ["Zero"; "One A"; "Two"; "Thre\"e C"; "Four"] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment