Created
November 16, 2015 14:17
-
-
Save swlaschin/cb42417079ae2c5f99db to your computer and use it in GitHub Desktop.
Understanding parser combinators. Related blog post: http://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* | |
ParserLibrary_v1.fsx | |
Version 1 of the code for a parser library. | |
Related blog post: http://fsharpforfunandprofit.com/posts/understanding-parser-combinators/ | |
*) | |
open System | |
/// Type that represents Success/Failure in parsing | |
type Result<'a> = | |
| Success of 'a | |
| Failure of string | |
/// Type that wraps a parsing function | |
type Parser<'T> = Parser of (string -> Result<'T * string>) | |
/// Parse a single character | |
let pchar charToMatch = | |
// define a nested inner function | |
let innerFn str = | |
if String.IsNullOrEmpty(str) then | |
Failure "No more input" | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
Success (charToMatch,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
Failure msg | |
// return the "wrapped" inner function | |
Parser innerFn | |
/// Run a parser with some input | |
let run parser input = | |
// unwrap parser to get inner function | |
let (Parser innerFn) = parser | |
// call inner function with input | |
innerFn input | |
/// Combine two parsers as "A andThen B" | |
let andThen parser1 parser2 = | |
let innerFn input = | |
// run parser1 with the input | |
let result1 = run parser1 input | |
// test the result for Failure/Success | |
match result1 with | |
| Failure err -> | |
// return error from parser1 | |
Failure err | |
| Success (value1,remaining1) -> | |
// run parser2 with the remaining input | |
let result2 = run parser2 remaining1 | |
// test the result for Failure/Success | |
match result2 with | |
| Failure err -> | |
// return error from parser2 | |
Failure err | |
| Success (value2,remaining2) -> | |
// combine both values as a pair | |
let newValue = (value1,value2) | |
// return remaining input after parser2 | |
Success (newValue,remaining2) | |
// return the inner function | |
Parser innerFn | |
/// Infix version of andThen | |
let ( .>>. ) = andThen | |
/// Combine two parsers as "A orElse B" | |
let orElse parser1 parser2 = | |
let innerFn input = | |
// run parser1 with the input | |
let result1 = run parser1 input | |
// test the result for Failure/Success | |
match result1 with | |
| Success result -> | |
// if success, return the original result | |
result1 | |
| Failure err -> | |
// if failed, run parser2 with the input | |
let result2 = run parser2 input | |
// return parser2's result | |
result2 | |
// return the inner function | |
Parser innerFn | |
/// Infix version of orElse | |
let ( <|> ) = orElse | |
/// Choose any of a list of parsers | |
let choice listOfParsers = | |
List.reduce ( <|> ) listOfParsers | |
/// Choose any of a list of characters | |
let anyOf listOfChars = | |
listOfChars | |
|> List.map pchar // convert into parsers | |
|> choice |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* | |
understanding_parser_combinators.fsx | |
Demonstrates how to build a parser and associated combinators from scratch. | |
Related blog post: http://fsharpforfunandprofit.com/posts/understanding-parser-combinators/ | |
*) | |
// ============================================= | |
// Section 1 - Parse a hard-coded character | |
// ============================================= | |
module Section1 = | |
open System | |
let A_Parser str = | |
if String.IsNullOrEmpty(str) then | |
(false,"") | |
else if str.[0] = 'A' then | |
let remaining = str.[1..] | |
(true,remaining) | |
else | |
(false,str) | |
// --------- Signature of "A_Parser" --------- | |
// val A_Parser : | |
// string -> (bool * string) | |
// ------------------------------------------- | |
let inputABC = "ABC" | |
A_Parser inputABC // (true, "BC") | |
let inputZBC = "ZBC" | |
A_Parser inputZBC // (false, "ZBC") | |
// ============================================= | |
// Section 2 - Match a specified character | |
// ============================================= | |
module Section2 = | |
open System | |
// parse a single character | |
let pchar (charToMatch,str) = | |
if String.IsNullOrEmpty(str) then | |
let msg = "No more input" | |
(msg,"") | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
let msg = sprintf "Found %c" charToMatch | |
(msg,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
(msg,str) | |
// --------- Signature of "pchar" --------- | |
// val pchar : | |
// (char * string) -> (string * string) | |
// --------------------------------------------- | |
// --------- | |
// test | |
// --------- | |
let inputABC = "ABC" | |
pchar('A',inputABC) // ("Found A", "BC") | |
let inputZBC = "ZBC" | |
pchar('A',inputZBC) // ("Expecting 'A'. Got 'Z'", "ZBC") | |
pchar('Z',inputZBC) // ("Found Z", "BC") | |
// ============================================= | |
// Section 3 - Return a Success/Failure | |
// ============================================= | |
module Section3 = | |
open System | |
// Result type | |
type Result<'a> = | |
| Success of 'a | |
| Failure of string | |
// parse a single character | |
let pchar (charToMatch,str) = | |
if String.IsNullOrEmpty(str) then | |
Failure "No more input" | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
Success (charToMatch,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
Failure msg | |
// --------- Signature of "pchar" --------- | |
// pchar : | |
// (char * string) -> Result<char * string> | |
// --------------------------------------------- | |
// --------- | |
// test "pchar" | |
// --------- | |
let inputABC = "ABC" | |
pchar('A',inputABC) // Success ('A', "BC") | |
let inputZBC = "ZBC" | |
pchar('A',inputZBC) // Failure "Expecting 'A'. Got 'Z'" | |
pchar('Z',inputZBC) // Success ('Z', "BC") | |
// ============================================= | |
// currying examples | |
// ============================================= | |
(* | |
module CurryingExamples = | |
let add x y = | |
x + y | |
let add x = | |
fun y -> x + y // return a lambda | |
let add x = | |
let innerFn y = x + y | |
innerFn // return innerFn | |
let add3 x y z = | |
x + y + z | |
let add3 x = | |
fun y -> | |
(fun z -> x + y + z) | |
*) | |
// ============================================= | |
// Section 4 - Use currying | |
// ============================================= | |
module Section4a = | |
open System | |
// Result type | |
type Result<'a> = | |
| Success of 'a | |
| Failure of string | |
// parse a single character | |
let pchar charToMatch str = | |
if String.IsNullOrEmpty(str) then | |
Failure "No more input" | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
Success (charToMatch,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
Failure msg | |
// --------- Signature of "pchar" --------- | |
// pchar : | |
// char -> string -> Result<char * string> | |
// --------------------------------------------- | |
let parseA = pchar 'A' | |
// --------- Signature of "parseA" --------- | |
// parseA : | |
// string -> Result<char * string> | |
// --------------------------------------------- | |
// --------- | |
// test "parseA" | |
// --------- | |
let inputABC = "ABC" | |
let inputZBC = "ZBC" | |
parseA inputABC // Success ('A', "BC") | |
parseA inputZBC // Failure "Expecting 'A'. Got 'Z'" | |
let parseZ = pchar 'Z' | |
parseZ inputZBC // Success ('Z', "BC") | |
module Section4b = | |
open System | |
// Result type | |
type Result<'a> = | |
| Success of 'a | |
| Failure of string | |
// parse a single character | |
// same as Section4a, but with explicit inner function | |
let pchar charToMatch = | |
// define a nested inner function | |
let innerFn str = | |
if String.IsNullOrEmpty(str) then | |
Failure "No more input" | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
Success (charToMatch,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
Failure msg | |
// return the inner function | |
innerFn | |
// --------- Signature of "pchar" --------- | |
// SAME signature as version 4a! | |
// pchar : | |
// char -> string -> Result<char * string> | |
// --------------------------------------------- | |
let parseA = pchar 'A' | |
// --------- Signature of "parseA" --------- | |
// SAME signature as version 4a! | |
// parseA : | |
// string -> Result<char * string> | |
// --------------------------------------------- | |
// --------- | |
// test "parseA" | |
// --------- | |
let inputABC = "ABC" | |
parseA inputABC // Success ('A', "BC") | |
let inputZBC = "ZBC" | |
parseA inputZBC // Failure "Expecting 'A'. Got 'Z'" | |
let parseZ = pchar 'Z' | |
parseZ inputZBC // Success ('Z', "BC") | |
// ============================================= | |
// Section 5 - Create a type to wrap the parser function | |
// ============================================= | |
module Section5 = | |
open System | |
/// Type that represents Success/Failure in parsing | |
type Result<'a> = | |
| Success of 'a | |
| Failure of string | |
/// Type that wraps a parsing function | |
type Parser<'T> = Parser of (string -> Result<'T * string>) | |
/// Parse a single character | |
let pchar charToMatch = | |
// define a nested inner function | |
let innerFn str = | |
if String.IsNullOrEmpty(str) then | |
Failure "No more input" | |
else | |
let first = str.[0] | |
if first = charToMatch then | |
let remaining = str.[1..] | |
Success (charToMatch,remaining) | |
else | |
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first | |
Failure msg | |
// return the "wrapped" inner function | |
Parser innerFn | |
// --------- Signature of "parseA" --------- | |
// pchar : | |
// char -> Parser<char> | |
// ----------------------------------------- | |
// --------- Signature of "parseA" --------- | |
// parseA : | |
// Parser<char> | |
// --------------------------------------------- | |
(* | |
let inputABC = "ABC" | |
let parseA = pchar 'A' | |
parseA inputABC // compiler error | |
// error FS0003: This value is not a function and cannot be applied | |
// so we need to create a helper function | |
*) | |
// ============================ | |
// Introducing "run" | |
// ============================ | |
/// Run a parser with some input | |
let run parser input = | |
// unwrap parser to get inner function | |
let (Parser innerFn) = parser | |
// call inner function with input | |
innerFn input | |
// --------- Signature of "run" --------- | |
// run : | |
// parser:Parser<'a> -> input:string -> Result<'a * string> | |
// -------------------------------------- | |
// --------- | |
// test "run" | |
// --------- | |
module Test_Run = | |
let parseA = pchar 'A' | |
let inputABC = "ABC" | |
run parseA inputABC // Success ('A', "BC") | |
let inputZBC = "ZBC" | |
run parseA inputZBC // Failure "Expecting 'A'. Got 'Z'" | |
// ============================================= | |
// Section 6 - Combining two parsers in sequence: the "and then" combinator | |
// ============================================= | |
(* | |
// given two parsers... | |
let parseA = pchar 'A' // Parser<char> | |
let parseB = pchar 'B' | |
// ...how to compose the two functions? | |
let parseAThenB = parseA >> parseB // compiler error | |
*) | |
// ============================ | |
// Implementing "andThen" | |
// ============================ | |
/// Combine two parsers as "A andThen B" | |
let andThen parser1 parser2 = | |
let innerFn input = | |
// run parser1 with the input | |
let result1 = run parser1 input | |
// test the result for Failure/Success | |
match result1 with | |
| Failure err -> | |
// return error from parser1 | |
Failure err | |
| Success (value1,remaining1) -> | |
// run parser2 with the remaining input | |
let result2 = run parser2 remaining1 | |
// test the result for Failure/Success | |
match result2 with | |
| Failure err -> | |
// return error from parser2 | |
Failure err | |
| Success (value2,remaining2) -> | |
// combine both values as a pair | |
let newValue = (value1,value2) | |
// return remaining input after parser2 | |
Success (newValue,remaining2) | |
// return the inner function | |
Parser innerFn | |
/// Infix version of andThen | |
let ( .>>. ) = andThen | |
// --------- Signature of "andThen" --------- | |
// val andThen : | |
// parser1:Parser<'a> -> parser2:Parser<'b> -> Parser<'a * 'b> | |
// ------------------------------------------ | |
// --------- | |
// test .>>. | |
// --------- | |
module Test_AndThen = | |
let parseA = pchar 'A' // Parser<char> | |
let parseB = pchar 'B' | |
let parseAThenB = parseA .>>. parseB | |
run parseAThenB "ABC" // Success (('A', 'B'), "C") | |
run parseAThenB "ZBC" // Failure "Expecting 'A'. Got 'Z'" | |
run parseAThenB "AZC" // Failure "Expecting 'B'. Got 'Z'" | |
// ============================================= | |
// Section 7 - Choosing between two parsers: the "or else" combinator | |
// ============================================= | |
// ============================ | |
// Implementing "orElse" | |
// ============================ | |
/// Combine two parsers as "A orElse B" | |
let orElse parser1 parser2 = | |
let innerFn input = | |
// run parser1 with the input | |
let result1 = run parser1 input | |
// test the result for Failure/Success | |
match result1 with | |
| Success result -> | |
// if success, return the original result | |
result1 | |
| Failure err -> | |
// if failed, run parser2 with the input | |
let result2 = run parser2 input | |
// return parser2's result | |
result2 | |
// return the inner function | |
Parser innerFn | |
/// Infix version of orElse | |
let ( <|> ) = orElse | |
// --------- Signature of "orElse" --------- | |
// val orElse : | |
// parser1:Parser<'a> -> parser2:Parser<'a> -> Parser<'a> | |
// ------------------------------------------ | |
// --------- | |
// test <|> | |
// --------- | |
module Test_OrElse = | |
let parseA = pchar 'A' | |
let parseB = pchar 'B' | |
let parseAOrElseB = parseA <|> parseB | |
run parseAOrElseB "AZZ" // Success ('A', "ZZ") | |
run parseAOrElseB "BZZ" // Success ('B', "ZZ") | |
run parseAOrElseB "CZZ" // Failure "Expecting 'B'. Got 'C'" | |
// --------- | |
// combining "AndThen" and "OrElse" | |
// --------- | |
let parseC = pchar 'C' | |
let bOrElseC = parseB <|> parseC | |
let aAndThenBorC = parseA .>>. bOrElseC | |
run aAndThenBorC "ABZ" // Success (('A', 'B'), "Z") | |
run aAndThenBorC "ACZ" // Success (('A', 'C'), "Z") | |
run aAndThenBorC "QBZ" // Failure "Expecting 'A'. Got 'Q'" | |
run aAndThenBorC "AQZ" // Failure "Expecting 'C'. Got 'Q'" | |
// ============================================= | |
// Section 8 - Choosing from a list of parsers: "choice" and "anyOf" | |
// ============================================= | |
// ============================ | |
// Introducing "choice" | |
// ============================ | |
/// Choose any of a list of parsers | |
let choice listOfParsers = | |
List.reduce ( <|> ) listOfParsers | |
// --------- Signature of "choice" --------- | |
// val choice : | |
// listOfParsers:Parser<'a> list -> Parser<'a> | |
// ------------------------------------------ | |
// --------- | |
// test "choice" | |
// --------- | |
module Test_Choice = | |
let digitChars = ['0'..'9'] | |
// map each char to a Parser using pchar | |
let digitParsers = | |
List.map pchar digitChars | |
// Parser<char> list | |
// combine all parsers using choice | |
let parseDigit = choice digitParsers | |
// Parser<char> | |
run parseDigit "1ZZ" // Success ('1', "ZZ") | |
run parseDigit "2ZZ" // Success ('2', "ZZ") | |
run parseDigit "9ZZ" // Success ('9', "ZZ") | |
run parseDigit "AZZ" // Failure "Expecting '9'. Got 'A'" | |
// ============================ | |
// Introducing "anyOf" | |
// ============================ | |
/// Choose any of a list of characters | |
let anyOf listOfChars = | |
listOfChars | |
|> List.map pchar // convert into parsers | |
|> choice | |
// --------- Signature of "anyOf" --------- | |
// val anyOf : | |
// listOfChars:char list -> Parser<char> | |
// ------------------------------------------ | |
// --------- | |
// test "anyOf" | |
// --------- | |
module Test_AnyOf = | |
let parseLowercase = | |
anyOf ['a'..'z'] | |
let parseDigit = | |
anyOf ['0'..'9'] | |
run parseLowercase "aBC" // Success ('a', "BC") | |
run parseLowercase "ABC" // Failure "Expecting 'z'. Got 'A'" | |
run parseDigit "1ABC" // Success ("1", "ABC") | |
run parseDigit "9ABC" // Success ("9", "ABC") | |
run parseDigit "|ABC" // Failure "Expecting '9'. Got '|'" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment