Skip to content

Instantly share code, notes, and snippets.

let parseExpression (grammar:Map<string,GrammarRule<'a>>) start (input:string) =
let rec parse offset = function
| Terminal x ->
let e = offset + x.Length - 1
if e < input.Length && input.[offset..e] = x then (TerminalSymbol x, e + 1) else (Unmatched, offset)
| TerminalOneOf x -> match currentCharacter input offset with
| Some c -> if x.Contains(c) then (TerminalSymbol c, offset + c.Length) else (Unmatched, offset)
| None -> (Unmatched, offset)
| TerminalWildcard -> match currentCharacter input offset with
| Some c -> (TerminalSymbol c, offset + c.Length)
let currentCharacter (s:string) i =
if i >= s.Length then None
else if Char.IsSurrogatePair(s, i) then Some s.[i..i+1] else Some s.[i..i]
type Expression =
| Terminal of string
| TerminalOneOf of string
| TerminalUnicode of UnicodeCategory
| TerminalWildcard
| NonTerminal of string
| Epsilon
| Sequence of Expression list
| Choice of Expression list
| ZeroOrMore of Expression
hexDigit <- [0123456789abcdefABCDEF]
escapedCharacter <- '\\' ([abfnrtv0\\"'[\]] / ('u' hexDigit hexDigit hexDigit hexDigit))
safeCharacter <- escapedCharacter / (!["\\\n] <anychar>)
oneofCharacter <- escapedCharacter / (![\\\]] <anychar>)
terminalUnicode <- '{' ( "Lu" / "Ll" / "Lt" / "Lm" / "Lo" / "Mn" / "Mc" / "Me" / "Nd" / "Nl" / "No" / "Pc" / "Pd" /
"Ps" / "Pe" / "Pi" / "Pf" / "Po" / "Sm" / "Sc" / "Sk" / "So" / "Zs" / "Zl" / "Zp" / "Cc" /
"Cf" / "Cs" / "Co" / "Cn" ) '}'
terminalOneOf <- '[' oneofCharacter+ ']'
terminalCharacter <- '\'' safeCharacter '\''
terminalWord <- '\"' safeCharacter+ '\"'
type BooleanExpr =
| Atom of bool
| Binary of BooleanExpr * string * BooleanExpr
| Not of BooleanExpr
let rec evalBool = function
| Atom x -> x
| Binary (left, "&", right) -> if evalBool left then evalBool right else false
| Binary (left, "|", right) -> if evalBool left then true else evalBool right
| Binary (_, x, _) -> failwith <| "Unexpected binary operator: " + x
let g = Map.ofList [("bool", boolRule); ("paren", parenRule); ("not", notRule); ("atom", atomRule);
("and", andRule); ("or", orRule); ("expr", exprRule); ("start", startRule)]
// bool <- "true" / "false"
let boolRule = GrammarRule<bool>(Choice [Terminal "true"; Terminal "false"], (fun s _ -> Parsed (s = "true")))
// paren <- "(" expr ")"
let parseParen _ = function
| Production [TerminalSymbol "("; x; TerminalSymbol ")"] -> x
| x -> unexpected x
let parenRule = GrammarRule<bool>(Sequence [Terminal "("; NonTerminal "expr"; Terminal ")"], parseParen)
// not <- "!" atom
| NonTerminal x ->
let rule = grammar.[x]
match parse offset rule.Prod with
| (Unmatched, _) as y -> y
| (parsed, endOffset) -> (rule.Func input.[offset..endOffset - 1] parsed, endOffset)
// ...
parse 0 <| NonTerminal start
type GrammarRule<'a>(prod:Expression, func:(string -> ParseResult<'a> -> ParseResult<'a>)) =
member this.Prod = prod
member this.Func = func
let parseExpression (grammar:Map<string,GrammarRule<'a>>) start (input:string) =
type ParseResult<'a> =
| Parsed of 'a
| TerminalSymbol of string
| Production of ParseResult<'a> list
| EmptyMatch
| Unmatched