Last active
August 29, 2015 14:16
-
-
Save praeclarum/f17b3ec9a035b4d434fd to your computer and use it in GitHub Desktop.
Parser for a little language in F#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser = | |
type Position = | |
{ | |
Code : string | |
Index : int | |
} | |
let ws p = | |
if p.Index >= p.Code.Length then p | |
else | |
if Char.IsWhiteSpace (p.Code.[p.Index]) then | |
let mutable i = p.Index + 1 | |
while Char.IsWhiteSpace (p.Code.[i]) do i <- i + 1 | |
{ p with Index = i } | |
else p | |
let regexParser regex = | |
let re = System.Text.RegularExpressions.Regex ("\\G" + regex) | |
fun p -> | |
let m = re.Match (p.Code, p.Index) | |
if not m.Success then None | |
else Some (m.Value, { p with Index = p.Index + m.Length }) | |
let loop parser (p : Position) = | |
let rec l a p = | |
match parser p with | |
| Some (x, p2) -> l (x::a) (ws p2) | |
| None -> match a with [] -> None | _ -> Some (a, p) | |
match l [] p with Some (l, p) -> Some (l |> List.rev, p) | _ -> None | |
let ( &&& ) a b p = | |
match a p with | |
| Some (a1, p2) -> match b (ws p2) with Some (a2, p3) -> Some ((a1, a2), p3) | _ -> None | |
| None -> None | |
let icult = System.Globalization.CultureInfo.InvariantCulture | |
let opprec (op,_) = | |
match op with | |
| "*" | "/" -> 14 | |
| "+" | "-" -> 13 | |
| "<" | ">" -> 11 | |
| "=" -> 3 | |
| "" -> 1000 | |
| x -> failwithf "Op not supp %A" x | |
let optree ops = | |
let geto = fst | |
let gete = snd | |
let n = Seq.length ops | |
let rec tree b e = | |
match e - b with | |
| 0 -> failwithf "Can't tree 0" | |
| 1 -> ops |> Seq.nth b |> gete | |
| 2 -> let eo = ops |> Seq.nth (e-1) | |
BinaryExpression (ops |> Seq.nth b |> gete, geto eo, gete eo) | |
| _ -> | |
let sops = ops |> Seq.skip (b + 1) |> Seq.take (e - b - 1) | |
let minprec = sops |> Seq.sortBy opprec |> Seq.head | |
let minpreci = b + 1 + (sops |> Seq.findIndex (fun x -> geto x = geto minprec)) | |
BinaryExpression (tree b minpreci, geto minprec, tree (minpreci) e) | |
tree 0 n | |
let (|PNumber|_|) = regexParser "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?" | |
let (|PIdentifier|_|) = regexParser "[\\p{L}\\$\\_][\\p{L}\\p{N}\\$\\_]*" | |
let (|POperator|_|) = regexParser "[\\/\\=\\-\\+\\!\\*\\%\\<\\>\\&\\|\\^\\~\\?]+" | |
let (|PKeyword|_|) k = function | |
| PIdentifier (n, p2) when n = k -> Some (k, p2) | |
| _ -> None | |
let rec (|PExpression|_|) = function | |
| PPrefixExpression (a1, p2) -> | |
match ws p2 with | |
| PBinaryExpressions (bes, p3) -> | |
let ops = ("", a1) :: bes | |
Some (optree ops, p3) | |
| _ -> Some (a1, p2) | |
| _ -> None | |
and (|PPrefixExpression|_|) = function | |
| PPrefixOperator (a1, p2) -> | |
match ws p2 with | |
| PPostfixExpression (a2, p3) -> Some (UnaryExpression (a1, a2), p3) | |
| _ -> None | |
| PPostfixExpression (a1, p2) -> Some (a1, p2) | |
| _ -> None | |
and (|PPrefixOperator|_|) = (|POperator|_|) | |
and (|PBinaryExpression|_|) = function | |
| PAssignBinaryExpression x -> Some x | |
| POpBinaryExpression x -> Some x | |
| _ -> None | |
and (|PAssignOperator|_|) = regexParser "\\=" | |
and (|PAssignBinaryExpression|_|) = (|PAssignOperator|_|) &&& (|PPrefixExpression|_|) | |
and (|PBinaryOperator|_|) = (|POperator|_|) | |
and (|POpBinaryExpression|_|) = (|PBinaryOperator|_|) &&& (|PPrefixExpression|_|) | |
and (|PBinaryExpressions|_|) = loop (|PBinaryExpression|_|) | |
and (|PPostfixExpression|_|) = function | |
| PPrimaryExpression (a1, p2) -> Some (a1, p2) | |
| _ -> None | |
and (|PPrimaryExpression|_|) = function | |
| PIdentifier (name, p2) -> Some (VariableExpression name, p2) | |
| PLiteralExpression (a1, p2) -> Some (a1, p2) | |
| _ -> None | |
and (|PLiteralExpression|_|) = function | |
| PNumber (ns, p2) -> Some (NumberExpression (Double.Parse (ns, icult)), p2) | |
| _ -> None | |
and (|PStatement|_|) = function | |
| PExpression (e, p2) -> Some (ExpressionStatement e, p2) | |
| _ -> None | |
let parse = loop (|PStatement|_|) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment