Last active
August 29, 2015 14:05
-
-
Save loosechainsaw/421eff5b0430a6b0064f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace JsonParser | |
open System | |
open NUnit.Framework | |
module Json =

    /// Lexical tokens produced by `tokenizer`.
    /// `String` carries the text between the quotes; `Number` carries the
    /// number exactly as written (minus a leading '+').
    type Token =
        | String of string
        | Number of string
        | Null
        | True
        | False
        | OpenBracket
        | CloseBracket
        | OpenParen
        | CloseParen
        | OpenBrace
        | CloseBrace
        | Colon
        | Comma

    /// Shared implementation of the single-character active patterns below.
    let private matchChar (expected: char) (c: char) =
        if c = expected then Some c else None

    let private (|MatchColon|_|) c = matchChar ':' c
    let private (|MatchComma|_|) c = matchChar ',' c
    let private (|MatchDoubleQuotes|_|) c = matchChar '"' c
    let private (|MatchPlus|_|) c = matchChar '+' c
    let private (|MatchMinus|_|) c = matchChar '-' c
    let private (|MatchOpenParen|_|) c = matchChar '(' c
    let private (|MatchCloseParen|_|) c = matchChar ')' c
    let private (|MatchOpenBrace|_|) c = matchChar '{' c
    let private (|MatchCloseBrace|_|) c = matchChar '}' c
    let private (|MatchOpenBracket|_|) c = matchChar '[' c
    let private (|MatchCloseBracket|_|) c = matchChar ']' c

    /// Matches any whitespace character.
    let private (|MatchWhitespace|_|) (c: char) =
        if Char.IsWhiteSpace(c) then Some c else None

    /// Matches a decimal digit.
    let private (|MatchDigit|_|) (c: char) =
        if Char.IsDigit(c) then Some c else None

    /// Matches a character that ends a number or a bare literal:
    /// ',', ':', ']', '}' or whitespace.
    let private (|MatchDelimiter|_|) (c: char) =
        match c with
        | ',' | ':' | ']' | '}' -> Some c
        | _ when Char.IsWhiteSpace(c) -> Some c
        | _ -> None

    /// Scans the body of a string literal; `s` starts just after the opening
    /// quote. Returns the characters following the closing quote and the
    /// accumulated text.
    /// `\"` is decoded to `"`. Every other backslash escape is copied through
    /// verbatim (backslash included) and is not decoded.
    /// Raises "Invalid string" when the closing quote is missing.
    let rec private scan_string (s: char list) (acc: string) =
        match s with
        | [] -> failwith "Invalid string"
        | '\\' :: MatchDoubleQuotes _ :: t ->
            scan_string t (acc + "\"")
        | '\\' :: escaped :: t ->
            // Consume the escaped character together with its backslash so an
            // escaped backslash directly before the closing quote (`\\"`) is
            // not mistaken for an escaped quote.
            scan_string t (acc + "\\" + string escaped)
        | MatchDoubleQuotes _ :: t -> (t, acc)
        | h :: t -> scan_string t (acc + string h)

    /// Scans a number whose first character in `s` is a digit, appending to
    /// `acc` (which may already hold a leading "-").
    /// Accepts digits, an optional fraction (".5") and an optional exponent
    /// ("e10", "E-3"). The number must be followed by end of input or a
    /// delimiter; anything else raises "Unexpected character".
    /// Returns the unconsumed characters and the number text.
    let private scan_digit (s: char list) (acc: string) =
        // Consume a run of decimal digits, appending them to `text`.
        let rec digits (chars: char list) (text: string) =
            match chars with
            | MatchDigit d :: t -> digits t (text + string d)
            | _ -> (chars, text)
        let (afterInt, intText) = digits s acc
        let (afterFrac, fracText) =
            match afterInt with
            | '.' :: (MatchDigit _ :: _ as t) -> digits t (intText + ".")
            | _ -> (afterInt, intText)
        let (rest, number) =
            match afterFrac with
            | ('e' | 'E' as e) :: ('+' | '-' as sign) :: (MatchDigit _ :: _ as t) ->
                digits t (fracText + string e + string sign)
            | ('e' | 'E' as e) :: (MatchDigit _ :: _ as t) ->
                digits t (fracText + string e)
            | _ -> (afterFrac, fracText)
        match rest with
        | [] -> (rest, number)
        | MatchDelimiter _ :: _ -> (rest, number)
        | _ -> failwith "Unexpected character"

    /// Maps the text of a bare literal to its token, or None if unknown.
    let private literal_map input =
        match input with
        | "null" -> Some(Token.Null)
        | "true" -> Some(Token.True)
        | "false" -> Some(Token.False)
        | _ -> None

    /// Scans a bare word up to end of input or the next delimiter and looks
    /// it up with `literal_map`. The delimiter itself is left unconsumed.
    let rec private scan_other (s: char list) (acc: string) =
        match s with
        | [] -> (s, literal_map acc)
        | MatchDelimiter _ :: _ -> (s, literal_map acc)
        | h :: t -> scan_other t (acc + string h)

    /// Splits `input` into tokens, in source order. Whitespace is skipped.
    /// Raises (via failwith) on an unterminated string, a malformed number,
    /// or a bare word that is not null/true/false.
    let tokenizer (input: string) =
        let rec tokenizer_impl (input: char list) (acc: Token list) =
            match input with
            | [] -> acc
            | MatchWhitespace _ :: t -> tokenizer_impl t acc
            | MatchColon _ :: t -> tokenizer_impl t (Colon :: acc)
            | MatchComma _ :: t -> tokenizer_impl t (Comma :: acc)
            | MatchOpenParen _ :: t -> tokenizer_impl t (OpenParen :: acc)
            | MatchCloseParen _ :: t -> tokenizer_impl t (CloseParen :: acc)
            | MatchCloseBracket _ :: t -> tokenizer_impl t (CloseBracket :: acc)
            | MatchOpenBracket _ :: t -> tokenizer_impl t (OpenBracket :: acc)
            | MatchCloseBrace _ :: t -> tokenizer_impl t (CloseBrace :: acc)
            | MatchOpenBrace _ :: t -> tokenizer_impl t (OpenBrace :: acc)
            | MatchDoubleQuotes _ :: t ->
                let (rest, text) = scan_string t ""
                tokenizer_impl rest (Token.String text :: acc)
            | MatchDigit _ :: _ ->
                let (rest, number) = scan_digit input ""
                tokenizer_impl rest (Token.Number number :: acc)
            | MatchPlus _ :: (MatchDigit _ :: _ as digits) ->
                // A leading '+' is tolerated and dropped from the token text.
                let (rest, number) = scan_digit digits ""
                tokenizer_impl rest (Token.Number number :: acc)
            | MatchMinus _ :: (MatchDigit _ :: _ as digits) ->
                let (rest, number) = scan_digit digits "-"
                tokenizer_impl rest (Token.Number number :: acc)
            | _ ->
                // The accumulator must start empty: seeding it with any
                // character makes every literal lookup fail.
                let (rest, literal) = scan_other input ""
                match literal with
                | None -> failwith "Expected a literal value but found none"
                | Some(v) -> tokenizer_impl rest (v :: acc)
        tokenizer_impl (List.ofArray (input.ToCharArray())) [] |> List.rev

    /// Parsed JSON value. `Empty` is the result of parsing input that
    /// contains no tokens at all.
    type Ast =
        | String of string
        | Number of string
        | Boolean of bool
        | Null
        | Array of Ast list
        | Obj of (string * Ast) list
        | Empty

    /// Parses `input` into an `Ast`.
    /// Returns `Empty` for input with no tokens. Arrays and objects may be
    /// nested to any depth and may be empty. Object members keep their source
    /// order. Raises (via failwith) on any malformed token stream, including
    /// trailing commas and tokens left over after the top-level value.
    let parse (input: string) =
        // Each helper returns the parsed value plus the tokens it did not
        // consume, so values of any length can be nested.
        let rec parse_value (tokens: Token list) : Ast * Token list =
            match tokens with
            | Token.String(v) :: rest -> (Ast.String(v), rest)
            | Token.Number(v) :: rest -> (Ast.Number(v), rest)
            | Token.Null :: rest -> (Ast.Null, rest)
            | Token.True :: rest -> (Ast.Boolean(true), rest)
            | Token.False :: rest -> (Ast.Boolean(false), rest)
            | Token.OpenBracket :: Token.CloseBracket :: rest -> (Ast.Array([]), rest)
            | Token.OpenBracket :: rest -> parse_array rest []
            | Token.OpenBrace :: Token.CloseBrace :: rest -> (Ast.Obj([]), rest)
            | Token.OpenBrace :: rest -> parse_object rest []
            | _ -> failwith "Invalid Json Token Stream"
        // `tokens` starts at an element; `acc` holds earlier elements reversed.
        and parse_array (tokens: Token list) (acc: Ast list) : Ast * Token list =
            let (element, rest) = parse_value tokens
            match rest with
            | Token.Comma :: Token.CloseBracket :: _ -> failwith "Unexpected , "
            | Token.Comma :: more -> parse_array more (element :: acc)
            | Token.CloseBracket :: more -> (Ast.Array(List.rev (element :: acc)), more)
            | _ -> failwith "Unexpected token in stream"
        // `tokens` starts at a member name; `acc` holds earlier members reversed.
        and parse_object (tokens: Token list) (acc: (string * Ast) list) : Ast * Token list =
            match tokens with
            | Token.String(property) :: Token.Colon :: valueTokens ->
                let (value, rest) = parse_value valueTokens
                let members = (property, value) :: acc
                match rest with
                | Token.Comma :: more -> parse_object more members
                | Token.CloseBrace :: more -> (Ast.Obj(List.rev members), more)
                | _ -> failwith "Unexpected token in stream"
            | _ -> failwith "Unexpected token in stream"
        match tokenizer input with
        | [] -> Ast.Empty
        | tokens ->
            match parse_value tokens with
            | (ast, []) -> ast
            // Anything left after the top-level value is an error.
            | _ -> failwith "Invalid Json Token Stream"
module UnitTests =

    [<TestFixture>]
    module TokenizerTests =
        open Json

        /// A quoted string becomes one String token holding the text
        /// between the quotes.
        [<Test>]
        let scan_string_test () =
            let text = @"""Hello World"""
            let tokens = tokenizer text
            Assert.That(tokens, Is.EqualTo([ Token.String "Hello World" ]))

        /// A run of digits becomes one Number token holding the digits.
        [<Test>]
        let scan_number_test () =
            let text = "123455"
            let tokens = tokenizer text
            Assert.That(tokens, Is.EqualTo([ Token.Number "123455" ]))

        /// Brackets, commas and numbers are emitted in source order.
        [<Test>]
        let scan_array_test () =
            let text = "[1,2,3,4,5,66]"
            let tokens = tokenizer text
            let expected =
                [ Token.OpenBracket
                  Token.Number "1"
                  Token.Comma
                  Token.Number "2"
                  Token.Comma
                  Token.Number "3"
                  Token.Comma
                  Token.Number "4"
                  Token.Comma
                  Token.Number "5"
                  Token.Comma
                  Token.Number "66"
                  Token.CloseBracket ]
            Assert.That(tokens, Is.EqualTo(expected))

        /// A flat array parses to an Array node with its elements in order.
        [<Test>]
        let array_parse_example () =
            let text = "[1,2,3,4,5,66]"
            let ast = parse text
            let expected =
                Ast.Array
                    [ Ast.Number "1"
                      Ast.Number "2"
                      Ast.Number "3"
                      Ast.Number "4"
                      Ast.Number "5"
                      Ast.Number "66" ]
            Assert.That(ast, Is.EqualTo(expected))

        /// A single-member object parses to an Obj node with one
        /// (name, value) pair.
        [<Test>]
        let obj_example () =
            let text = @"{ ""Name"" : ""Blair Davidson"" }"
            let ast = parse text
            let expected = Ast.Obj [ ("Name", Ast.String "Blair Davidson") ]
            Assert.That(ast, Is.EqualTo(expected))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment