Last active
August 30, 2018 16:45
-
-
Save jtpaasch/45897b2a582d77deb577d1b54efbc9d8 to your computer and use it in GitHub Desktop.
Simple JSON parser (OCaml)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module Token = struct | |
| type t = | |
| | L_array_bracket | |
| | R_array_bracket | |
| | L_object_bracket | |
| | R_object_bracket | |
| | Colon | |
| | Comma | |
| | Eof | |
| | Null | |
| | True | |
| | False | |
| | Number of string | |
| | String of string | |
| let string_of t = | |
| match t with | |
| | L_array_bracket -> "<TK [>" | |
| | R_array_bracket -> "<TK ]>" | |
| | L_object_bracket -> "<TK {>" | |
| | R_object_bracket -> "<TK }>" | |
| | Colon -> "<TK :>" | |
| | Comma -> "<TK ,>" | |
| | Eof -> "<TK Eof>" | |
| | Null -> "<TK null>" | |
| | True -> "<TK true>" | |
| | False -> "<TK false>" | |
| | Number s -> Printf.sprintf "<TK '%s'>" s | |
| | String s -> Printf.sprintf "<TK '%s'>" s | |
| end | |
| module Lexer = struct | |
| exception LexError of string | |
| let slice s n = | |
| let len = String.length s in | |
| match len >= n with | |
| | false -> "" | |
| | true -> String.sub s n (len - n) | |
| let get_null s = | |
| let len = String.length s in | |
| match len >= 4 with | |
| | true -> Token.Null, slice s 4 | |
| | false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s)) | |
| let get_true s = | |
| let len = String.length s in | |
| match len >= 4 with | |
| | true -> Token.True, slice s 4 | |
| | false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s)) | |
| let get_false s = | |
| let len = String.length s in | |
| match len >= 5 with | |
| | true -> Token.False, slice s 5 | |
| | false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s)) | |
| let rec build_number s acc = | |
| match String.length s > 0 with | |
| | false -> acc, s | |
| | true -> | |
| begin | |
| match s.[0] with | |
| | '0'..'9' -> | |
| begin | |
| let s' = slice s 1 in | |
| let acc' = Printf.sprintf "%s%c" acc s.[0] in | |
| build_number s' acc' | |
| end | |
| | '.' -> | |
| begin | |
| let s' = slice s 1 in | |
| match String.length s' > 0 with | |
| | false -> raise (LexError (Printf.sprintf "Expecting digit after dot in '%s'" s)) | |
| | true -> | |
| begin | |
| match s'.[0] with | |
| | '0'..'9' -> | |
| begin | |
| let s'' = slice s' 1 in | |
| let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in | |
| build_number s'' acc' | |
| end | |
| | _ -> raise (LexError (Printf.sprintf "Expecting digit after dot in '%s'" s)) | |
| end | |
| end | |
| | 'e' -> | |
| begin | |
| let s' = slice s 1 in | |
| match String.length s' > 0 with | |
| | false -> raise (LexError (Printf.sprintf "Expecting digit after 'e' in '%s'" s)) | |
| | true -> | |
| begin | |
| match s'.[0] with | |
| | '0'..'9' -> | |
| begin | |
| let s'' = slice s' 1 in | |
| let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in | |
| build_number s'' acc' | |
| end | |
| | _ -> raise (LexError (Printf.sprintf "Expecting digit after 'e' in '%s'" s)) | |
| end | |
| end | |
| | 'E' -> | |
| begin | |
| let s' = slice s 1 in | |
| match String.length s' > 0 with | |
| | false -> raise (LexError (Printf.sprintf "Expecting digit after 'E' in '%s'" s)) | |
| | true -> | |
| begin | |
| match s'.[0] with | |
| | '0'..'9' -> | |
| begin | |
| let s'' = slice s' 1 in | |
| let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in | |
| build_number s'' acc' | |
| end | |
| | _ -> raise (LexError (Printf.sprintf "Expecting digit after 'E' in '%s'" s)) | |
| end | |
| end | |
| | _ -> acc, s | |
| end | |
| let get_number s = | |
| let value, s' = build_number s "" in | |
| Token.Number value, s' | |
| let get_neg_number s = | |
| let s' = slice s 1 in | |
| match String.length s' > 0 with | |
| | false -> raise (LexError "Expecting number after minus symbol") | |
| | true -> | |
| begin | |
| match s'.[0] with | |
| | '0'..'9' -> | |
| begin | |
| let value, s'' = build_number s' "" in | |
| let neg_value = Printf.sprintf "-%s" value in | |
| Token.Number neg_value, s'' | |
| end | |
| | _ -> raise (LexError "Expecting number after minus symbol") | |
| end | |
| let rec build_string s acc = | |
| match String.length s > 0 with | |
| | false -> acc, s | |
| | true -> | |
| begin | |
| match s.[0] with | |
| | '"' -> | |
| begin | |
| let s' = slice s 1 in | |
| let acc' = Printf.sprintf "%s%c" acc s.[0] in | |
| acc', s' | |
| end | |
| | '\\' -> | |
| begin | |
| let s' = slice s 1 in | |
| match String.length s' > 0 with | |
| | false -> | |
| raise (LexError (Printf.sprintf "Expecting control character after slash in '%s'" s)) | |
| | true -> | |
| begin | |
| match s'.[0] with | |
| | '"' | |
| | '\\' | |
| | '/' | |
| | 'b' | |
| | 'f' | |
| | 'n' | |
| | 'r' | |
| | 't' -> | |
| begin | |
| let s'' = slice s' 1 in | |
| let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in | |
| build_string s'' acc' | |
| end | |
| | 'u' -> | |
| begin | |
| let s'' = slice s' 1 in | |
| match String.length s'' > 3 with | |
| | false -> | |
| raise (LexError (Printf.sprintf "Expecting 4 hexadecimal digits after \\u in '%s'" s)) | |
| | true -> | |
| begin | |
| let hex_chars = [ | |
| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; | |
| 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; | |
| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; | |
| ] in | |
| let matched_chars = | |
| List.filter (fun c -> List.mem c hex_chars) [s''.[0]; s''.[1]; s''.[2]; s''.[3]] in | |
| match List.length matched_chars = 4 with | |
| | true -> | |
| begin | |
| let code = String.sub s'' 0 4 in | |
| let s''' = slice s'' 4 in | |
| let acc' = Printf.sprintf "%s%c%c%s" acc s.[0] s'.[0] code in | |
| build_string s''' acc' | |
| end | |
| | false -> | |
| raise (LexError (Printf.sprintf "Expecting 4 hexadecimal digits after \\u in '%s'" s)) | |
| end | |
| end | |
| | _ -> | |
| raise (LexError (Printf.sprintf "Expecting control character after slash in '%s'" s)) | |
| end | |
| end | |
| | _ -> | |
| begin | |
| let s' = slice s 1 in | |
| let acc' = Printf.sprintf "%s%c" acc s.[0] in | |
| build_string s' acc' | |
| end | |
| end | |
| let get_string s = | |
| let value, s' = build_string s "" in | |
| match String.length value < 1 with | |
| | true -> raise (LexError (Printf.sprintf "Expecting string after opening quotation mark in '%s'" s)) | |
| | false -> | |
| begin | |
| let last_char = value.[String.length value - 1] in | |
| match last_char with | |
| | '"' -> | |
| begin | |
| let value' = String.sub value 0 (String.length value - 1) in | |
| Token.String value', s' | |
| end | |
| | _ -> raise (LexError (Printf.sprintf "Expecting closing quotation mark in '%s'" s)) | |
| end | |
| let rec lex s acc = | |
| match String.length s > 0 with | |
| | false -> List.append acc [Token.Eof] | |
| | true -> | |
| begin | |
| match s.[0] with | |
| | ' ' | |
| | '\t' | |
| | '\b' | |
| | '\r' | |
| | '\n' -> lex (slice s 1) acc | |
| | 'n' -> | |
| begin | |
| let token, the_rest = get_null s in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | 't' -> | |
| begin | |
| let token, the_rest = get_true s in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | 'f' -> | |
| begin | |
| let token, the_rest = get_false s in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | '-' -> | |
| begin | |
| let token, the_rest = get_neg_number s in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | '0'..'9' -> | |
| begin | |
| let token, the_rest = get_number s in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | '"' -> | |
| begin | |
| let token, the_rest = get_string (slice s 1) in | |
| let new_acc = List.append acc [token] in | |
| lex the_rest new_acc | |
| end | |
| | '[' -> | |
| begin | |
| let new_acc = List.append acc [Token.L_array_bracket] in | |
| lex (slice s 1) new_acc | |
| end | |
| | ']' -> | |
| begin | |
| let new_acc = List.append acc [Token.R_array_bracket] in | |
| lex (slice s 1) new_acc | |
| end | |
| | '{' -> | |
| begin | |
| let new_acc = List.append acc [Token.L_object_bracket] in | |
| lex (slice s 1) new_acc | |
| end | |
| | '}' -> | |
| begin | |
| let new_acc = List.append acc [Token.R_object_bracket] in | |
| lex (slice s 1) new_acc | |
| end | |
| | ':' -> | |
| begin | |
| let new_acc = List.append acc [Token.Colon] in | |
| lex (slice s 1) new_acc | |
| end | |
| | ',' -> | |
| begin | |
| let new_acc = List.append acc [Token.Comma] in | |
| lex (slice s 1) new_acc | |
| end | |
| | _ -> raise (LexError (Printf.sprintf "Lexing error on: '%s'" s)) | |
| end | |
| end | |
| module Json = struct | |
| exception ParseError of string | |
| type t = | |
| | Json_end | |
| | Json_null | |
| | Json_true | |
| | Json_false | |
| | Json_number of string | |
| | Json_string of string | |
| | Json_array of t list | |
| | Json_object of (string * t) list | |
| let rec string_of t = | |
| match t with | |
| | Json_end -> "" | |
| | Json_null -> "null" | |
| | Json_true -> "true" | |
| | Json_false -> "false" | |
| | Json_number s -> Printf.sprintf "%s" s | |
| | Json_string s -> Printf.sprintf "\"%s\"" s | |
| | Json_array l -> | |
| begin | |
| let mapped_strs = List.map string_of l in | |
| let concatenated_strs = String.concat ", " mapped_strs in | |
| Printf.sprintf "[%s]" concatenated_strs | |
| end | |
| | Json_object l -> | |
| begin | |
| let mapped_strs = List.map (fun (k, v) -> Printf.sprintf "\"%s\": %s" k (string_of v)) l in | |
| let concatenated_strs = String.concat ", " mapped_strs in | |
| Printf.sprintf "{%s}" concatenated_strs | |
| end | |
| let check_for_junk res tokens msg = | |
| match tokens with | |
| | [] -> res | |
| | [Token.Eof] -> res | |
| | Token.Comma :: _ -> res | |
| | Token.R_array_bracket :: _ -> res | |
| | Token.R_object_bracket :: _ -> res | |
| | _ -> raise (ParseError msg) | |
| let rec parse_array_elements tokens f acc = | |
| let value, tokens = f tokens in | |
| let new_acc = List.append acc [value] in | |
| match tokens with | |
| | Token.R_array_bracket :: the_rest -> new_acc, the_rest | |
| | Token.Comma :: the_rest -> parse_array_elements the_rest f new_acc | |
| | _ -> raise (ParseError "Expecting comma or closing array bracket") | |
| let parse_array tokens f = | |
| match tokens with | |
| | [] -> raise (ParseError "Expecting array elements or closing bracket") | |
| | Token.R_array_bracket :: the_rest -> [], the_rest | |
| | _ -> parse_array_elements tokens f [] | |
| let rec parse_object_entries tokens f acc = | |
| let key, value, tokens = match tokens with | |
| | Token.String s :: Token.Colon :: the_rest -> | |
| begin | |
| let value, the_rest_rest = f the_rest in | |
| s, value, the_rest_rest | |
| end | |
| | _ -> raise (ParseError "Expecting key/value pair in object") in | |
| let new_acc = List.append acc [(key, value)] in | |
| match tokens with | |
| | Token.R_object_bracket :: the_rest -> new_acc, the_rest | |
| | Token.Comma :: the_rest -> parse_object_entries the_rest f new_acc | |
| | _ -> raise (ParseError "Expecting comma or closing object bracket") | |
| let parse_object tokens f = | |
| match tokens with | |
| | [] -> raise (ParseError "Expecting object entries or closing bracket") | |
| | Token.R_object_bracket :: the_rest -> [], the_rest | |
| | _ -> parse_object_entries tokens f [] | |
| let rec parse tokens = | |
| match tokens with | |
| | [] -> (Json_end, []) | |
| | Token.Null :: the_rest -> | |
| begin | |
| let res = (Json_null, the_rest) in | |
| let msg = "Junk after null" in | |
| check_for_junk res the_rest msg | |
| end | |
| | Token.True :: the_rest -> | |
| begin | |
| let res = (Json_true, the_rest) in | |
| let msg = "Junk after true" in | |
| check_for_junk res the_rest msg | |
| end | |
| | Token.False :: the_rest -> | |
| begin | |
| let res = (Json_false, the_rest) in | |
| let msg = "Junk after false" in | |
| check_for_junk res the_rest msg | |
| end | |
| | Token.Number n :: the_rest -> | |
| begin | |
| let res = (Json_number n, the_rest) in | |
| let msg = "Junk after number" in | |
| check_for_junk res the_rest msg | |
| end | |
| | Token.String s :: the_rest -> | |
| begin | |
| let res = (Json_string s, the_rest) in | |
| let msg = "Junk after string" in | |
| check_for_junk res the_rest msg | |
| end | |
| | Token.L_array_bracket :: the_rest -> | |
| begin | |
| let elements, the_rest_rest = parse_array the_rest parse in | |
| let res = Json_array elements, the_rest_rest in | |
| let msg = "Junk after array" in | |
| check_for_junk res the_rest_rest msg | |
| end | |
| | Token.L_object_bracket :: the_rest -> | |
| begin | |
| let entries, the_rest_rest = parse_object the_rest parse in | |
| let res = Json_object entries, the_rest_rest in | |
| let msg = "Junk after object" in | |
| check_for_junk res the_rest_rest msg | |
| end | |
| | _ -> raise (ParseError (Printf.sprintf "Expected JSON value.")) | |
| let decode s = | |
| let tokens = Lexer.lex s [] in | |
| let res, _ = parse tokens in | |
| res | |
| let encode j = string_of j | |
| end | |
| let get_arg = | |
| match Array.length Sys.argv >= 2 with | |
| | true -> Sys.argv.(1) | |
| | false -> | |
| begin | |
| Printf.printf "Error: no JSON string as first argument.\n%!"; | |
| exit 1 | |
| end | |
| let () = | |
| let expr = get_arg in | |
| try | |
| let res = Json.decode expr in | |
| Printf.printf "%s\n%!" (Json.encode res) | |
| with e -> | |
| match e with | |
| | Lexer.LexError s -> Printf.printf "Error: %s\n%!" s; exit 1 | |
| | Json.ParseError s -> Printf.printf "Error: %s\n%!" s; exit 1 | |
| | _ -> Printf.printf "Error: %s\n%!" (Printexc.to_string e); exit 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment