Skip to content

Instantly share code, notes, and snippets.

@jtpaasch
Last active August 30, 2018 16:45
Show Gist options
  • Select an option

  • Save jtpaasch/45897b2a582d77deb577d1b54efbc9d8 to your computer and use it in GitHub Desktop.

Select an option

Save jtpaasch/45897b2a582d77deb577d1b54efbc9d8 to your computer and use it in GitHub Desktop.
Simple JSON parser (OCaml)
module Token = struct
type t =
| L_array_bracket
| R_array_bracket
| L_object_bracket
| R_object_bracket
| Colon
| Comma
| Eof
| Null
| True
| False
| Number of string
| String of string
let string_of t =
match t with
| L_array_bracket -> "<TK [>"
| R_array_bracket -> "<TK ]>"
| L_object_bracket -> "<TK {>"
| R_object_bracket -> "<TK }>"
| Colon -> "<TK :>"
| Comma -> "<TK ,>"
| Eof -> "<TK Eof>"
| Null -> "<TK null>"
| True -> "<TK true>"
| False -> "<TK false>"
| Number s -> Printf.sprintf "<TK '%s'>" s
| String s -> Printf.sprintf "<TK '%s'>" s
end
module Lexer = struct
exception LexError of string
let slice s n =
let len = String.length s in
match len >= n with
| false -> ""
| true -> String.sub s n (len - n)
let get_null s =
let len = String.length s in
match len >= 4 with
| true -> Token.Null, slice s 4
| false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s))
let get_true s =
let len = String.length s in
match len >= 4 with
| true -> Token.True, slice s 4
| false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s))
let get_false s =
let len = String.length s in
match len >= 5 with
| true -> Token.False, slice s 5
| false -> raise (LexError (Printf.sprintf "Unrecognized '%s'" s))
let rec build_number s acc =
match String.length s > 0 with
| false -> acc, s
| true ->
begin
match s.[0] with
| '0'..'9' ->
begin
let s' = slice s 1 in
let acc' = Printf.sprintf "%s%c" acc s.[0] in
build_number s' acc'
end
| '.' ->
begin
let s' = slice s 1 in
match String.length s' > 0 with
| false -> raise (LexError (Printf.sprintf "Expecting digit after dot in '%s'" s))
| true ->
begin
match s'.[0] with
| '0'..'9' ->
begin
let s'' = slice s' 1 in
let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in
build_number s'' acc'
end
| _ -> raise (LexError (Printf.sprintf "Expecting digit after dot in '%s'" s))
end
end
| 'e' ->
begin
let s' = slice s 1 in
match String.length s' > 0 with
| false -> raise (LexError (Printf.sprintf "Expecting digit after 'e' in '%s'" s))
| true ->
begin
match s'.[0] with
| '0'..'9' ->
begin
let s'' = slice s' 1 in
let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in
build_number s'' acc'
end
| _ -> raise (LexError (Printf.sprintf "Expecting digit after 'e' in '%s'" s))
end
end
| 'E' ->
begin
let s' = slice s 1 in
match String.length s' > 0 with
| false -> raise (LexError (Printf.sprintf "Expecting digit after 'E' in '%s'" s))
| true ->
begin
match s'.[0] with
| '0'..'9' ->
begin
let s'' = slice s' 1 in
let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in
build_number s'' acc'
end
| _ -> raise (LexError (Printf.sprintf "Expecting digit after 'E' in '%s'" s))
end
end
| _ -> acc, s
end
let get_number s =
let value, s' = build_number s "" in
Token.Number value, s'
let get_neg_number s =
let s' = slice s 1 in
match String.length s' > 0 with
| false -> raise (LexError "Expecting number after minus symbol")
| true ->
begin
match s'.[0] with
| '0'..'9' ->
begin
let value, s'' = build_number s' "" in
let neg_value = Printf.sprintf "-%s" value in
Token.Number neg_value, s''
end
| _ -> raise (LexError "Expecting number after minus symbol")
end
let rec build_string s acc =
match String.length s > 0 with
| false -> acc, s
| true ->
begin
match s.[0] with
| '"' ->
begin
let s' = slice s 1 in
let acc' = Printf.sprintf "%s%c" acc s.[0] in
acc', s'
end
| '\\' ->
begin
let s' = slice s 1 in
match String.length s' > 0 with
| false ->
raise (LexError (Printf.sprintf "Expecting control character after slash in '%s'" s))
| true ->
begin
match s'.[0] with
| '"'
| '\\'
| '/'
| 'b'
| 'f'
| 'n'
| 'r'
| 't' ->
begin
let s'' = slice s' 1 in
let acc' = Printf.sprintf "%s%c%c" acc s.[0] s'.[0] in
build_string s'' acc'
end
| 'u' ->
begin
let s'' = slice s' 1 in
match String.length s'' > 3 with
| false ->
raise (LexError (Printf.sprintf "Expecting 4 hexadecimal digits after \\u in '%s'" s))
| true ->
begin
let hex_chars = [
'0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9';
'a'; 'b'; 'c'; 'd'; 'e'; 'f';
'A'; 'B'; 'C'; 'D'; 'E'; 'F';
] in
let matched_chars =
List.filter (fun c -> List.mem c hex_chars) [s''.[0]; s''.[1]; s''.[2]; s''.[3]] in
match List.length matched_chars = 4 with
| true ->
begin
let code = String.sub s'' 0 4 in
let s''' = slice s'' 4 in
let acc' = Printf.sprintf "%s%c%c%s" acc s.[0] s'.[0] code in
build_string s''' acc'
end
| false ->
raise (LexError (Printf.sprintf "Expecting 4 hexadecimal digits after \\u in '%s'" s))
end
end
| _ ->
raise (LexError (Printf.sprintf "Expecting control character after slash in '%s'" s))
end
end
| _ ->
begin
let s' = slice s 1 in
let acc' = Printf.sprintf "%s%c" acc s.[0] in
build_string s' acc'
end
end
let get_string s =
let value, s' = build_string s "" in
match String.length value < 1 with
| true -> raise (LexError (Printf.sprintf "Expecting string after opening quotation mark in '%s'" s))
| false ->
begin
let last_char = value.[String.length value - 1] in
match last_char with
| '"' ->
begin
let value' = String.sub value 0 (String.length value - 1) in
Token.String value', s'
end
| _ -> raise (LexError (Printf.sprintf "Expecting closing quotation mark in '%s'" s))
end
let rec lex s acc =
match String.length s > 0 with
| false -> List.append acc [Token.Eof]
| true ->
begin
match s.[0] with
| ' '
| '\t'
| '\b'
| '\r'
| '\n' -> lex (slice s 1) acc
| 'n' ->
begin
let token, the_rest = get_null s in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| 't' ->
begin
let token, the_rest = get_true s in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| 'f' ->
begin
let token, the_rest = get_false s in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| '-' ->
begin
let token, the_rest = get_neg_number s in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| '0'..'9' ->
begin
let token, the_rest = get_number s in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| '"' ->
begin
let token, the_rest = get_string (slice s 1) in
let new_acc = List.append acc [token] in
lex the_rest new_acc
end
| '[' ->
begin
let new_acc = List.append acc [Token.L_array_bracket] in
lex (slice s 1) new_acc
end
| ']' ->
begin
let new_acc = List.append acc [Token.R_array_bracket] in
lex (slice s 1) new_acc
end
| '{' ->
begin
let new_acc = List.append acc [Token.L_object_bracket] in
lex (slice s 1) new_acc
end
| '}' ->
begin
let new_acc = List.append acc [Token.R_object_bracket] in
lex (slice s 1) new_acc
end
| ':' ->
begin
let new_acc = List.append acc [Token.Colon] in
lex (slice s 1) new_acc
end
| ',' ->
begin
let new_acc = List.append acc [Token.Comma] in
lex (slice s 1) new_acc
end
| _ -> raise (LexError (Printf.sprintf "Lexing error on: '%s'" s))
end
end
module Json = struct
exception ParseError of string
type t =
| Json_end
| Json_null
| Json_true
| Json_false
| Json_number of string
| Json_string of string
| Json_array of t list
| Json_object of (string * t) list
let rec string_of t =
match t with
| Json_end -> ""
| Json_null -> "null"
| Json_true -> "true"
| Json_false -> "false"
| Json_number s -> Printf.sprintf "%s" s
| Json_string s -> Printf.sprintf "\"%s\"" s
| Json_array l ->
begin
let mapped_strs = List.map string_of l in
let concatenated_strs = String.concat ", " mapped_strs in
Printf.sprintf "[%s]" concatenated_strs
end
| Json_object l ->
begin
let mapped_strs = List.map (fun (k, v) -> Printf.sprintf "\"%s\": %s" k (string_of v)) l in
let concatenated_strs = String.concat ", " mapped_strs in
Printf.sprintf "{%s}" concatenated_strs
end
let check_for_junk res tokens msg =
match tokens with
| [] -> res
| [Token.Eof] -> res
| Token.Comma :: _ -> res
| Token.R_array_bracket :: _ -> res
| Token.R_object_bracket :: _ -> res
| _ -> raise (ParseError msg)
let rec parse_array_elements tokens f acc =
let value, tokens = f tokens in
let new_acc = List.append acc [value] in
match tokens with
| Token.R_array_bracket :: the_rest -> new_acc, the_rest
| Token.Comma :: the_rest -> parse_array_elements the_rest f new_acc
| _ -> raise (ParseError "Expecting comma or closing array bracket")
let parse_array tokens f =
match tokens with
| [] -> raise (ParseError "Expecting array elements or closing bracket")
| Token.R_array_bracket :: the_rest -> [], the_rest
| _ -> parse_array_elements tokens f []
let rec parse_object_entries tokens f acc =
let key, value, tokens = match tokens with
| Token.String s :: Token.Colon :: the_rest ->
begin
let value, the_rest_rest = f the_rest in
s, value, the_rest_rest
end
| _ -> raise (ParseError "Expecting key/value pair in object") in
let new_acc = List.append acc [(key, value)] in
match tokens with
| Token.R_object_bracket :: the_rest -> new_acc, the_rest
| Token.Comma :: the_rest -> parse_object_entries the_rest f new_acc
| _ -> raise (ParseError "Expecting comma or closing object bracket")
let parse_object tokens f =
match tokens with
| [] -> raise (ParseError "Expecting object entries or closing bracket")
| Token.R_object_bracket :: the_rest -> [], the_rest
| _ -> parse_object_entries tokens f []
let rec parse tokens =
match tokens with
| [] -> (Json_end, [])
| Token.Null :: the_rest ->
begin
let res = (Json_null, the_rest) in
let msg = "Junk after null" in
check_for_junk res the_rest msg
end
| Token.True :: the_rest ->
begin
let res = (Json_true, the_rest) in
let msg = "Junk after true" in
check_for_junk res the_rest msg
end
| Token.False :: the_rest ->
begin
let res = (Json_false, the_rest) in
let msg = "Junk after false" in
check_for_junk res the_rest msg
end
| Token.Number n :: the_rest ->
begin
let res = (Json_number n, the_rest) in
let msg = "Junk after number" in
check_for_junk res the_rest msg
end
| Token.String s :: the_rest ->
begin
let res = (Json_string s, the_rest) in
let msg = "Junk after string" in
check_for_junk res the_rest msg
end
| Token.L_array_bracket :: the_rest ->
begin
let elements, the_rest_rest = parse_array the_rest parse in
let res = Json_array elements, the_rest_rest in
let msg = "Junk after array" in
check_for_junk res the_rest_rest msg
end
| Token.L_object_bracket :: the_rest ->
begin
let entries, the_rest_rest = parse_object the_rest parse in
let res = Json_object entries, the_rest_rest in
let msg = "Junk after object" in
check_for_junk res the_rest_rest msg
end
| _ -> raise (ParseError (Printf.sprintf "Expected JSON value."))
let decode s =
let tokens = Lexer.lex s [] in
let res, _ = parse tokens in
res
let encode j = string_of j
end
let get_arg =
match Array.length Sys.argv >= 2 with
| true -> Sys.argv.(1)
| false ->
begin
Printf.printf "Error: no JSON string as first argument.\n%!";
exit 1
end
let () =
let expr = get_arg in
try
let res = Json.decode expr in
Printf.printf "%s\n%!" (Json.encode res)
with e ->
match e with
| Lexer.LexError s -> Printf.printf "Error: %s\n%!" s; exit 1
| Json.ParseError s -> Printf.printf "Error: %s\n%!" s; exit 1
| _ -> Printf.printf "Error: %s\n%!" (Printexc.to_string e); exit 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment