Created
November 15, 2024 02:47
-
-
Save samdphillips/e7056fb09101db596921c1d38153273d to your computer and use it in GitHub Desktop.
Rhombus JSON POC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang rhombus/static/and_meta | |
import: | |
lib("racket/base.rkt"): | |
only: #{bytes-convert} | |
#{bytes-open-converter} | |
#{object-name} | |
#{port-counts-lines?} | |
#{port-next-location} | |
export: | |
read_json_token | |
read_json | |
JsExpr | |
// Static-only view of an input port that adds source-location helpers and
// narrows the result types of the byte-reading methods used by the reader.
veneer Input(this :: Port.Input):
  // Asks Racket's `port-counts-lines?` about the underlying port.
  method count_lines_enabled() :~ Boolean:
    base.#{port-counts-lines?}(this)

  // Forwards to Racket's `port-next-location`; the three results are bound
  // as (line, column, position) by the callers below.
  method next_location() :~ values(PosInt, NonnegInt, PosInt):
    base.#{port-next-location}(this)

  // Plain delegations to the underlying port, with result annotations.
  method peek_byte() :~ Byte || Port.EOF:
    this.peek_byte()

  method read_byte() :~ Byte || Port.EOF:
    this.read_byte()

  // Reads up to `i` bytes; a byte-string result is snapshotted so callers
  // receive an immutable value, anything else (EOF) is passed through.
  method read_bytes(i :: NonnegInt) :~ ImmutableBytes || Port.EOF:
    match this.read_bytes(i)
    | chunk :: Bytes: chunk.snapshot()
    | other: other

  // Third component of `next_location()`.
  method cur_pos() :: NonnegInt || False:
    def values(_, _, position) = next_location()
    position

  // A zero-span `Srcloc` for the next location when line counting is
  // enabled on the port, `#false` otherwise.
  method srcloc() :: Srcloc || False:
    cond
    | count_lines_enabled():
        def values(line, col, position) = next_location()
        Srcloc(base.#{object-name}(this), line, col, position, 0)
    | ~else:
        #false
// Initial capacity, in bytes, of a fresh `StringBuf`.
def _init_buf_size = 1024

// Growable byte buffer used to accumulate the UTF-8 encoding of a string.
// `buf` is the backing store and `pos` is the number of bytes written so far.
class StringBuf(mutable buf :: MutableBytes, mutable pos :: NonnegInt):
  constructor ():
    super(Bytes.make(_init_buf_size), 0)

  // Total size of the backing store.
  method capacity():
    buf.length()

  // Bytes still available before the backing store must grow.
  method free_space():
    capacity() - pos

  // Replaces the backing store with a larger one, preserving the first
  // `pos` bytes. The new capacity is 1.5x the old one, or larger when that
  // would still leave fewer than `needed` free bytes (a single 1.5x step is
  // not enough for a chunk bigger than half the current capacity).
  method grow_buffer(needed :: NonnegInt = 1):
    def new_capacity = math.max((capacity() * 3) div 2, pos + needed)
    def new_buf = Bytes.make(new_capacity)
    new_buf.copy_from(0, buf, 0, pos)
    buf := new_buf

  // Appends one character: code points up to 127 are stored as a single
  // byte, anything above goes through its UTF-8 encoding.
  method add_char(c :: Char):
    if c.to_int() > 127
    | // `to_string` names this class's method here, so reach the
      // general-purpose one through the `rhombus` prefix.
      import: rhombus only: to_string
      add_bytes(rhombus.to_string(c).utf8_bytes())
    | add_byte(c.to_int())

  // Appends a single byte, growing first if the buffer is full.
  method add_byte(b :: Byte):
    when free_space() == 0
    | grow_buffer(1)
    buf[pos] := b
    pos := pos + 1

  // Appends the first `amt` bytes of `bbuf` (all of it by default).
  // An empty chunk is accepted and appends nothing.
  method add_bytes(bbuf :: Bytes, amt :: NonnegInt = bbuf.length()):
    when free_space() < amt
    | grow_buffer(amt)
    buf.copy_from(pos, bbuf, 0, amt)
    pos := pos + amt

  // Decodes the bytes written so far as UTF-8.
  method to_string() :: String:
    buf.utf8_string(#false, 0, pos)
// Raises an `Exn.Fail.Read` on behalf of `who` with message `msg`.
// The exception carries the port's current source location when
// `in.srcloc()` produces one, and an empty location list otherwise.
fun read_error(who, msg, in :~ Input):
  fun make_exn(text, marks):
    def loc = in.srcloc()
    def locs = if loc | PairList[loc] | PairList[]
    Exn.Fail.Read(text, marks, locs)
  error(~who: who, ~exn: make_exn, msg)
// Raises an `Exn.Fail.Read.EOF` on behalf of `who` with the fixed message
// "unexpected eof". The exception carries the port's current source
// location when `in.srcloc()` produces one.
fun read_eof_error(who, in :~ Input):
  fun make_exn(text, marks):
    def loc = in.srcloc()
    def locs = if loc | PairList[loc] | PairList[]
    Exn.Fail.Read.EOF(text, marks, locs)
  error(~who: who, ~exn: make_exn, "unexpected eof")
// True when `b` is one of the four whitespace bytes skipped between
// tokens: space (0x20), tab (0x09), line feed (0x0a), carriage return (0x0d).
// EOF is accepted as an argument and is never whitespace.
fun is_ws_byte(b :: Byte || Port.EOF):
  b == 0x20 || b == 0x09 || b == 0x0a || b == 0x0d
// Consumes bytes from `in` for as long as the next byte is whitespace
// according to `is_ws_byte`; stops (without consuming) at anything else.
fun skip_spaces(in :: Input):
  if is_ws_byte(in.peek_byte())
  | in.read_byte()
    skip_spaces(in)
  | #void
// Byte value of the character "0"; the base for digit arithmetic.
def ZERO = Byte#"0"
// Inclusive byte range "0" through "9".
def DIGITS = ZERO ..= Byte#"9"
// Inclusive byte range "1" through "9" (digits allowed to start a
// multi-digit integer).
def ONENINE = Byte#"1" ..= Byte#"9"
// True when `b` is the byte of an ASCII decimal digit, "0" through "9".
fun is_digit_byte(b :: Byte) :~ Boolean:
  Byte#"0" <= b && b <= Byte#"9"
// True when `b` is the byte of an ASCII digit "1" through "9".
fun is_digit_nonzero(b :: Byte) :~ Boolean:
  Byte#"1" <= b && b <= Byte#"9"
// Annotation for a byte that is an ASCII decimal digit. The `Byte` check
// comes first, so `is_digit_byte` is only ever applied to a byte.
annot.macro 'DigitByte':
  'Byte && satisfying(is_digit_byte)'
// True when `b` fits in seven bits (0 through 127), i.e. it is a complete
// single-byte UTF-8 sequence on its own.
fun is_seven_bit(b :: Byte) :~ Boolean:
  b <= 127
// Distance of `b` from the byte for "0"; for a digit byte this is the
// digit's numeric value.
fun digit_value(b :: Byte):
  def base_byte = ZERO
  b - base_byte
// Reads exactly four hexadecimal digits from `in` and returns the 16-bit
// value they spell, most significant digit first. Raises a read error
// (reported as `read_json_string`) on EOF or on a non-hex byte.
fun decode_hex(in :: Input) :: NonnegInt:
  // Reads one byte and returns its value as a hex digit, 0 through 15.
  fun nibble():
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_string, in)
    | b :: DigitByte:
        digit_value(b)
    | b:
        cond
        | Byte#"A" <= b && b <= Byte#"F":
            b - Byte#"A" + 10
        | Byte#"a" <= b && b <= Byte#"f":
            b - Byte#"a" + 10
        | ~else:
            read_error(#'read_json_string,
                       "expected hex char, got " +& Char.from_int(b),
                       in)
  // Separate definitions keep the four reads in input order.
  def d3 = nibble()
  def d2 = nibble()
  def d1 = nibble()
  def d0 = nibble()
  ((d3 * 16 + d2) * 16 + d1) * 16 + d0
// Reads `bs.length()` bytes from `in` and checks that they are exactly
// `bs`. Returns the bytes read. Raises an EOF read error when the port is
// already at EOF, and a read error naming the expected and actual bytes
// when they differ (including a short read near the end of input).
fun read_literal(bs :: Bytes, in :: Input) :: Bytes:
  def in_read = in.read_bytes(bs.length())
  when in_read is_a Port.EOF
  | read_eof_error(#'read_literal, in)
  // `in_read` is immutable and `==` distinguishes mutable from immutable
  // byte strings, so compare against an immutable view of `bs`.
  unless bs.snapshot() == in_read
  | def wanted = to_string(bs, ~mode: #'expr)
    def got = to_string(in_read, ~mode: #'expr)
    read_error(#'read_literal,
               "expected literal " ++ wanted ++ ", got " ++ got,
               in)
  in_read
// Reads the remainder of a JSON string from `in`; the opening quote must
// already have been consumed. Returns the decoded string once the closing
// quote is read. Every failure is reported through `read_error` /
// `read_eof_error`, so it carries the port's source location when available.
fun read_json_string(in :: Input) :: String:
  def buf = StringBuf()

  // Handles the byte following a backslash.
  fun decode_escape():
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_string, in)
    | Byte#"\"": buf.add_char(#{#\"})
    | Byte#"\\": buf.add_char(#{#\\})
    | Byte#"/": buf.add_char(#{#\/})
    | Byte#"b": buf.add_char(#{#\backspace})
    | Byte#"f": buf.add_char(#{#\page})
    | Byte#"n": buf.add_char(#{#\newline})
    | Byte#"r": buf.add_char(#{#\return})
    | Byte#"t": buf.add_char(#{#\tab})
    | Byte#"u": decode_unichar()
    | b:
        read_error(#'read_json_string,
                   "unknown escape " +& Char.from_int(b),
                   in)

  // Handles the four hex digits after `\u`, combining a high surrogate
  // with the `\u`-escaped low surrogate that must follow it.
  fun decode_unichar():
    fun err():
      read_error(#'read_json_string,
                 "missing second half of surrogate pair",
                 in)
    def c = decode_hex(in)
    def ch:
      cond
      | (0xD800 ..= 0xDBFF).has_element(c):
          block:
            unless Byte#"\\" == in.read_byte() | err()
            def e = in.read_byte()
            // An upper-case `U` is tolerated for the second escape.
            unless e == Byte#"U" || e == Byte#"u" | err()
            def c2 = decode_hex(in)
            if (0xDC00 ..= 0xDFFF).has_element(c2)
            | Char.from_int((c - 0xD800) * 0x400 + (c2 - 0xDC00) + 0x10000)
            | err()
      | (0xDC00 ..= 0xDFFF).has_element(c):
          // A low surrogate cannot start a pair; treating it as a high
          // surrogate would compute a value above 0x10FFFF.
          read_error(#'read_json_string,
                     "low surrogate without preceding high surrogate",
                     in)
      | ~else:
          Char.from_int(c)
    buf.add_char(ch)

  // Collects one multi-byte UTF-8 sequence starting with lead byte `b`,
  // validating it with a UTF-8 to UTF-8 converter, and appends it to `buf`.
  fun decode_utf8(b :: Byte):
    def conv = base.#{bytes-open-converter}("UTF-8", "UTF-8")
    decode_utf8_rest(0, 1, Bytes.make(6, b), conv)

  // `ubuf[0 .. end]` holds the bytes gathered so far. One more byte is read
  // each time the converter reports that the input stops mid-sequence.
  fun decode_utf8_rest(start, end, ubuf :~ MutableBytes, conv):
    def values(wamt, ramt, state):
      base.#{bytes-convert}(conv, ubuf, start, end, ubuf, 0, 6)
    match state
    | #'complete:
        buf.add_bytes(ubuf, end)
    | #'aborts:
        match in.read_byte()
        | _ :: Port.EOF:
            read_eof_error(#'read_json_string, in)
        | b:
            ubuf[end] := b
            decode_utf8_rest(start + ramt, end + 1, ubuf, conv)
    | _:
        read_error(#'read_json_string, "utf8 decode error", in)

  // Main loop: dispatches on each byte until the closing quote.
  fun read_piece() :: String:
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_string, in)
    | Byte#"\"":
        buf.to_string()
    | Byte#"\\":
        decode_escape()
        read_piece()
    | b :: satisfying(is_seven_bit):
        buf.add_byte(b)
        read_piece()
    | b:
        decode_utf8(b)
        read_piece()

  read_piece()
// Reads a JSON number from `in`, dispatching on its first byte: "0",
// "-", or a digit "1" through "9". EOF or any other first byte raises a
// read error instead of falling out of the match.
fun read_json_number(in :: Input) :: Number:
  match in.read_byte()
  | _ :: Port.EOF:
      // Checked first so the digit predicate below only ever sees a byte.
      read_eof_error(#'read_json_number, in)
  | Byte#"0": read_zero(in, #true)
  | Byte#"-": read_negative_number(in)
  | b :: satisfying(is_digit_nonzero):
      read_number(in, #true, digit_value(b))
  | b:
      read_error(#'read_json_number,
                 "expected '-' or digit, got " +& Char.from_int(b),
                 in)
// Continues a number whose integer part is a single "0" (already
// consumed). When a fraction or exponent follows, parsing continues in
// `read_number` with the caller's sign; otherwise the result is exact 0.
fun read_zero(in :~ Input, is_positive):
  match in.peek_byte()
  | Byte#"." || Byte#"e" || Byte#"E":
      // Forward the sign: passing `#true` here made "-0.5" read as 0.5.
      read_number(in, is_positive, 0)
  | _: 0
// Continues a number after its leading "-" has been consumed. The next
// byte must be a digit; EOF or anything else raises a read error.
fun read_negative_number(in :~ Input):
  match in.peek_byte()
  | _ :: Port.EOF:
      // Checked first so the digit predicate below only ever sees a byte.
      read_eof_error(#'read_json_number, in)
  | Byte#"0":
      in.read_byte()
      read_zero(in, #false)
  | _ :: satisfying(is_digit_nonzero):
      // The digit is left in the port; `read_number` consumes it.
      read_number(in, #false, 0)
  | b:
      read_error(#'read_json_number,
                 "expected digit after '-', got " +& Char.from_int(b),
                 in)
// Reads the rest of a number from `in`. `init_value` is the value of the
// integer digits consumed so far and `is_positive` is the sign chosen by
// the caller. A number with neither fraction nor exponent is returned
// exact; otherwise the result is converted with `math.inexact`.
fun read_number(in :~ Input, is_positive, init_value):
  // Combines the digit accumulator `num` with the power-of-ten exponent
  // `exp`. The sign is applied after the exactness conversion so that an
  // inexact negative zero keeps its sign.
  fun build_number(num, exp, is_inexact_result):
    def exactness = if is_inexact_result | math.inexact | values
    def sign = if is_positive | 1 | -1
    sign * exactness(num * 10 ** exp)

  // Integer digits; hands over at ".", "e" or "E".
  fun read_integer(i_val):
    match in.peek_byte()
    | _ :: DigitByte:
        read_integer(10 * i_val + digit_value(in.read_byte()))
    | Byte#"e" || Byte#"E":
        in.read_byte()
        read_expt_init(i_val, 0)
    | Byte#".":
        in.read_byte()
        read_fraction_init(i_val)
    | _: build_number(i_val, 0, #false)

  // First byte after ".". Note that an exponent marker directly after the
  // "." is accepted here as well as a digit.
  fun read_fraction_init(i_val):
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_number, in)
    | b :: DigitByte:
        read_fraction(10 * i_val + digit_value(b), -1)
    | Byte#"e" || Byte#"E":
        read_expt_init(i_val, 0)
    | b:
        read_error(#'read_json_number,
                   "expected at least one digit after '.'. got: "
                     +& Char.from_int(b),
                   in)

  // Remaining fraction digits; `p_val` counts them as a negative exponent.
  fun read_fraction(i_val, p_val):
    match in.peek_byte()
    | b :: DigitByte:
        in.read_byte()
        read_fraction(10 * i_val + digit_value(b), p_val - 1)
    | Byte#"e" || Byte#"E":
        in.read_byte()
        read_expt_init(i_val, p_val)
    | _:
        build_number(i_val, p_val, #true)

  // Optional sign of the exponent; no sign means positive.
  fun read_expt_init(i_val, p_val):
    def s_val:
      match in.peek_byte()
      | Byte#"+": in.read_byte(); 1
      | Byte#"-": in.read_byte(); -1
      | _: 1
    read_expt(i_val, p_val, s_val, 0)

  // Exponent digits; the signed exponent is added to `p_val` at the end.
  fun read_expt(i_val, p_val, s_val, e_val):
    match in.peek_byte()
    | _ :: DigitByte:
        def e = 10 * e_val + digit_value(in.read_byte())
        read_expt(i_val, p_val, s_val, e)
    | _: build_number(i_val, p_val + s_val * e_val, #true)

  read_integer(init_value)
// Reads one JSON token from `in`, skipping leading whitespace, and returns
// it as a syntax object, or returns EOF when no input remains.
//
// Punctuation and `null` become identifiers (`object_open`,
// `object_close`, `list_open`, `list_close`, `comma`, `colon`, `null`);
// `true`/`false`, strings and numbers become literal syntax. When the port
// provides source locations, the token is relocated to span the bytes it
// was read from.
fun read_json_token(in :: Input) :: Syntax || Port.EOF:
  skip_spaces(in)
  def start_loc = in.srcloc()

  // Extends the starting location to cover everything consumed since.
  fun make_srcloc():
    if start_loc
    | def Srcloc(name, row, col, start_pos, _) = start_loc
      def end_pos = in.cur_pos()
      Srcloc(name, row, col, start_pos, end_pos - start_pos)
    | #false

  fun make_token(val):
    Syntax.make(val).relocate(make_srcloc())

  match in.peek_byte()
  | Port.eof: Port.eof
  | Byte#"n":
      read_literal(#"null", in)
      make_token(#'null)
  | Byte#"t":
      read_literal(#"true", in)
      make_token(#true)
  | Byte#"f":
      read_literal(#"false", in)
      make_token(#false)
  | Byte#"{":
      in.read_byte()
      make_token(#'object_open)
  | Byte#"}":
      in.read_byte()
      make_token(#'object_close)
  | Byte#"[":
      in.read_byte()
      make_token(#'list_open)
  | Byte#"]":
      in.read_byte()
      make_token(#'list_close)
  | Byte#",":
      in.read_byte()
      make_token(#'comma)
  | Byte#":":
      in.read_byte()
      make_token(#'colon)
  | Byte#"\"":
      in.read_byte()
      def s = read_json_string(in)
      make_token(s)
  | Byte#"-" || (_ :: DigitByte):
      make_token(read_json_number(in))
  | b:
      // Reported through `read_error` like the other lexical errors, so
      // the exception carries the source location.
      read_error(#'read_json_token,
                 "invalid token start char " +& Char.from_int(b),
                 in)
// Predicate for values that represent JSON data: numbers, strings,
// booleans, the symbol `null`, lists, and maps.
//
// With `~shallow: #true` (the default) any list or map is accepted without
// inspecting its contents. With `~shallow: #false` every list element must
// itself qualify, and every map key must be a string with a qualifying value.
fun is_jsexpr(v, ~shallow: shallow = #true):
  fun deep(item):
    is_jsexpr(item, ~shallow: #false)
  match v
  | _ :: Number: #true
  | _ :: String: #true
  | _ :: Boolean: #true
  | #'null: #true
  | vs :: List:
      cond
      | shallow: #true
      | ~else:
          for all (item: vs):
            deep(item)
  | m :: Map:
      cond
      | shallow: #true
      | ~else:
          for all (values(key, val): m):
            key is_a String && deep(val)
  | _: #false
// `JsExpr` accepts a value by its outermost shape only (shallow check);
// `JsExpr.deep` also checks the contents of lists and maps.
annot.macro
| 'JsExpr.deep':
    'satisfying(fun (v): is_jsexpr(v, ~shallow: #false))'
| 'JsExpr':
    'satisfying(is_jsexpr)'
// Reads one complete JSON value from `in` and returns it: numbers,
// strings and booleans as themselves, `null` as the symbol `null`, arrays
// as lists, and objects as maps keyed by string. Returns EOF when the
// input holds no further tokens.
//
// EOF in the middle of a value, or an unexpected token, raises an error.
fun read_json(in :: Port.Input) :: JsExpr.deep || Port.EOF:
  fun expected(exp, actual):
    error(~who: #'read_json,
          "expected " ++ exp,
          error.val(~label: "token", actual))

  fun read1(): read_json_token(in)

  // After "[": either an immediate "]" or the first element.
  fun read_json_list():
    match read1()
    | 'list_close': []
    | tok:
        read_json_list_rest([read_inner(tok)])

  // After an element: "," and another element, or "]".
  fun read_json_list_rest(vs :~ List):
    match read1()
    | 'comma': read_json_list_rest(vs.add(read_inner(read1())))
    | 'list_close': vs
    | tok:
        expected("',' or ']'", tok)

  // After "{": either an immediate "}" or the first key/value pair.
  fun read_json_object():
    match read1()
    | 'object_close': Map()
    | tok: read_kvp(tok, Map())

  // After a key/value pair: "," and another pair, or "}".
  fun read_json_object_rest(m):
    match read1()
    | 'comma': read_kvp(read1(), m)
    | 'object_close': m
    | tok:
        expected("',' or '}'", tok)

  // `next` must be a string key; it is followed by ":" and a value.
  fun read_kvp(next, m :~ Map):
    match next
    | '$(k :: String)':
        def v:
          match read1()
          | 'colon': read_inner(read1())
          | tok: expected("colon", tok)
        read_json_object_rest(m ++ {k.unwrap(): v})
    | tok:
        expected("string", tok)

  // Converts the token `next` (reading further tokens for containers)
  // into a value. Running out of input here is an error: only the
  // top-level read below may report EOF as a result.
  fun read_inner(next):
    match next
    | Port.eof: read_eof_error(#'read_json, in)
    | '$(n :: Number)': n.unwrap()
    | '$(s :: String)': s.unwrap()
    | '$(b :: Boolean)': b.unwrap()
    | 'null': #'null
    | 'list_open': read_json_list()
    | 'object_open': read_json_object()
    | tok: expected("number, string, boolean, 'null', '[', or '{'", tok)

  match read1()
  | Port.eof: Port.eof
  | tok: read_inner(tok)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment