Skip to content

Instantly share code, notes, and snippets.

@samdphillips
Created November 15, 2024 02:47
Show Gist options
  • Save samdphillips/e7056fb09101db596921c1d38153273d to your computer and use it in GitHub Desktop.
Save samdphillips/e7056fb09101db596921c1d38153273d to your computer and use it in GitHub Desktop.
Rhombus JSON POC
#lang rhombus/static/and_meta
import:
lib("racket/base.rkt"):
only: #{bytes-convert}
#{bytes-open-converter}
#{object-name}
#{port-counts-lines?}
#{port-next-location}
export:
read_json_token
read_json
JsExpr
// A static view over `Port.Input` that adds source-location helpers and
// gives the byte-reading methods narrower result annotations.
veneer Input (this :: Port.Input):
  // Whether the underlying port has line counting switched on.
  method count_lines_enabled() :~ Boolean:
    base.#{port-counts-lines?}(this)

  // The three location values reported by the port for the next read.
  method next_location() :~ values(PosInt, NonnegInt, PosInt):
    base.#{port-next-location}(this)

  // Looks at the next byte without consuming it.
  method peek_byte() :~ Byte || Port.EOF:
    this.peek_byte()

  // Consumes and returns the next byte.
  method read_byte() :~ Byte || Port.EOF:
    this.read_byte()

  // Reads up to `i` bytes; a byte-string result is returned as an
  // immutable snapshot, anything else (EOF) is passed through unchanged.
  method read_bytes(i :: NonnegInt) :~ ImmutableBytes || Port.EOF:
    def got = this.read_bytes(i)
    if got is_a Bytes
    | Bytes.snapshot(got)
    | got

  // The position component of `next_location()`.
  method cur_pos() :: NonnegInt || False:
    def values(_, _, position) = next_location()
    position

  // A zero-width `Srcloc` for the next read, or `#false` when the port is
  // not counting lines.
  method srcloc() :: Srcloc || False:
    cond
    | count_lines_enabled():
        def values(line, column, position) = next_location()
        Srcloc(base.#{object-name}(this), line, column, position, 0)
    | ~else:
        #false
// Starting capacity, in bytes, of a freshly constructed `StringBuf`.
def _init_buf_size = 1024

// Growable byte buffer that accumulates the UTF-8 encoding of a string.
// `buf` is the backing store and `pos` is the number of bytes filled so far.
class StringBuf(mutable buf :: MutableBytes, mutable pos :: NonnegInt):
  constructor ():
    super(Bytes.make(_init_buf_size), 0)

  // Total size of the backing store.
  method capacity():
    buf.length()

  // Number of bytes that can still be written without growing.
  method free_space():
    capacity() - pos

  // Replaces the backing store with one 1.5x larger, keeping the filled prefix.
  method grow_buffer():
    def new_capacity = (capacity() * 3) div 2
    def new_buf = Bytes.make(new_capacity)
    new_buf.copy_from(0, buf, 0, pos)
    buf := new_buf

  // Grows repeatedly until at least `needed` bytes are free. A single
  // 1.5x step is not enough when `needed` exceeds half the current capacity.
  private method ensure_space(needed :: NonnegInt):
    when free_space() < needed
    | grow_buffer()
      ensure_space(needed)

  // Appends one character: ASCII goes in as a single byte, anything else
  // as its UTF-8 encoding.
  method add_char(c :: Char):
    if c.to_int() > 127
    | // `to_string` inside this class names the method below, so the
      // generic conversion is reached through a local prefixed import.
      import: rhombus only: to_string
      add_bytes(rhombus.to_string(c).utf8_bytes())
    | add_byte(c.to_int())

  // Appends a single byte.
  method add_byte(b :: Byte):
    ensure_space(1)
    buf[pos] := b
    pos := pos + 1

  // Appends the first `amt` bytes of `bbuf` (all of it by default).
  // `amt` may be zero, so an empty byte string is a no-op rather than an
  // annotation failure.
  method add_bytes(bbuf :: Bytes, amt :: NonnegInt = bbuf.length()):
    ensure_space(amt)
    buf.copy_from(pos, bbuf, 0, amt)
    pos := pos + amt

  // Decodes the filled prefix of the buffer as UTF-8.
  method to_string() :: String:
    buf.utf8_string(#false, 0, pos)
// Raises an `Exn.Fail.Read` on behalf of `who` with message `msg`,
// attaching the current source location of `in` when it is available.
fun read_error(who, msg, in :~ Input):
  // Builds the exception; the location is sampled at raise time.
  fun make_exn(text, marks):
    def loc = in.srcloc()
    def locs = if loc | PairList[loc] | PairList[]
    Exn.Fail.Read(text, marks, locs)
  error(~who: who, ~exn: make_exn, msg)
// Raises an `Exn.Fail.Read.EOF` on behalf of `who`, attaching the current
// source location of `in` when it is available.
fun read_eof_error(who, in :~ Input):
  // Builds the exception; the location is sampled at raise time.
  fun make_exn(text, marks):
    def loc = in.srcloc()
    def locs = if loc | PairList[loc] | PairList[]
    Exn.Fail.Read.EOF(text, marks, locs)
  error(~who: who, ~exn: make_exn, "unexpected eof")
// True when `b` is one of the four whitespace bytes this reader skips;
// false for every other byte and for EOF.
fun is_ws_byte(b :: Byte || Port.EOF):
  match b
  | 0x20: #true  // space
  | 0x09: #true  // horizontal tab
  | 0x0a: #true  // line feed
  | 0x0d: #true  // carriage return
  | _: #false
// Consumes bytes from `in` for as long as the next byte is whitespace.
fun skip_spaces(in :: Input):
  fun consume():
    when is_ws_byte(in.peek_byte())
    | in.read_byte()
      consume()
  consume()
// Byte value of ASCII "0"; `digit_value` subtracts it from a digit byte.
def ZERO = Byte#"0"
// Inclusive byte range from ASCII "0" through "9".
def DIGITS = Byte#"0" ..= Byte#"9"
// Inclusive byte range from ASCII "1" through "9".
def ONENINE = Byte#"1" ..= Byte#"9"
// True when `b` is an ASCII decimal digit ("0" through "9").
fun is_digit_byte(b :: Byte) :~ Boolean:
  Byte#"0" <= b && b <= Byte#"9"
// True when `b` is an ASCII decimal digit other than "0".
fun is_digit_nonzero(b :: Byte) :~ Boolean:
  Byte#"1" <= b && b <= Byte#"9"
// Annotation satisfied by a `Byte` for which `is_digit_byte` returns true.
annot.macro 'DigitByte': 'Byte && satisfying(is_digit_byte)'
// True when `b` has its high bit clear, i.e. lies in 0 through 127.
fun is_seven_bit(b :: Byte) :~ Boolean:
  b <= 0x7F
// Offset of `b` from the byte for ASCII "0"; for a digit byte this is the
// digit's numeric value.
fun digit_value(b :: Byte):
  b - Byte#"0"
// Reads exactly four hexadecimal digits from `in` and returns the value
// they spell, most significant digit first. Raises a read error on a
// non-hex byte and an EOF read error when input runs out.
fun decode_hex(in :: Input) :: NonnegInt:
  import .bits open
  // Consumes one byte and returns its hex digit value (0 through 15).
  fun nibble():
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_string, in)
    | digit :: DigitByte:
        // "0".."9" are 0x30..0x39, so the low four bits are the value.
        digit and 0xF
    | other:
        def is_upper = (Byte#"A" ..= Byte#"F").has_element(other)
        def is_lower = (Byte#"a" ..= Byte#"f").has_element(other)
        if is_upper || is_lower
        | // "A"/"a" end in binary 001, so the low three bits count from 1.
          9 + (other and 0b111)
        | read_error(#'read_json_string,
                     "expected hex char, got " +& Char.from_int(other),
                     in)
  def d3 = nibble()
  def d2 = nibble()
  def d1 = nibble()
  def d0 = nibble()
  ((d3 * 16 + d2) * 16 + d1) * 16 + d0
// Consumes `bs.length()` bytes from `in` and checks that they are exactly
// `bs`. Returns the bytes read. Raises an EOF read error when nothing
// could be read, and a read error naming both the expected and the actual
// bytes when they differ.
fun read_literal(bs :: Bytes, in :: Input) :: Bytes:
  def in_read = in.read_bytes(bs.length())
  when in_read is_a Port.EOF
  | read_eof_error(#'read_literal, in)
  unless bs == in_read
  | read_error(#'read_literal,
               "expected literal "
                 ++ to_string(bs, ~mode: #'expr)
                 ++ ", got "
                 ++ to_string(in_read, ~mode: #'expr),
               in)
  in_read
// Reads the rest of a JSON string from `in`, up to and including the
// closing quote, and returns the decoded text. The caller is expected to
// have consumed the opening quote already. All failures are raised
// through `read_error` / `read_eof_error` so they carry the port's
// source location when available.
fun read_json_string(in :: Input) :: String:
  def buf = StringBuf()

  // Decodes the escape whose backslash has just been consumed.
  fun decode_escape():
    match in.read_byte()
    | _ :: Port.EOF:
        // Input ended right after the backslash.
        read_eof_error(#'read_json_string, in)
    | Byte#"\"": buf.add_char(#{#\"})
    | Byte#"\\": buf.add_char(#{#\\})
    | Byte#"/": buf.add_char(#{#\/})
    | Byte#"b": buf.add_char(#{#\backspace})
    | Byte#"f": buf.add_char(#{#\page})
    | Byte#"n": buf.add_char(#{#\newline})
    | Byte#"r": buf.add_char(#{#\return})
    | Byte#"t": buf.add_char(#{#\tab})
    | Byte#"u": decode_unichar()
    | b:
        read_error(#'read_json_string,
                   "unknown escape " +& Char.from_int(b),
                   in)

  // Decodes the four hex digits of a \u escape, combining a surrogate pair
  // (two consecutive \u escapes) into a single character.
  fun decode_unichar():
    fun err():
      read_error(#'read_json_string,
                 "missing second half of surrogate pair",
                 in)
    def c = decode_hex(in)
    def ch:
      cond
      | (0xD800 ..= 0xDBFF).has_element(c):
          // High surrogate: a low surrogate escape must follow.
          block:
            unless Byte#"\\" == in.read_byte() | err()
            def e = in.read_byte()
            unless e == Byte#"U" || e == Byte#"u" | err()
            def c2 = decode_hex(in)
            if (0xDC00 ..= 0xDFFF).has_element(c2)
            | // Each half contributes 10 bits; 0x400 is 1 << 10.
              def i = (c - 0xD800) * 0x400 + (c2 - 0xDC00) + 0x10000
              Char.from_int(i)
            | err()
      | (0xDC00 ..= 0xDFFF).has_element(c):
          // A low surrogate cannot start a pair; treating it as the first
          // half would compute a code point beyond the Unicode range.
          read_error(#'read_json_string,
                     "unexpected low surrogate without preceding high surrogate",
                     in)
      | ~else:
          Char.from_int(c)
    buf.add_char(ch)

  // Starts decoding a multi-byte UTF-8 sequence whose first byte is `b`.
  fun decode_utf8(b :: Byte):
    def conv = base.#{bytes-open-converter}("UTF-8", "UTF-8")
    decode_utf8_rest(0, 1, Bytes.make(6, b), conv)

  // Runs the converter over `ubuf[start .. end]`, pulling one more byte
  // from `in` each time the converter reports an incomplete sequence.
  fun decode_utf8_rest(start, end, ubuf :~ MutableBytes, conv):
    def values(wamt, ramt, state):
      base.#{bytes-convert}(conv, ubuf, start, end, ubuf, 0, 6)
    match state
    | #'complete:
        buf.add_bytes(ubuf, end)
        read_piece()
    | #'error:
        read_error(#'read_json_string, "utf8 decode error", in)
    | #'aborts:
        match in.read_byte()
        | _ :: Port.EOF:
            read_eof_error(#'read_json_string, in)
        | b:
            ubuf[end] := b
            decode_utf8_rest(start + ramt, end + 1, ubuf, conv)

  // Main loop: consumes bytes until the closing quote and returns the text.
  fun read_piece() :: String:
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_string, in)
    | Byte#"\"":
        buf.to_string()
    | Byte#"\\":
        decode_escape()
        read_piece()
    | b :: satisfying(is_seven_bit):
        buf.add_byte(b)
        read_piece()
    | b:
        // `decode_utf8` re-enters `read_piece` once the sequence completes.
        decode_utf8(b)

  read_piece()
// Reads a JSON number from `in`, dispatching on its first byte.
// Raises an EOF read error on empty input and a read error when the first
// byte cannot start a number, instead of failing the match.
fun read_json_number(in :: Input) :: Number:
  match in.read_byte()
  | _ :: Port.EOF:
      read_eof_error(#'read_json_number, in)
  | Byte#"0": read_zero(in, #true)
  | Byte#"-": read_negative_number(in)
  | b :: Byte && satisfying(is_digit_nonzero):
      read_number(in, #true, digit_value(b))
  | b:
      read_error(#'read_json_number,
                 "expected number, got: " +& Char.from_int(b),
                 in)
// Continues a number whose integer part is the single digit "0", which
// has already been consumed. When a fraction or exponent follows, the
// rest is parsed by `read_number`; otherwise the result is exact 0.
fun read_zero(in :~ Input, is_positive):
  match in.peek_byte()
  | Byte#"." || Byte#"e" || Byte#"E":
      // Forward the sign: passing #true here made "-0.5" read as 0.5.
      read_number(in, is_positive, 0)
  | _: 0
// Continues a number after its leading "-" has been consumed.
// Raises an EOF read error when input ends after the sign and a read
// error when the next byte is not a digit.
fun read_negative_number(in :~ Input):
  match in.peek_byte()
  | _ :: Port.EOF:
      read_eof_error(#'read_json_number, in)
  | Byte#"0":
      in.read_byte()
      read_zero(in, #false)
  | _ :: Byte && satisfying(is_digit_nonzero):
      // The digit is left in the port; `read_number` consumes it.
      read_number(in, #false, 0)
  | b:
      read_error(#'read_json_number,
                 "expected digit after '-', got: " +& Char.from_int(b),
                 in)
// Reads the remaining digits, fraction and exponent of a number from `in`.
// `init_value` is the value of the integer digits consumed so far and
// `is_positive` selects the sign of the result. Integers come back exact;
// anything with a fraction or exponent comes back inexact.
// A '.' must be followed by at least one digit, and so must an exponent
// marker (after its optional sign); violations raise a read error.
fun read_number(in :~ Input, is_positive, init_value):
  // Combines the digits `num` and the power-of-ten `exp` into the result.
  fun build_number(num, exp, is_inexact_result):
    def exactness = if is_inexact_result | math.inexact | values
    def sign = if is_positive | 1 | -1
    exactness(sign * num * 10 ** exp)

  // Integer part: accumulate digits until '.', an exponent marker, or the
  // end of the number.
  fun read_integer(i_val):
    match in.peek_byte()
    | _ :: DigitByte:
        read_integer(10 * i_val + digit_value(in.read_byte()))
    | Byte#"e" || Byte#"E":
        in.read_byte()
        read_expt_init(i_val, 0)
    | Byte#".":
        in.read_byte()
        read_fraction_init(i_val)
    | _: build_number(i_val, 0, #false)

  // First byte after '.': it has to be a digit.
  fun read_fraction_init(i_val):
    match in.read_byte()
    | _ :: Port.EOF:
        read_eof_error(#'read_json_number, in)
    | b :: DigitByte:
        read_fraction(10 * i_val + digit_value(b), -1)
    | b:
        read_error(#'read_json_number,
                   "expected at least one digit after '.'. got: "
                     +& Char.from_int(b),
                   in)

  // Remaining fraction digits; `p_val` is minus the number of fraction
  // digits folded into `i_val` so far.
  fun read_fraction(i_val, p_val):
    match in.peek_byte()
    | b :: DigitByte:
        in.read_byte()
        read_fraction(10 * i_val + digit_value(b), p_val - 1)
    | Byte#"e" || Byte#"E":
        in.read_byte()
        read_expt_init(i_val, p_val)
    | _:
        build_number(i_val, p_val, #true)

  // Optional exponent sign, then at least one exponent digit.
  fun read_expt_init(i_val, p_val):
    def s_val:
      match in.peek_byte()
      | Byte#"+":
          in.read_byte()
          1
      | Byte#"-":
          in.read_byte()
          -1
      | _: 1
    match in.peek_byte()
    | _ :: DigitByte:
        read_expt(i_val, p_val, s_val, 0)
    | _ :: Port.EOF:
        read_eof_error(#'read_json_number, in)
    | b:
        read_error(#'read_json_number,
                   "expected at least one digit in exponent. got: "
                     +& Char.from_int(b),
                   in)

  // Exponent digits; `s_val` is the exponent's sign (1 or -1).
  fun read_expt(i_val, p_val, s_val, e_val):
    match in.peek_byte()
    | _ :: DigitByte:
        def e = 10 * e_val + digit_value(in.read_byte())
        read_expt(i_val, p_val, s_val, e)
    | _: build_number(i_val, p_val + s_val * e_val, #true)

  read_integer(init_value)
// Skips whitespace and reads one JSON token from `in`.
// Returns `Port.eof` at end of input; otherwise a syntax object wrapping
//   - a string or number for string / number tokens,
//   - `#true` / `#false` / the symbol `null` for the three literals,
//   - one of the symbols `object_open`, `object_close`, `list_open`,
//     `list_close`, `comma`, `colon` for punctuation.
// When the port counts lines, the syntax object is relocated to the span
// the token occupied. A byte that cannot start a token raises a read
// error carrying the port's source location.
fun read_json_token(in :: Input) :: Syntax || Port.EOF:
  skip_spaces(in)
  def start_loc = in.srcloc()

  // Span from the token's first byte to the current port position, or
  // `#false` when no start location was available.
  fun make_srcloc():
    if start_loc
    | def Srcloc(name, row, col, start_pos, _) = start_loc
      def end_pos = in.cur_pos()
      Srcloc(name, row, col, start_pos, end_pos - start_pos)
    | #false

  fun make_token(val):
    Syntax.make(val).relocate(make_srcloc())

  // Consumes the single punctuation byte that was peeked, then tokenizes.
  fun punctuation(sym):
    in.read_byte()
    make_token(sym)

  // Consumes the exact bytes `spelling`, then tokenizes `val`.
  fun literal(spelling, val):
    read_literal(spelling, in)
    make_token(val)

  match in.peek_byte()
  | Port.eof: Port.eof
  | Byte#"n": literal(#"null", #'null)
  | Byte#"t": literal(#"true", #true)
  | Byte#"f": literal(#"false", #false)
  | Byte#"{": punctuation(#'object_open)
  | Byte#"}": punctuation(#'object_close)
  | Byte#"[": punctuation(#'list_open)
  | Byte#"]": punctuation(#'list_close)
  | Byte#",": punctuation(#'comma)
  | Byte#":": punctuation(#'colon)
  | Byte#"\"":
      in.read_byte()
      def s = read_json_string(in)
      make_token(s)
  | Byte#"-" || (_ :: DigitByte):
      make_token(read_json_number(in))
  | b:
      read_error(#'read_json_token,
                 "invalid token start char " +& Char.from_int(b),
                 in)
// Predicate for JSON-like values: numbers, strings, booleans, the symbol
// `null`, lists, and maps. With `~shallow` true (the default) lists and
// maps are accepted without inspecting their contents; otherwise every
// list element and map value must itself qualify and every map key must
// be a string.
fun is_jsexpr(v, ~shallow: shallow = #true):
  match v
  | _ :: Number || String || Boolean: #true
  | #'null: #true
  | items :: List:
      shallow || for all (item: items): is_jsexpr(item, ~shallow: #false)
  | dict :: Map:
      shallow || for all (values(key, val): dict): key is_a String && is_jsexpr(val, ~shallow: #false)
  | _: #false
// Annotations backed by `is_jsexpr`:
//   `JsExpr`      - calls `is_jsexpr` with its default `~shallow` argument.
//   `JsExpr.deep` - calls `is_jsexpr` with `~shallow: #false`.
annot.macro
| 'JsExpr.deep': 'satisfying(is_jsexpr(_, ~shallow: #false))'
| 'JsExpr': 'satisfying(is_jsexpr)'
// Reads one complete JSON value from `in`.
// Returns `Port.eof` when the input holds no further token. Otherwise
// returns a number, string, boolean, the symbol `null`, a list, or a map
// with string keys. An unexpected token, including end of input in the
// middle of a list or object, raises an error naming what was expected.
fun read_json(in :: Port.Input) :: JsExpr.deep || Port.EOF:
  fun expected(exp, actual):
    error(~who: #'read_json,
          "expected " ++ exp,
          error.val(~label: "token", actual))

  fun read1(): read_json_token(in)

  // After '[': either an immediate ']' or the first element.
  fun read_json_list():
    match read1()
    | 'list_close': []
    | tok:
        read_json_list_rest([read_inner(tok)])

  // After an element: ',' plus another element, or the closing ']'.
  fun read_json_list_rest(vs :~ List):
    match read1()
    | 'comma': read_json_list_rest(vs.add(read_inner(read1())))
    | 'list_close': vs
    | tok:
        expected("',' or ']'", tok)

  // After '{': either an immediate '}' or the first key/value pair.
  fun read_json_object():
    match read1()
    | 'object_close': Map()
    | tok: read_kvp(tok, Map())

  // After a key/value pair: ',' plus another pair, or the closing '}'.
  fun read_json_object_rest(m):
    match read1()
    | 'comma': read_kvp(read1(), m)
    | 'object_close': m
    | tok:
        expected("',' or '}'", tok)

  // Parses `"key" : value` starting from the already-read token `next`
  // and adds it to `m`.
  fun read_kvp(next, m :~ Map):
    match next
    | '$(k :: String)':
        def v:
          match read1()
          | 'colon': read_inner(read1())
          | tok: expected("colon", tok)
        read_json_object_rest(m ++ {k.unwrap(): v})
    | tok:
        expected("string", tok)

  // Turns the token `next` (plus any tokens it opens) into a value.
  // End of input here means a value is missing, so it is an error rather
  // than a result.
  fun read_inner(next):
    match next
    | Port.eof: expected("a value before end of input", next)
    | '$(n :: Number)': n.unwrap()
    | '$(s :: String)': s.unwrap()
    | '$(b :: Boolean)': b.unwrap()
    | 'null': #'null
    | 'list_open': read_json_list()
    | 'object_open': read_json_object()
    | tok: expected("number, string, boolean, 'null', '[', or '{'", tok)

  // Only at the top level is end of input a legitimate result.
  match read1()
  | Port.eof: Port.eof
  | tok: read_inner(tok)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment