@mitchellh
Last active April 5, 2023 21:04
Streaming JSON decoder for Zig (NOT COMPLETE!)
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const StreamingParser = std.json.StreamingParser;
const Token = std.json.Token;
const TokenTag = std.meta.FieldEnum(Token);
/// Field options are options that can be set per-field on a struct at
/// comptime to control how decoding behaves.
pub const FieldOptions = struct {
/// Ignore this field. Do not populate it under any circumstance.
ignore: bool = false,
};
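// Sketch of the per-field options hook (see the jsonFieldOpts signature
// documented on StreamingDecoder below; the "struct field options" test at
// the bottom of this file exercises the same pattern):
//
//   const Config = struct {
//       secret: u8 = 0,
//       value: u8,
//
//       pub fn jsonFieldOpts(comptime field: std.meta.FieldEnum(@This())) FieldOptions {
//           return switch (field) {
//               .secret => .{ .ignore = true },
//               else => .{},
//           };
//       }
//   };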
/// The decoder states are the full list of states for the decoder. They are
/// built at comptime for a given type T and make up the required memory
/// footprint of a StreamingDecoder for that type. You can use @sizeOf to
/// determine whether the size of the decoder state is reasonable for your
/// program.
///
/// The states are stored in the order they can be visited. states[0] is the
/// root state and the root type. The following states depend on the tag of
/// each state.
///
/// Constructs a JSON decoder for T that can operate on streaming data.
/// The full JSON document does not need to be available before decoding
/// can begin or before decoding errors can be detected.
///
/// The decoder does require allocation for constructing the type T, and
/// for decoding strings, numbers, etc. The memory requirements for the
/// StreamingDecoder are highly dependent on the structure T and input data.
/// For example, strings and numbers must be fully buffered in memory before
/// they are finally parsed. Other values do not require any buffering.
///
/// You may control the memory requirements through the options, for example
/// by specifying maximum nesting depths, maximum buffer sizes, etc.
///
/// # Field Options
///
/// If you are decoding a struct, you can define a decl with the following
/// signature in order to define field-specific options:
///
/// fn jsonFieldOpts(comptime field: std.meta.FieldEnum(@This())) FieldOptions
///
/// # How it Works
///
/// The streaming decoder works by building a state machine at comptime.
/// The state machine is represented as a tape/array of "fields". Some
/// examples of tape states are shown below:
///
/// Input: struct { a: u8, b: bool }
/// Tape: [struct, int, bool]
///
/// Input: struct { a: u8, b: struct { c: []const u8 } }
/// Tape: [struct, int, struct, string]
///
/// Input: struct { a: u8, b: struct { c: []bool } }
/// Tape: [struct, int, struct, slice, bool]
///
/// At runtime, we start at state 0 on the tape. The tape value built
/// at comptime tells us the type we expect and the result location for
/// the final value.
///
/// Result locations are computed at comptime and let us determine where
/// to write decoded data. See ResultLoc for more details.
pub fn StreamingDecoder(comptime T: type, comptime opts: struct {
/// Maximum depth for nesting. Nesting occurs at any struct or slice layer.
max_depth: usize = 32,
/// If true, unknown fields will not produce an error.
ignore_unknown_fields: bool = false,
}) type {
return struct {
const Self = @This();
/// Possible errors from decoding
const Error = error{
OutOfMemory,
Overflow,
InvalidNumber,
InvalidValue,
MissingField,
UnknownField,
} || PartialTokenStream.Error || std.fmt.ParseFloatError;
/// The array of fields that can possibly be decoded into.
const fields = Field.initArray(T);
/// The general purpose allocator. This is only used to construct
/// new arenas if necessary during reset.
alloc: Allocator,
/// The arena allocator used to create any necessary values such as slices.
/// Its memory backs the decoded root value and is cleared during reset.
arena: ArenaAllocator,
/// The temp arena is used for temporary values only used during
/// decoding. This is reset on reset and finalize and the caller
/// never owns this memory.
temp_arena: ArenaAllocator,
/// The parser state for the decoding.
stream: PartialTokenStream,
/// The buffer for building data. This is allocated in the perm arena
/// for use directly in the result values. This is used to buffer values
/// such as strings and numbers until they're ready to parse.
data_buffer: std.ArrayListUnmanaged(u8) = .{},
/// The root target. This is not safe to use until you've completed
/// the JSON stream.
root: *T,
/// Internal state for tracking decoding.
i: usize = 0,
stack_i: usize = 0,
stack: [opts.max_depth]StackElem = undefined,
finalized: bool = false,
pub fn init(alloc: Allocator, target: *T) Self {
var self: Self = .{
.alloc = alloc,
.arena = ArenaAllocator.init(alloc),
.temp_arena = ArenaAllocator.init(alloc),
.stream = undefined,
.root = undefined,
};
self.reset(target);
return self;
}
pub fn deinit(self: *Self) void {
if (!self.finalized) self.arena.deinit();
self.temp_arena.deinit();
}
/// Process the next slice of data.
pub fn next(self: *Self, data: []const u8) Error!void {
self.stream.refill(data);
while (try self.stream.next()) |token| {
try self.nextToken(token);
}
// If our parser state is in the middle of a string,
// we need to buffer the data.
switch (self.stream.partialState() orelse return) {
.string => |count| {
const start = if (count > data.len) 0 else data.len - count;
const partial = data[start..data.len];
try self.data_buffer.appendSlice(self.arena.allocator(), partial);
},
.number => |raw_count| {
const count = @max(raw_count + 1, 1);
const start = if (count > data.len) 0 else data.len - count;
const partial = data[start..data.len];
try self.data_buffer.appendSlice(self.arena.allocator(), partial);
},
}
}
/// Finalize the value. This MUST be called when no more data is
/// expected because there may be some buffered tokens that need
/// to be processed.
///
/// After this is called, the caller takes ownership of the
/// value and the arena.
pub fn finalize(self: *Self) Error!void {
assert(!self.finalized);
self.finalized = true;
self.stream.refill("");
if (try self.stream.finalize()) |token| {
try self.nextToken(token);
}
}
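// Ownership sketch (an illustration of the doc comment above, not extra
// API; `dec` is a hypothetical StreamingDecoder instance):
//
//   try dec.next(chunk);
//   try dec.finalize();
//   defer dec.arena.deinit(); // the caller now owns the value's backing memory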
/// Resets the decoder so that it starts rewriting the target.
/// This does NOT zero all the values in the target, so any values
/// that are not rewritten will retain their old contents.
///
/// This will also clear the arena.
pub fn reset(self: *Self, target: *T) void {
self.root = target;
self.i = 0;
self.stack_i = 0;
self.stream = PartialTokenStream.init("");
self.data_buffer = .{};
assert(self.temp_arena.reset(.{ .retain_capacity = {} }));
// If we aren't finalized, we just reset our arena. If we
// are finalized, we need to create a whole new arena because
// the caller took ownership.
if (!self.finalized) {
assert(self.arena.reset(.{ .retain_capacity = {} }));
} else {
self.arena = ArenaAllocator.init(self.alloc);
}
self.finalized = false;
}
pub const Writer = std.io.Writer(*Self, Error, writeFn);
/// Returns a Writer that adheres to std.io.Writer.
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
fn writeFn(self: *Self, bytes: []const u8) Error!usize {
try self.next(bytes);
return bytes.len;
}
fn pushStack(self: *Self, data: ?*anyopaque) !void {
assert(self.stack_i < opts.max_depth);
self.stack[self.stack_i] = .{ .parent_i = self.i, .data = data };
self.stack_i += 1;
}
fn popStack(self: *Self) ?StackElem {
if (self.stack_i == 0) return null;
// Decrement our stack index and set our index on the tape back
// to the parent location. NOTE: subtracting before indexing is
// correct since stack_i is always the length, not the last index.
self.stack_i -= 1;
const last = self.stack[self.stack_i];
self.i = last.parent_i;
return last;
}
fn peekStack(self: *Self) ?StackElem {
if (self.stack_i == 0) return null;
return self.stack[self.stack_i - 1];
}
fn appendDataBuffer(self: *Self, count: usize) error{OutOfMemory}!void {
const data = self.stream.data;
const idx = self.stream.i - 1;
const start = if (count > data.len) 0 else idx -| count;
const slice = data[start..idx];
const alloc = self.arena.allocator();
try self.data_buffer.appendSlice(alloc, slice);
}
fn parseIntData(self: *Self, comptime Int: type) !Int {
const data = self.data_buffer.items;
return std.fmt.parseInt(Int, data, 10) catch |err| switch (err) {
error.Overflow => return err,
error.InvalidCharacter => {
const float = try std.fmt.parseFloat(f128, data);
if (@round(float) != float) return Error.InvalidNumber;
if (float > std.math.maxInt(Int) or
float < std.math.minInt(Int)) return Error.Overflow;
return @floatToInt(Int, float);
},
};
}
fn storeResult(self: *Self, comptime loc: ResultLoc, value: anytype) Error!void {
switch (loc) {
.root => self.root.* = value,
.list => {
assert(self.stack_i > 0);
const stack_elem = self.stack[self.stack_i - 1];
switch (stack_elem.parent_i) {
else => unreachable,
inline 0...(fields.len - 1) => |i| {
const parent = fields[i];
switch (parent.elem) {
.slice => |Type| {
const List = std.ArrayListUnmanaged(Type);
const list = stack_elem.dataCast(List);
if (comptime isAssignable(Type, @TypeOf(value))) {
try list.append(self.arena.allocator(), value);
}
},
else => unreachable,
}
},
}
},
.struct_field => |idx| {
assert(self.stack_i > 0);
const stack_elem = self.stack[self.stack_i - 1];
switch (stack_elem.parent_i) {
else => unreachable,
inline 0...(fields.len - 1) => |i| {
const parent = fields[i];
switch (parent.elem) {
.@"struct" => |struct_info| {
// Comptime codegen can produce branches where this condition holds,
// but at runtime it should never be reached.
if (idx >= struct_info.fields.len) unreachable;
const struct_state = stack_elem.dataCast(StructState);
const struct_value = @ptrCast(
*struct_info.type,
@alignCast(@alignOf(struct_info.type), struct_state.value),
);
const field = struct_info.fields[idx].field;
//std.log.warn("isAssignable Dest={} Value={}", .{ field.type, @TypeOf(value) });
if (comptime isAssignable(field.type, @TypeOf(value))) {
@field(struct_value, field.name) = value;
}
},
else => unreachable,
}
},
}
},
}
}
fn isAssignable(comptime Dest: type, comptime Src: type) bool {
// Matching
if (Src == Dest) return true;
// Optionals
if ((Src != @TypeOf(null) and ?Src == Dest) or
(Src == @TypeOf(null) and @typeInfo(Dest) == .Optional))
{
return true;
}
// Slice
const src = @typeInfo(Src);
const dest = @typeInfo(Dest);
if (src == .Pointer and dest == .Pointer) {
const srcP = src.Pointer;
const destP = dest.Pointer;
if (srcP.size == destP.size) {
switch (srcP.size) {
// []T == []T
// []const T == []T
.Slice => {
if (!isAssignable(destP.child, srcP.child))
return false;
// Must both have a sentinel or both have no sentinel
if ((srcP.sentinel != null and destP.sentinel == null) or
(srcP.sentinel == null and destP.sentinel != null))
return false;
return true;
},
else => {},
}
}
}
return false;
}
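// Illustrative cases, hand-checked against the rules above (not a spec):
//   isAssignable(u8, u8)                    -> true  (exact match)
//   isAssignable(?u8, u8)                   -> true  (T into ?T)
//   isAssignable(?u8, @TypeOf(null))        -> true  (null into any optional)
//   isAssignable([]const u8, []const u8)    -> true  (matching slices)
//   isAssignable([:0]const u8, []const u8)  -> false (sentinel mismatch)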
/// Called to exit an array.
fn exitArray(
self: *Self,
comptime loc: ResultLoc,
comptime Elem: type,
stack_elem: StackElem,
) Error!void {
// Preconditions
if (comptime std.debug.runtime_safety) {
const stack = self.peekStack().?;
assert(stack.parentTag() == .slice);
}
// Pop the current object off the top of the stack. This resets our
// tape index to this object's own position; the deferred
// maybeFinishStructValue call below then moves us back to the
// grandparent struct's key state (if there is one) so decoding
// continues from the correct place.
assert(self.popStack() != null);
defer self.maybeFinishStructValue();
const List = std.ArrayListUnmanaged(Elem);
const list = stack_elem.dataCast(List);
defer self.temp_arena.allocator().destroy(list);
try self.storeResult(loc, list.items);
}
/// Called to exit the struct.
fn exitStruct(
self: *Self,
comptime loc: ResultLoc,
comptime info: Field.Struct,
state: *StructState,
) Error!void {
// Preconditions
if (comptime std.debug.runtime_safety) {
const stack = self.peekStack().?;
assert(stack.parentTag() == .@"struct");
assert(stack.dataCast(StructState).state == .key);
}
// Pop the current object off the top of the stack. This resets our
// tape index to this object's own position; the deferred
// maybeFinishStructValue call below then moves us back to the
// grandparent struct's key state (if there is one) so decoding
// continues from the correct place.
assert(self.popStack() != null);
defer self.maybeFinishStructValue();
const value = @ptrCast(*info.type, @alignCast(@alignOf(info.type), state.value));
// Set all our default values if we have them.
inline for (info.fields, 0..) |field, field_idx| {
if (!state.seen[field_idx]) {
const field_info = field.field;
if (field_info.default_value) |default| {
if (!field_info.is_comptime) {
@field(value, field_info.name) = @ptrCast(
*const field_info.type,
@alignCast(@alignOf(field_info.type), default),
).*;
}
} else {
return error.MissingField;
}
}
}
const alloc = self.temp_arena.allocator();
state.destroy(alloc, info.type);
try self.storeResult(loc, value.*);
}
fn maybeFinishStructValue(self: *Self) void {
// On exit, we set ourselves to our grandparent if we have
// one. TODO: otherwise, we move to the next value?
// This solves the scenario of [struct, struct, value] where
// the inner struct completes. This is tested.
if (self.peekStack()) |grandparent| {
// If our grandparent is a struct, then we need to reset
// the struct state to "key" because we have completed a value;
// the value being a nested struct.
if (grandparent.parentTag() == .@"struct") {
grandparent.dataCast(StructState).state = .key;
self.i = grandparent.parent_i;
}
}
}
/// Process per-token. This is not safe to expose publicly because
/// it requires careful access to the underlying buffered data
/// in certain scenarios (strings).
fn nextToken(self: *Self, t: Token) Error!void {
if (comptime fields.len == 0) return Error.InvalidValue;
//std.log.warn("token={}", .{t});
const stack_elem_: ?StackElem = self.peekStack();
if (stack_elem_) |stack_elem| {
switch (stack_elem.parent_i) {
else => unreachable,
inline 0...(fields.len - 1) => |i| {
const parent = fields[i];
switch (parent.elem) {
.slice => |Type| switch (t) {
// End of the array, so we complete our slice and
// store it onto the final value.
.ArrayEnd => {
try self.exitArray(parent.loc, Type, stack_elem);
return;
},
else => {},
},
.@"struct" => |struct_info| {
if (stack_elem.parent_i == self.i) {
const state = stack_elem.dataCast(StructState);
switch (state.state) {
.key => switch (t) {
.ObjectEnd => {
try self.exitStruct(
parent.loc,
struct_info,
state,
);
return;
},
else => {},
},
// We're ignoring a value, so we just keep track
// of our nesting depth and exit when we're done.
.value_ignore => {
switch (t) {
.ObjectBegin,
.ArrayBegin,
=> state.ignore_depth += 1,
.ObjectEnd,
.ArrayEnd,
=> state.ignore_depth -= 1,
.String,
.Number,
.True,
.False,
.Null,
=> {},
}
if (state.ignore_depth == 0) {
state.state = .key;
}
return;
},
else => {},
}
}
},
else => {},
}
},
}
}
// We have to use a switch with an inline case to build this
// functionality because self.i is runtime but we want to access
// a comptime value out of "fields".
var current_tag: Field.Tag = undefined;
switch (self.i) {
else => unreachable,
inline 0...(fields.len - 1) => |i| blk: {
const current = fields[i];
current_tag = fields[i].elem.tag();
const loc = current.loc;
// We special-case nullable fields here and store null immediately.
if (t == .Null and current.nullable) {
try self.storeResult(loc, null);
break :blk;
}
switch (current.elem) {
.bool => switch (t) {
.True => try self.storeResult(loc, true),
.False => try self.storeResult(loc, false),
else => return Error.InvalidValue,
},
.int => |Type| switch (t) {
inline .Number, .String => |token| {
// Build the full data buffer
try self.appendDataBuffer(token.count);
defer self.data_buffer = .{};
const value = try self.parseIntData(Type);
try self.storeResult(loc, value);
},
else => return Error.InvalidValue,
},
.string => |Type| switch (t) {
.String => |v| {
try self.appendDataBuffer(v.count);
defer self.data_buffer = .{};
const value: Type = value: {
const stringInfo = @typeInfo(Type).Pointer;
if (stringInfo.sentinel) |sentinel_opaque| {
const sentinel = @ptrCast(*const u8, sentinel_opaque).*;
const alloc = self.arena.allocator();
try self.data_buffer.append(alloc, sentinel);
const items = self.data_buffer.items;
break :value items[0 .. items.len - 1 :sentinel];
}
break :value self.data_buffer.items;
};
try self.storeResult(loc, value);
},
else => return Error.InvalidValue,
},
.slice => |Type| switch (t) {
.ArrayBegin => {
// Create our array list in the temp alloc.
const List = std.ArrayListUnmanaged(Type);
var list = try self.temp_arena.allocator().create(List);
errdefer self.temp_arena.allocator().destroy(list);
list.* = .{};
try self.pushStack(list);
},
else => return Error.InvalidValue,
},
.@"struct" => |struct_info| {
// Get our current struct state if we're currently
// inside of THIS struct.
const current_struct: ?*StructState = state: {
if (stack_elem_) |stack_elem| {
if (stack_elem.parent_i == self.i) {
break :state stack_elem.dataCast(StructState);
}
}
break :state null;
};
if (current_struct) |state| {
switch (state.state) {
.key => switch (t) {
.String => |v| {
try self.appendDataBuffer(v.count);
defer self.data_buffer = .{};
// The use of "outer:" here prevents a comptime crash.
outer: {
const str = self.data_buffer.items;
inline for (struct_info.fields, 0..) |field, field_idx| {
if (std.mem.eql(u8, field.field.name, str)) {
state.state = .value;
state.seen[field_idx] = true;
self.i += field.offset;
break :outer;
}
}
// Unknown field
if (!opts.ignore_unknown_fields) {
return Error.UnknownField;
}
// Read a value but ignore it
state.state = .value_ignore;
state.ignore_depth = 0;
}
},
else => return Error.InvalidValue,
},
// Unreachable because we should be on a different field in
// the fields array.
.value, .value_ignore => unreachable,
}
return;
}
// We are expecting a new struct.
switch (t) {
.ObjectBegin => {
const alloc = self.temp_arena.allocator();
try self.pushStack(try StructState.create(
alloc,
struct_info.type,
struct_info.fields.len,
));
return;
},
else => return Error.InvalidValue,
}
},
}
},
}
// In some scenarios, we don't increment i. For example, if
// we're in the middle of a slice, we stay at the same value
// until we reach the end of the array.
if (stack_elem_) |stack_elem| {
switch (stack_elem.parentTag()) {
.slice => if (Field.Elem.isPrimitive(current_tag)) return,
.@"struct" => if (Field.Elem.isPrimitive(current_tag)) {
const state = stack_elem.dataCast(StructState);
assert(state.state == .value);
state.state = .key;
self.i = stack_elem_.?.parent_i;
return;
},
else => {},
}
}
// If it was successful then we move to the next state
self.i += 1;
}
const StackElem = struct {
parent_i: usize,
/// Data is:
/// - StructState if parentTag == @"struct"
/// - ArrayList if parentTag == @"slice"
data: ?*anyopaque,
/// Cast the data field to the given Data type. Asserts that
/// data is not null.
fn dataCast(self: StackElem, comptime Data: type) *Data {
return @ptrCast(*Data, @alignCast(@alignOf(Data), self.data.?));
}
/// Return the tag of the parent field that this points to.
fn parentTag(self: StackElem) Field.Tag {
switch (self.parent_i) {
else => unreachable,
inline 0...(fields.len - 1) => |i| {
const parent = fields[i];
return parent.elem.tag();
},
}
}
};
};
}
const StructState = struct {
state: enum { key, value, value_ignore } = .key,
seen: []bool,
value: *anyopaque,
ignore_depth: u32 = 0,
pub fn create(alloc: Allocator, comptime ValueType: type, fields: usize) !*StructState {
var state = try alloc.create(StructState);
errdefer alloc.destroy(state);
var value = try alloc.create(ValueType);
errdefer alloc.destroy(value);
var seen = try alloc.alloc(bool, fields);
errdefer alloc.free(seen);
// Initialize to false so unseen fields are detected reliably.
for (seen) |*b| b.* = false;
state.* = .{
.state = .key,
.seen = seen,
.value = value,
};
return state;
}
pub fn destroy(self: *StructState, alloc: Allocator, comptime ValueType: type) void {
// These must be destroyed in the reverse order of creation so that
// this works well with arenas.
const value = @ptrCast(*ValueType, @alignCast(@alignOf(ValueType), self.value));
alloc.free(self.seen);
alloc.destroy(value);
alloc.destroy(self);
}
};
/// A field represents some decodable JSON value. This is a comptime
/// generated value based on T.
const Field = struct {
/// The result location where this field value is written.
loc: ResultLoc,
/// The element type of this field.
elem: Elem,
/// If true, this field is nullable in the source Zig type.
nullable: bool = false,
const Tag = std.meta.FieldEnum(Elem);
const Elem = union(enum) {
bool,
@"struct": Struct,
string: type,
int: type,
slice: type,
fn tag(comptime self: Elem) Tag {
return @field(Tag, @tagName(self));
}
fn isPrimitive(t: Tag) bool {
return switch (t) {
.bool,
.int,
.string,
=> true,
.slice,
.@"struct",
=> false,
};
}
};
const StructField = struct {
/// The @typeInfo for the field this represents.
field: std.builtin.Type.StructField,
/// The offset in the field tape from the @"struct" this
/// is a part of.
offset: usize,
};
const Struct = struct {
type: type,
fields: []const StructField,
};
/// Returns an array type exactly sized to the number of field slots
/// that a given type Child takes up.
fn Array(comptime Child: type) type {
return [slots(Child)]Field;
}
/// Initializes the field array for a given child type Child.
fn initArray(comptime Child: type) Array(Child) {
var acc: Array(Child) = undefined;
initArrayAcc(Array(Child), &acc, 0, .{ .root = {} }, Child);
return acc;
}
fn initArrayAcc(
comptime ArrayType: type,
comptime acc: *ArrayType,
comptime i: usize,
comptime loc: ResultLoc,
comptime LocType: type,
) void {
const elem: Field.Elem = switch (@typeInfo(LocType)) {
.Bool => .{ .bool = {} },
.Int => .{ .int = LocType },
.Optional => |info| {
// Reinitialize with our child type and mark that we can
// be optional.
initArrayAcc(ArrayType, acc, i, loc, info.child);
acc[i].nullable = true;
return;
},
.Struct => |s| elem: {
// This accumulates the information about the fields
// and their associated offsets.
var fields_entry: [s.fields.len]Field.StructField = undefined;
var field_idx: usize = 0;
var j: usize = 1;
for (s.fields) |field| {
if (comptime ignoreField(LocType, field.name)) continue;
const len = Field.slots(field.type);
fields_entry[field_idx] = .{
.field = field,
.offset = j,
};
initArrayAcc(
ArrayType,
acc,
i + j,
.{ .struct_field = field_idx },
field.type,
);
j += len;
field_idx += 1;
}
break :elem .{ .@"struct" = .{
.type = LocType,
.fields = fields_entry[0..field_idx],
} };
},
.Pointer => |p| switch (p.size) {
.Slice => switch (LocType) {
[]const u8,
[:0]const u8,
=> .{ .string = LocType },
else => elem: {
// We build our child type first so we can get the proper
// child type. The location of the child store goes
// into the slice element.
initArrayAcc(ArrayType, acc, i + 1, .{ .list = {} }, p.child);
const Child = acc[i + 1].Type();
break :elem .{ .slice = Child };
},
},
else => unreachable,
},
else => unreachable,
};
acc[i] = .{
.loc = loc,
.elem = elem,
};
}
/// Calculates the number of field slots that Child consumes.
fn slots(comptime Child: type) usize {
return slotsAcc(0, Child);
}
fn slotsAcc(comptime acc: usize, comptime Current: type) usize {
return acc + switch (@typeInfo(Current)) {
.Bool => 1,
.Optional => |info| slotsAcc(0, info.child),
.Int => 1,
.Float => 1,
.Struct => |s| acc: {
var sum: usize = 1;
inline for (s.fields) |field| {
if (comptime !ignoreField(Current, field.name)) {
sum += slotsAcc(0, field.type);
}
}
break :acc sum;
},
.Pointer => |p| switch (p.size) {
.Slice => switch (Current) {
[]const u8,
[:0]const u8,
=> 1,
else => slotsAcc(1, p.child),
},
else => unreachable,
},
else => unreachable,
};
}
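// Worked example (illustration): slots(struct { a: u8, b: []bool }) is
// 1 (the struct) + 1 (a: u8) + 1 (the []bool slice) + 1 (bool) = 4,
// matching the four-entry tape sketched next to ResultLoc below.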
/// Returns true if the field in S should be ignored.
fn ignoreField(comptime S: type, comptime field: []const u8) bool {
if (!@hasDecl(S, "jsonFieldOpts")) return false;
const Fields = std.meta.FieldEnum(S);
const field_opts = S.jsonFieldOpts(@field(Fields, field));
return field_opts.ignore;
}
fn Type(comptime self: Field) type {
// Warning: do NOT use inline switch here:
/// https://github.com/ziglang/zig/issues/15157
var Result = switch (self.elem) {
.int => |t| t,
.string => |t| t,
.@"struct" => |info| info.type,
.bool => bool,
.slice => |Child| []Child,
};
if (self.nullable) {
Result = @Type(.{
.Optional = .{
.child = Result,
},
});
}
return Result;
}
};
/// ResultLoc keeps track of where the currently decoded value should
/// be written.
const ResultLoc = union(enum) {
/// Write to the root element
root: void,
/// Write to an array list located on the stack
list: void,
/// Write to the parent struct's field at the given index.
/// The parent struct value is on the stack.
struct_field: usize,
};
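// Illustration (hand-derived, not generated output) of the comptime tape
// and its result locations for a small type:
//
//   const Sample = struct { a: u8, b: []bool };
//
//   fields[0]: loc = .root,                  elem = .@"struct" (Sample)
//   fields[1]: loc = .{ .struct_field = 0 }, elem = .int (u8)
//   fields[2]: loc = .{ .struct_field = 1 }, elem = .slice (bool)
//   fields[3]: loc = .list,                  elem = .bool
//
// Decoding { "b": [true] } pushes the struct state, jumps to fields[2] when
// the "b" key is seen, pushes an array list, and stores each element through
// the .list result location before the slice is written back to field b.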
test "StreamingDecoder bool" {
const testing = std.testing;
const alloc = testing.allocator;
const T = bool;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("true");
try testing.expect(target);
}
{
defer state.reset(&target);
try state.next("false");
try testing.expect(!target);
}
}
test "StreamingDecoder optional bool" {
const testing = std.testing;
const alloc = testing.allocator;
const T = ?bool;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("true");
try testing.expect(target.?);
}
{
defer state.reset(&target);
try state.next("false");
try testing.expect(!target.?);
}
{
defer state.reset(&target);
try state.next("null");
try testing.expect(target == null);
}
}
test "StreamingDecoder slice of bool" {
const testing = std.testing;
const alloc = testing.allocator;
const T = []bool;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 2), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Empty array
{
defer state.reset(&target);
try state.next("[]");
try testing.expectEqual(@as(usize, 0), target.len);
}
// Single item
{
defer state.reset(&target);
try state.next("[ true ]");
try testing.expectEqual(@as(usize, 1), target.len);
try testing.expectEqual(@as(bool, true), target[0]);
}
// Multiple item
{
defer state.reset(&target);
try state.next("[ true, false ]");
try testing.expectEqual(@as(usize, 2), target.len);
try testing.expectEqual(@as(bool, true), target[0]);
try testing.expectEqual(@as(bool, false), target[1]);
}
}
test "StreamingDecoder slice of slice of bool" {
const testing = std.testing;
const alloc = testing.allocator;
const T = [][]bool;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 3), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Empty array
{
defer state.reset(&target);
try state.next("[]");
try testing.expectEqual(@as(usize, 0), target.len);
}
// Single item
{
defer state.reset(&target);
try state.next("[[]]");
try testing.expectEqual(@as(usize, 1), target.len);
try testing.expectEqual(@as(usize, 0), target[0].len);
}
// Single item with value
{
defer state.reset(&target);
try state.next("[[true]]");
try testing.expectEqual(@as(usize, 1), target.len);
try testing.expectEqual(@as(usize, 1), target[0].len);
try testing.expectEqual(@as(bool, true), target[0][0]);
}
// Multiple item with value
{
defer state.reset(&target);
try state.next("[[true], [false]]");
try testing.expectEqual(@as(usize, 2), target.len);
try testing.expectEqual(@as(usize, 1), target[0].len);
try testing.expectEqual(@as(bool, true), target[0][0]);
try testing.expectEqual(@as(usize, 1), target[1].len);
try testing.expectEqual(@as(bool, false), target[1][0]);
}
}
test "StreamingDecoder slice of optional slice of bool" {
const testing = std.testing;
const alloc = testing.allocator;
const T = []?[]bool;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 3), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Empty array
{
defer state.reset(&target);
try state.next("[]");
try testing.expectEqual(@as(usize, 0), target.len);
}
// Single item
{
defer state.reset(&target);
try state.next("[[]]");
try testing.expectEqual(@as(usize, 1), target.len);
try testing.expectEqual(@as(usize, 0), target[0].?.len);
}
}
test "StreamingDecoder string" {
const testing = std.testing;
const alloc = testing.allocator;
const T = []const u8;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("\"hello\"");
try testing.expectEqualStrings("hello", target);
}
// Split across multiple data inputs
{
defer state.reset(&target);
try state.next("\"hel");
try state.next("lo\"");
try testing.expectEqualStrings("hello", target);
}
// Split across multiple data inputs with whitespace
{
defer state.reset(&target);
try state.next(" \"hel");
try state.next("lo\" ");
try testing.expectEqualStrings("hello", target);
}
}
test "StreamingDecoder string with sentinel" {
const testing = std.testing;
const alloc = testing.allocator;
const T = [:0]const u8;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("\"hello\"");
try testing.expectEqualStrings("hello", target);
}
}
test "StreamingDecoder slice of string" {
const testing = std.testing;
const alloc = testing.allocator;
const T = [][]const u8;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 2), S.fields.len);
var t: T = undefined;
var state = S.init(alloc, &t);
defer state.deinit();
{
defer state.reset(&t);
try state.next(
\\[ "hello" ]
);
try testing.expectEqual(@as(usize, 1), t.len);
try testing.expectEqualStrings("hello", t[0]);
}
}
test "StreamingDecoder number u8" {
const testing = std.testing;
const alloc = testing.allocator;
const T = u8;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Int
{
defer state.reset(&target);
try state.next("42 ");
try testing.expectEqual(@as(T, 42), target);
}
// Int with finalize
{
defer state.reset(&target);
try state.next("42 ");
try state.finalize();
defer state.arena.deinit();
try testing.expectEqual(@as(T, 42), target);
}
// Int with finalize no space
{
defer state.reset(&target);
try state.next("42");
try state.finalize();
defer state.arena.deinit();
try testing.expectEqual(@as(T, 42), target);
}
// Float-like int
{
defer state.reset(&target);
try state.next("42.0 ");
try testing.expectEqual(@as(T, 42), target);
}
// String
{
defer state.reset(&target);
try state.next("\"42\"");
try testing.expectEqual(@as(T, 42), target);
}
// String float
{
defer state.reset(&target);
try state.next("\"42.0\"");
try testing.expectEqual(@as(T, 42), target);
}
}
test "StreamingDecoder number u8 split" {
const testing = std.testing;
const alloc = testing.allocator;
const T = u8;
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 1), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Int
{
defer state.reset(&target);
try state.next("4");
try state.next("2 ");
try testing.expectEqual(@as(T, 42), target);
}
}
test "StreamingDecoder struct simple" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct { value: u8 };
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 2), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("{ \"value\": 42 }");
try testing.expectEqual(T{ .value = 42 }, target);
}
}
test "StreamingDecoder struct simple ignore unknown" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct { value: u8 };
const S = StreamingDecoder(T, .{ .ignore_unknown_fields = true });
try testing.expectEqual(@as(usize, 2), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
// Basic value
{
defer state.reset(&target);
try state.next("{ \"value\": 42, \"what\": false }");
try testing.expectEqual(T{ .value = 42 }, target);
}
// Nested arrays and objects
{
defer state.reset(&target);
try state.next("{ \"value\": 42, \"what\": [{}, {}, [], [{}]] }");
try testing.expectEqual(T{ .value = 42 }, target);
}
}
test "StreamingDecoder struct default value" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct { value: u8 = 84 };
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 2), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("{}");
try testing.expectEqual(T{ .value = 84 }, target);
}
}
test "StreamingDecoder struct multiple fields" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct { a: u8, b: bool };
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 3), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("{ \"b\": true, \"a\": 42 }");
try testing.expectEqual(T{ .a = 42, .b = true }, target);
}
}
test "StreamingDecoder struct nested" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct {
a: struct { b: u8 },
};
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("{ \"a\": { \"b\": 42 } }");
try testing.expectEqual(T{ .a = .{ .b = 42 } }, target);
}
}
test "StreamingDecoder nested struct with multiple fields" {
const T = struct {
nested: struct {
a: u8,
b: bool,
},
};
const testing = std.testing;
const alloc = testing.allocator;
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next(
\\{
\\ "nested": {
\\ "a": 42, "b": true
\\ }
\\}
);
}
}
test "StreamingDecoder slice of struct" {
const testing = std.testing;
const alloc = testing.allocator;
const T = []struct {
a: u8,
};
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next(
\\[
\\ { "a": 1 }
\\]
);
try testing.expectEqual(@as(usize, 1), target.len);
try testing.expectEqual(@as(@TypeOf(target[0]), .{ .a = 1 }), target[0]);
}
{
defer state.reset(&target);
try state.next(
\\[
\\ { "a": 1 },
\\ { "a": 2 }
\\]
);
try testing.expectEqual(@as(usize, 2), target.len);
try testing.expectEqual(@as(@TypeOf(target[0]), .{ .a = 1 }), target[0]);
try testing.expectEqual(@as(@TypeOf(target[0]), .{ .a = 2 }), target[1]);
}
}
test "StreamingDecoder struct with string" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct {
a: u8,
b: []const u8,
};
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next(
\\{ "a": 1, "b": "howdy" }
);
try testing.expectEqual(@as(u8, 1), target.a);
try testing.expectEqualStrings("howdy", target.b);
}
}
test "StreamingDecoder struct with sliceof const struct" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct {
a: u8,
b: []const Child,
const Child = struct { v: []const u8 };
};
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next(
\\{
\\ "a": 1,
\\ "b": [{ "v": "howdy" }]
\\}
);
try testing.expectEqual(@as(u8, 1), target.a);
try testing.expectEqual(@as(usize, 1), target.b.len);
try testing.expectEqualStrings("howdy", target.b[0].v);
}
}
test "StreamingDecoder struct with slice of string" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct {
a: u8,
b: [][]const u8,
};
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next(
\\{
\\ "a": 1,
\\ "b": ["hello"]
\\}
);
try testing.expectEqual(@as(u8, 1), target.a);
try testing.expectEqual(@as(usize, 1), target.b.len);
try testing.expectEqualStrings("hello", target.b[0]);
}
}
test "StreamingDecoder struct field options" {
const testing = std.testing;
const alloc = testing.allocator;
const T = struct {
arena: ArenaAllocator = undefined,
value: u8,
pub fn jsonFieldOpts(comptime field: std.meta.FieldEnum(@This())) FieldOptions {
return switch (field) {
.arena => .{ .ignore = true },
else => .{},
};
}
};
const S = StreamingDecoder(T, .{});
try testing.expectEqual(@as(usize, 2), S.fields.len);
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.next("{ \"value\": 42 }");
try testing.expectEqual(@as(u8, 42), target.value);
}
}
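// A minimal sketch exercising the std.io.Writer adapter returned by
// writer() above; it assumes the adapter behaves exactly like calling
// next() directly with the same bytes.
test "StreamingDecoder writer adapter" {
const testing = std.testing;
const alloc = testing.allocator;
const T = bool;
const S = StreamingDecoder(T, .{});
var target: T = undefined;
var state = S.init(alloc, &target);
defer state.deinit();
{
defer state.reset(&target);
try state.writer().writeAll("true");
try testing.expect(target);
}
}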
/// A replacement for std.json.TokenStream that doesn't expect a complete,
/// finished slice of input bytes. It is up to the caller to tell the stream
/// when the input is over.
///
/// Beware: there are some tokens (i.e. String, Number) whose values can only
/// be realized with a fully constructed slice of data. The caller is responsible
/// for buffering this data if it cares.
///
/// This structure owns no memory and allocates no memory.
pub const PartialTokenStream = struct {
/// The parser for the stream. If you reconstruct the TokenStream,
/// this must be copied and replaced to restore state.
parser: StreamingParser = StreamingParser.init(),
data: []const u8,
i: usize = 0,
token: ?Token = null,
pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};
pub const PartialState = union(enum) {
string: usize,
number: usize,
};
pub fn init(data: []const u8) PartialTokenStream {
return .{
.data = data,
};
}
/// Refill the data with the given slice. This must only be called
/// once the previous data is exhausted.
pub fn refill(self: *PartialTokenStream, data: []const u8) void {
assert(self.i == self.data.len);
self.data = data;
self.i = 0;
}
/// Read the next token, null means that the data is exhausted.
/// After data is exhausted, either refill or finalize.
pub fn next(self: *PartialTokenStream) Error!?Token {
if (self.token) |token| {
self.token = null;
return token;
}
var t1: ?Token = undefined;
var t2: ?Token = undefined;
while (self.i < self.data.len) {
try self.parser.feed(self.data[self.i], &t1, &t2);
self.i += 1;
if (t1) |token| {
self.token = t2;
return token;
}
}
return null;
}
/// Finalize the stream. This marks the end of JSON input. This will
/// return an error if the JSON was not valid.
pub fn finalize(self: *PartialTokenStream) Error!?Token {
var t1: ?Token = undefined;
var t2: ?Token = undefined;
// Without this, a bare number fails because the streaming parser doesn't know the input ended.
try self.parser.feed(' ', &t1, &t2);
self.i += 1;
if (t1) |token| {
return token;
} else if (self.parser.complete) {
return null;
} else {
return error.UnexpectedEndOfJson;
}
}
/// Returns a partial state that we're in the middle of (if any). Callers
/// can use this to determine if they need to buffer any data.
pub fn partialState(self: PartialTokenStream) ?PartialState {
const state = @enumToInt(self.parser.state);
if (state >= @enumToInt(StreamingParser.State.String) and
state <= @enumToInt(StreamingParser.State.StringEscapeHexUnicode1))
{
return .{ .string = self.parser.count };
}
if (state >= @enumToInt(StreamingParser.State.Number) and
state <= @enumToInt(StreamingParser.State.NumberExponentDigits))
{
return .{ .number = self.parser.count };
}
return null;
}
};
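// A sketch of the intended drive loop (an assumption drawn from the doc
// comment above; readMore and handle are hypothetical caller helpers):
//
//   var stream = PartialTokenStream.init(first_chunk);
//   while (true) {
//       while (try stream.next()) |token| try handle(token);
//       const chunk = readMore() orelse break;
//       stream.refill(chunk);
//   }
//   _ = try stream.finalize();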
test "PartialTokenStream empty" {
const testing = std.testing;
var stream = PartialTokenStream.init("");
try testing.expect((try stream.next()) == null);
}
test "PartialTokenStream" {
const testing = std.testing;
var stream = PartialTokenStream.init("{");
try testing.expect((try stream.next()).? == .ObjectBegin);
try testing.expect((try stream.next()) == null);
stream.refill("}");
try testing.expect((try stream.next()).? == .ObjectEnd);
try testing.expect((try stream.finalize()) == null);
}
test "PartialTokenStream split string" {
const testing = std.testing;
var stream = PartialTokenStream.init("\"hello");
try testing.expect((try stream.next()) == null);
try testing.expect(stream.partialState().? == .string);
stream.refill(" world\"");
try testing.expect((try stream.next()).? == .String);
try testing.expect(stream.partialState() == null);
try testing.expect((try stream.finalize()) == null);
}
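// A small sketch of finalize() rejecting truncated input, per its doc
// comment above; only the UnexpectedEndOfJson case is assumed here.
test "PartialTokenStream incomplete input" {
const testing = std.testing;
var stream = PartialTokenStream.init("{");
try testing.expect((try stream.next()).? == .ObjectBegin);
try testing.expect((try stream.next()) == null);
try testing.expectError(error.UnexpectedEndOfJson, stream.finalize());
}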