@ShishKabab
Last active July 30, 2025
A zero-allocation YAML parser written in Zig supporting a subset of YAML I needed for my own project.

NOTE: This is not meant to be production-ready for every use case out there; it is only here for curious people reading my articles. That's why I didn't create a GitHub project out of it: it needs a bit more work, and the test files need untangling, before it would be useful to others.
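For orientation, here is a minimal usage sketch of how the iterators below fit together. It assumes the files are saved as lexer.zig and parser.zig (the names their @import paths suggest); the main() wrapper is not part of the gist itself:

const std = @import("std");
const lexer = @import("lexer.zig");
const parser = @import("parser.zig");

pub fn main() !void {
    const source = "key: true";
    // The parser pulls tokens from the lexer on demand; both are plain
    // iterators over the source slice, so nothing is allocated while parsing.
    var it = parser.YamlParserIterator.init(source, lexer.YamlLexerTokenInterator.init(source));
    while (try it.next()) |token| {
        // Each token carries byte offsets into `source` plus a tagged payload.
        std.debug.print("{s}: \"{s}\"\n", .{ @tagName(token.content), source[token.start_index..token.end_index] });
    }
}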

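// ----- lexer.zig (file name inferred from the parser's @import("lexer.zig") below) -----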
const std = @import("std");
const StaticBitStack = @import("utils.zig").StaticBitStack;
pub const YamlLexerTokenType = enum {
    identifier,
    colon,
    integer,
    float,
    string_single_line,
    bool,
    whitespace,
    newline,
    object_start,
    object_end,
    array_start,
    array_end,
    array_item,
    comma,
    comment,
};
pub const YamlLexerTokenIndex = u32;
pub const YamlLexerToken = struct {
    start_index: YamlLexerTokenIndex,
    end_index: YamlLexerTokenIndex,
    content: YamlLexerTokenContent,
};
pub const YamlLexerTokenList = std.ArrayList(YamlLexerToken);
pub const YamlLexerTokenContent = union(YamlLexerTokenType) {
    identifier: void,
    colon: void,
    integer: i64,
    float: f64,
    string_single_line: YamlLexerStringTokenContent,
    bool: bool,
    whitespace: void,
    newline: void,
    object_start: void,
    object_end: void,
    array_start: void,
    array_end: void,
    array_item: void,
    comma: void,
    comment: void,
};
pub const YamlLexerStringTokenContent = struct {
    has_escapes: bool,
};
pub const YamlLexerError = error{
    unexpected_char,
    nesting_depth_exceeded,
    unexpected_nesting_end,
};
const YamlLexerObjectEntryLineStage = enum {
    start,
    newline,
    leading_whitespace,
    array_item,
    last_whitespace,
    identifier,
    colon,
    none,
};
pub const YamlNestingType = enum {
    object,
    array,
};
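// Tracks {}/[] nesting as one bit per level on a StaticBitStack
// (1 = object, 0 = array), so the nesting state needs no allocation.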
pub const YamlNesting = struct {
    bit_stack: StaticBitStack = .{},
    // debug: bool = false,
    pub fn push(self: *YamlNesting, nesting_type: YamlNestingType) !void {
        // if (self.debug) {
        //     std.debug.print("push: {any}\n", .{nesting_type});
        // }
        try self.bit_stack.push(if (nesting_type == .object) 1 else 0);
    }
    pub fn get(self: YamlNesting) ?YamlNestingType {
        const bit = self.bit_stack.get() orelse {
            return null;
        };
        return if (bit == 1) .object else .array;
    }
    pub fn pop(self: *YamlNesting, maybe_expected_nesting_type: ?YamlNestingType) !void {
        // if (self.debug) {
        //     std.debug.print("pop (old): {any}\n", .{self.get()});
        // }
        if (self.get()) |nesting| {
            if (maybe_expected_nesting_type) |expected_nesting_type| {
                if (nesting != expected_nesting_type) {
                    return YamlLexerError.unexpected_nesting_end;
                }
            }
        } else {
            return YamlLexerError.unexpected_nesting_end;
        }
        try self.bit_stack.pop();
        // if (self.debug) {
        //     std.debug.print("pop (new): {any}\n", .{self.get()});
        // }
    }
};
pub const YamlLexerTokenInterator = struct {
    source: []const u8,
    start_index: YamlLexerTokenIndex = 0,
    object_depth: u8 = 0,
    object_entry_line: YamlLexerObjectEntryLineStage = .start,
    last_token: ?YamlLexerToken = null,
    inline_nesting: YamlNesting = .{},
    pub fn init(source: []const u8) YamlLexerTokenInterator {
        return .{
            .source = source,
        };
    }
    pub fn next(self: *YamlLexerTokenInterator) !?YamlLexerToken {
        if (self.start_index >= self.source.len) {
            return null;
        }
        var token: YamlLexerToken = undefined;
        const at_start_index: u8 = self.source[self.start_index];
        var is_entry_value = false;
        if (self.object_entry_line != .none) {
            if (self.last_token) |last_token| {
                if (self.object_entry_line == .newline and last_token.content == .newline) {
                    self.object_entry_line = .start;
                } else if (self.object_entry_line == .start and last_token.content == .identifier) {
                    self.object_entry_line = .identifier;
                } else if (self.object_entry_line == .start and last_token.content == .whitespace) {
                    self.object_entry_line = .leading_whitespace;
                } else if (self.object_entry_line == .leading_whitespace and last_token.content == .identifier) {
                    self.object_entry_line = .identifier;
                } else if (self.object_entry_line == .leading_whitespace and last_token.content == .array_item) {
                    self.object_entry_line = .array_item;
                } else if (self.object_entry_line == .array_item and last_token.content == .whitespace) {
                    self.object_entry_line = .last_whitespace;
                } else if (self.object_entry_line == .last_whitespace and last_token.content == .identifier) {
                    self.object_entry_line = .identifier;
                } else if (self.object_entry_line == .identifier and last_token.content == .colon) {
                    self.object_entry_line = .colon;
                } else if (self.object_entry_line == .colon and last_token.content == .whitespace) {
                    is_entry_value = true;
                    self.object_entry_line = .none;
                } else {
                    self.object_entry_line = .none;
                }
            }
        }
        if (at_start_index == ':') {
            token = self.simpleToken(1, .{ .colon = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == ',') {
            token = self.simpleToken(1, .{ .comma = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == '-') {
            token = self.simpleToken(1, .{ .array_item = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == '{') {
            try self.inline_nesting.push(.object);
            token = self.simpleToken(1, .{ .object_start = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == '}') {
            try self.inline_nesting.pop(.object);
            token = self.simpleToken(1, .{ .object_end = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == '[') {
            try self.inline_nesting.push(.array);
            token = self.simpleToken(1, .{ .array_start = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == ']') {
            try self.inline_nesting.pop(.array);
            token = self.simpleToken(1, .{ .array_end = {} });
            self.start_index = token.end_index;
        } else if (at_start_index == ' ' or at_start_index == '\t') {
            var end_index = self.start_index + 1;
            while (end_index < self.source.len) : (end_index += 1) {
                const at_end_index = self.source[end_index];
                if (at_end_index != ' ' and at_end_index != '\t') {
                    break;
                }
            }
            token = YamlLexerToken{
                .start_index = self.start_index,
                .end_index = end_index,
                .content = .{ .whitespace = {} },
            };
            self.start_index = token.end_index;
        } else if (at_start_index == '\n' or at_start_index == '\r') {
            const is_windows = at_start_index == '\r' and self.start_index + 1 < self.source.len and self.source[self.start_index + 1] == '\n';
            const offset: u8 = if (is_windows) 2 else 1;
            token = self.simpleToken(offset, .{ .newline = {} });
            self.object_entry_line = .newline;
            self.start_index = token.end_index;
        } else if (at_start_index == '#') {
            var end_index = self.start_index + 1;
            while (end_index < self.source.len) : (end_index += 1) {
                const at_end_index = self.source[end_index];
                if (at_end_index == '\r' or at_end_index == '\n') {
                    break;
                }
            }
            token = YamlLexerToken{
                .start_index = self.start_index,
                .end_index = end_index,
                .content = .{ .comment = {} },
            };
            self.start_index = token.end_index;
        } else {
            token = try self.parseValue(is_entry_value);
        }
        self.last_token = token;
        return token;
    }
    fn parseValue(self: *YamlLexerTokenInterator, is_entry_value: bool) !YamlLexerToken {
        const at_start_index: u8 = self.source[self.start_index];
        if (at_start_index == '\'' or at_start_index == '"') {
            const token = self.parseQuotedString(at_start_index);
            self.start_index = token.end_index + 1;
            return token;
        } else if (at_start_index >= '0' and at_start_index <= '9') {
            const token = try self.parseNumber();
            self.start_index = token.end_index;
            return token;
        }
        var end_index = self.start_index + 1;
        const maybe_nesting = self.inline_nesting.get();
        var maybe_whitespace_start: ?YamlLexerTokenIndex = null;
        while (end_index < self.source.len) : (end_index += 1) {
            const at_end_index = self.source[end_index];
            if (at_end_index == '\n' or at_end_index == '\r') {
                break;
            }
            if (!is_entry_value and (at_end_index == ',' or at_end_index == ':')) {
                break;
            }
            if (maybe_nesting) |nesting| {
                if (at_end_index == ' ' and maybe_whitespace_start == null) {
                    maybe_whitespace_start = end_index;
                }
                if ((nesting == .array and at_end_index == ']') or (nesting == .object and at_end_index == '}')) {
                    if (maybe_whitespace_start) |whitespace_start| {
                        end_index = whitespace_start;
                    }
                    break;
                }
                if (at_end_index != ' ') {
                    maybe_whitespace_start = null;
                }
            }
        }
        var token = YamlLexerToken{ .start_index = self.start_index, .end_index = end_index, .content = .{
            .string_single_line = .{ .has_escapes = false },
        } };
        const is_key = token.end_index < self.source.len and self.source[token.end_index] == ':';
        if (!is_key) {
            const sliced = self.source[token.start_index..token.end_index];
            if (std.mem.eql(u8, sliced, "true")) {
                token.content = .{ .bool = true };
            } else if (std.mem.eql(u8, sliced, "false")) {
                token.content = .{ .bool = false };
            }
        } else {
            token.content = .{ .identifier = {} };
        }
        self.start_index = token.end_index;
        return token;
    }
    fn parseNumber(self: *YamlLexerTokenInterator) !YamlLexerToken {
        var end_index = self.start_index + 1;
        var dot_found = false;
        while (end_index < self.source.len) : (end_index += 1) {
            const at_end_index = self.source[end_index];
            if (at_end_index == '.') {
                if (dot_found) {
                    self.start_index = end_index;
                    return YamlLexerError.unexpected_char;
                } else {
                    dot_found = true;
                }
            } else if (at_end_index < '0' or at_end_index > '9') {
                break;
            }
        }
        const sliced = self.source[self.start_index..end_index];
        return YamlLexerToken{
            .start_index = self.start_index,
            .end_index = end_index,
            .content = if (dot_found) .{
                .float = try std.fmt.parseFloat(f64, sliced),
            } else .{
                .integer = try std.fmt.parseInt(i64, sliced, 10),
            },
        };
    }
    fn parseQuotedString(self: YamlLexerTokenInterator, quote: u8) YamlLexerToken {
        var end_index = self.start_index + 1;
        var escaped = false;
        var has_escapes = false;
        while (end_index < self.source.len) : (end_index += 1) {
            const at_end_index = self.source[end_index];
            if (at_end_index == '\\') {
                escaped = !escaped;
                has_escapes = true;
            } else if (at_end_index == quote and !escaped) {
                break;
            } else {
                escaped = false;
            }
        }
        return YamlLexerToken{
            .start_index = self.start_index + 1,
            .end_index = end_index,
            .content = .{ .string_single_line = .{ .has_escapes = has_escapes } },
        };
    }
    fn simpleToken(self: YamlLexerTokenInterator, length: YamlLexerTokenIndex, content: YamlLexerTokenContent) YamlLexerToken {
        return YamlLexerToken{
            .start_index = self.start_index,
            .end_index = self.start_index + length,
            .content = content,
        };
    }
};
fn isValidIdentifierChar(char: u8, first: bool) bool {
    return (!first and char >= '0' and char <= '9') or (char >= 'a' and char <= 'z') or (char >= 'A' and char <= 'Z') or char == '_' or char == '.';
}
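// ----- parser.zig (file name inferred from the test file's @import("yaml/parser.zig") below) -----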
const std = @import("std");
const lexer = @import("lexer.zig");
const StaticBitStack = @import("utils.zig").StaticBitStack;
const most_significant_bit: u64 = 0b1000000000000000000000000000000000000000000000000000000000000000;
pub const YamlParserToken = struct {
    start_index: YamlParserTokenIndex,
    end_index: YamlParserTokenIndex,
    content: YamlParserTokenContent,
};
pub const YamlParserTokenList = std.ArrayList(YamlParserToken);
pub const YamlParserTokenType = enum {
    bool,
    float,
    integer,
    string_single_line,
    object_entry_start,
    object_entry_end,
    array_item_start,
    array_item_end,
};
pub const YamlParserTokenContent = union(YamlParserTokenType) {
    bool: bool,
    float: f64,
    integer: i64,
    string_single_line: YamlParserStringTokenContent,
    object_entry_start: void,
    object_entry_end: void,
    array_item_start: void,
    array_item_end: void,
};
pub const YamlParserStringTokenContent = struct {
    has_escapes: bool,
};
pub const YamlParserTokenIndex = u32;
pub const YamlParserError = error{
    unexpected_token,
    unexpected_end,
    invalid_indent,
    invalid_outdent,
    invalid_indentation,
};
const ParseNewLineOutput = struct {
    next_input_token: ?lexer.YamlLexerToken = null,
    output_token: ?YamlParserToken = null,
};
pub const YamlParserIterator = struct {
    source: []const u8,
    lexer_iterator: lexer.YamlLexerTokenInterator,
    is_new_line: bool = true,
    last_input_token: ?lexer.YamlLexerToken = null,
    next_input_token: ?lexer.YamlLexerToken = null,
    block_nesting: lexer.YamlNesting = .{},
    // for each character from the least significant bit, 0 for spaces, 1 for tabs
    leading_whitespace_chars: u64 = 0,
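    // e.g. the leading whitespace " \t" (a space, then a tab) is stored as
    // leading_whitespace_chars == 0b10 with leading_whitespace_size == 2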
    leading_whitespace_size: u6 = 0,
    // 1 for each index that started an indentation level
    leading_whitespace_levels: u64 = 0,
    indentation_level: u6 = 0,
    outdents_left: u6 = 0,
    last_newline_index: YamlParserTokenIndex = 0,
    last_newline_length: u2 = 0,
    array_item_expected: bool = false,
    array_item_starts_left: u8 = 0,
    next_output_token: ?YamlParserToken = null,
    lexer_time: u64 = 0,
    pub fn init(source: []const u8, lexer_iterator: lexer.YamlLexerTokenInterator) YamlParserIterator {
        return .{
            .source = source,
            .lexer_iterator = lexer_iterator,
        };
    }
    pub fn next(self: *YamlParserIterator) !?YamlParserToken {
        const was_file_start = self.last_input_token == null;
        if (self.outdents_left > 0) {
            self.outdents_left -= 1;
            const block_type = self.block_nesting.get() orelse {
                return YamlParserError.invalid_outdent;
            };
            try self.block_nesting.pop(null);
            return .{
                .start_index = self.last_newline_index,
                .end_index = self.last_newline_index + self.last_newline_length,
                .content = switch (block_type) {
                    .object => .{ .object_entry_end = {} },
                    .array => .{ .array_item_end = {} },
                },
            };
        }
        if (self.array_item_starts_left > 0) {
            self.array_item_starts_left -= 1;
            const start_index = self.next_output_token.?.start_index;
            return .{ .start_index = start_index, .end_index = start_index, .content = .{ .array_item_start = {} } };
        }
        if (self.next_output_token) |token| {
            self.next_output_token = null;
            return token;
        }
        if (self.array_item_expected) {
            self.array_item_expected = false;
            self.array_item_starts_left += 1;
        }
        var input_token: lexer.YamlLexerToken = undefined;
        if (self.next_input_token) |next_input_token| {
            input_token = next_input_token;
            self.next_input_token = null;
            // std.debug.print("IN token (sn): {any}\n", .{input_token});
        } else {
            input_token = try self.nextInputToken() orelse {
                return self.getFinalToken();
            };
            // std.debug.print("IN token (s): {any}\n", .{input_token});
        }
        if (was_file_start) {
            while (input_token.content == .comment or input_token.content == .newline) {
                input_token = try self.nextInputToken() orelse {
                    return self.getFinalToken();
                };
            }
        }
        var has_array_item = false;
        var array_item_index: YamlParserTokenIndex = 0;
        var has_array_start = false;
        var has_object_start = false;
        var empty_object = false;
        while (true) {
            const was_object_start = input_token.content == .object_start;
            if (input_token.content == .newline) {
                has_array_item = false;
                array_item_index = 0;
                has_object_start = false;
                has_array_start = false;
                const result = try self.parseNewLine(input_token);
                if (result.next_input_token) |next_input_token| {
                    input_token = next_input_token;
                }
                if (result.output_token) |output_token| {
                    return output_token;
                }
            } else if (input_token.content == .whitespace) {
                input_token = try self.nextInputToken() orelse {
                    if (self.block_nesting.bit_stack.size > 0) {
                        return YamlParserError.unexpected_end;
                    } else {
                        return self.getFinalToken();
                    }
                };
                // std.debug.print("IN token (w): {any}\n", .{input_token});
            } else if (input_token.content == .object_start) {
                has_object_start = true;
                empty_object = true;
                // skip these, lexer already keeps track of relevant data
                input_token = try self.nextInputToken() orelse {
                    return self.getFinalToken();
                };
                // std.debug.print("IN token (os): {any}\n", .{input_token});
            } else if (input_token.content == .object_end) {
                if (empty_object) {
                    empty_object = false;
                    input_token = try self.nextInputToken() orelse {
                        return self.getFinalToken();
                    };
                } else {
                    break;
                }
                // std.debug.print("IN token (oe): {any}\n", .{input_token});
            } else if (input_token.content == .array_start) {
                has_array_start = true;
                input_token = try self.nextInputToken() orelse {
                    return self.getFinalToken();
                };
                self.array_item_starts_left += 1;
                // std.debug.print("IN token (as): {any}\n", .{input_token});
            } else if (input_token.content == .array_item) {
                if (has_array_item or has_array_start or has_object_start) {
                    return YamlParserError.unexpected_token;
                }
                has_array_item = true;
                array_item_index = input_token.start_index;
                input_token = try self.nextInputToken() orelse {
                    return self.getFinalToken();
                };
                // std.debug.print("IN token (ai): {any}\n", .{input_token});
            } else {
                break;
            }
            if (!was_object_start) {
                empty_object = false;
            }
        }
        var output_token: ?YamlParserToken = null;
        const was_new_line = self.is_new_line;
        self.is_new_line = false;
        if (was_new_line) {
            if (input_token.content == .identifier) {
                if (has_array_item) {
                    try self.block_nesting.push(.array);
                    if (!has_object_start) {
                        try self.appendLeadingWhitespace("  ");
                    }
                }
                if (!has_object_start) {
                    try self.block_nesting.push(.object);
                }
            } else if (has_array_item) {
                try self.block_nesting.push(.array);
            }
        }
        if (input_token.content == .identifier) {
            if (!has_array_item) {
                _ = try self.expectNextToken(.colon);
            }
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .object_entry_start = {} },
            };
        } else if (input_token.content == .string_single_line) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .string_single_line = .{ .has_escapes = input_token.content.string_single_line.has_escapes } },
            };
        } else if (input_token.content == .integer) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .integer = input_token.content.integer },
            };
        } else if (input_token.content == .float) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .float = input_token.content.float },
            };
        } else if (input_token.content == .bool) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .bool = input_token.content.bool },
            };
        } else if (input_token.content == .comma) {
            const lexer_nesting = self.lexer_iterator.inline_nesting.get() orelse {
                return YamlParserError.unexpected_token;
            };
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = switch (lexer_nesting) {
                    .object => .{ .object_entry_end = {} },
                    .array => .{ .array_item_end = {} },
                },
            };
            if (lexer_nesting == .array) {
                self.array_item_expected = true;
            }
            switch (lexer_nesting) {
                .object => {
                    output_token = .{
                        .start_index = input_token.start_index,
                        .end_index = input_token.end_index,
                        .content = .{ .object_entry_end = {} },
                    };
                },
                .array => {},
            }
        } else if (input_token.content == .object_end) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .object_entry_end = {} },
            };
        } else if (input_token.content == .array_end) {
            output_token = .{
                .start_index = input_token.start_index,
                .end_index = input_token.end_index,
                .content = .{ .array_item_end = {} },
            };
        }
        if (output_token == null) {
            return YamlParserError.unexpected_token;
        }
        if (has_array_item) {
            self.next_input_token = input_token;
            return .{ .start_index = output_token.?.start_index, .end_index = output_token.?.start_index, .content = .{ .array_item_start = {} } };
        }
        if (self.array_item_starts_left > 0) {
            self.next_output_token = output_token;
            self.array_item_starts_left -= 1;
            return .{ .start_index = output_token.?.start_index, .end_index = output_token.?.start_index, .content = .{ .array_item_start = {} } };
        }
        return output_token;
    }
    fn parseNewLine(self: *YamlParserIterator, orig_input_token: lexer.YamlLexerToken) !ParseNewLineOutput {
        var input_token = orig_input_token;
        var maybe_next_input_token: ?lexer.YamlLexerToken = input_token;
        var maybe_leading_whitespace_token: ?lexer.YamlLexerToken = null;
        self.is_new_line = true;
        var is_last_line = false;
        while (maybe_next_input_token) |next_input_token| {
            if (next_input_token.content == .whitespace) {
                maybe_leading_whitespace_token = next_input_token;
            } else if (next_input_token.content == .newline) {
                maybe_leading_whitespace_token = null;
                self.last_newline_index = input_token.start_index;
                self.last_newline_length = @intCast(input_token.end_index - input_token.start_index);
            } else if (next_input_token.content != .comment) {
                break;
            }
            maybe_next_input_token = try self.nextInputToken();
            // std.debug.print("IN token (n): {any}\n", .{maybe_next_input_token});
            if (maybe_next_input_token == null) {
                is_last_line = true;
                break;
            }
        }
        const prev_indent_level = self.indentation_level;
        if (maybe_leading_whitespace_token) |leading_whitespace_token| {
            if (leading_whitespace_token.content == .whitespace) {
                try self.detectLeadingWhitespace(self.source[leading_whitespace_token.start_index..leading_whitespace_token.end_index]);
            } else {
                try self.detectLeadingWhitespace("");
            }
        } else {
            try self.detectLeadingWhitespace("");
        }
        // std.debug.print("{d} -> {d}\n", .{ prev_indent_level, self.indentation_level });
        if (self.indentation_level <= prev_indent_level) {
            if (self.indentation_level < prev_indent_level) {
                self.outdents_left = prev_indent_level - self.indentation_level;
            }
            if (!is_last_line) {
                if (maybe_next_input_token) |next_input_token| {
                    self.next_input_token = next_input_token;
                }
            }
            const block_type = self.block_nesting.get() orelse {
                return YamlParserError.invalid_outdent;
            };
            try self.block_nesting.pop(null);
            return .{ .output_token = .{
                .start_index = self.last_newline_index,
                .end_index = self.last_newline_index + self.last_newline_length,
                .content = switch (block_type) {
                    .object => .{ .object_entry_end = {} },
                    .array => .{ .array_item_end = {} },
                },
            } };
        } else if (maybe_next_input_token) |next_input_token| {
            input_token = next_input_token;
        } else {
            return .{ .output_token = try self.getFinalToken() };
        }
        return .{
            .next_input_token = input_token,
        };
    }
    fn nextInputToken(self: *YamlParserIterator) !?lexer.YamlLexerToken {
        var timer = try std.time.Timer.start();
        const input_token = try self.lexer_iterator.next();
        self.lexer_time += timer.read();
        if (input_token != null) {
            self.last_input_token = input_token;
        }
        return input_token;
    }
    fn getFinalToken(self: *YamlParserIterator) !?YamlParserToken {
        if (self.block_nesting.get()) |nesting| {
            if (self.last_input_token) |last_input_token| {
                try self.block_nesting.pop(null);
                return .{
                    .start_index = last_input_token.end_index,
                    .end_index = last_input_token.end_index,
                    .content = switch (nesting) {
                        .object => .object_entry_end,
                        .array => .array_item_end,
                    },
                };
            }
        }
        return null;
    }
    pub fn expectNextToken(self: *YamlParserIterator, maybe_expected: ?lexer.YamlLexerTokenType) !lexer.YamlLexerToken {
        const token = try self.nextInputToken() orelse {
            return YamlParserError.unexpected_end;
        };
        if (maybe_expected) |expected| {
            // std.debug.print("expect next ({any}): {any}\n", .{ expected, token });
            if (token.content != expected) {
                return YamlParserError.unexpected_token;
            }
        }
        return token;
    }
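    // Registers synthetic extra indentation; next() calls this with two spaces
    // so that entries after a block array item's "- " nest one level deeper.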
    pub fn appendLeadingWhitespace(self: *YamlParserIterator, str: []const u8) !void {
        var next_whitespace_chars: u64 = self.leading_whitespace_chars;
        for (str, 0..) |char, index| {
            std.debug.assert(char == ' ' or char == '\t');
            const bit: u64 = @intFromBool(char == '\t');
            const shift = @as(u6, @intCast(index)) + self.leading_whitespace_size;
            next_whitespace_chars = next_whitespace_chars | (bit << shift);
        }
        const indentation_diff: u6 = @intCast(str.len);
        self.leading_whitespace_levels = self.leading_whitespace_levels >> indentation_diff;
        self.leading_whitespace_levels = self.leading_whitespace_levels | (most_significant_bit >> (indentation_diff - 1));
        self.leading_whitespace_chars = next_whitespace_chars;
        self.leading_whitespace_size += indentation_diff;
        self.indentation_level += 1;
    }
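    // Compares a line's leading whitespace against the remembered one, character
    // by character (space vs. tab), and adjusts the indentation level; mismatched
    // prefixes yield invalid_indent / invalid_outdent / invalid_indentation.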
    pub fn detectLeadingWhitespace(self: *YamlParserIterator, next_whitespace_string: []const u8) !void {
        std.debug.assert(next_whitespace_string.len < 64);
        const next_whitespace_size: u6 = @intCast(next_whitespace_string.len);
        var next_whitespace_chars: u64 = 0;
        for (next_whitespace_string, 0..) |char, index| {
            std.debug.assert(char == ' ' or char == '\t');
            const bit: u64 = @intFromBool(char == '\t');
            next_whitespace_chars = next_whitespace_chars | (bit << @as(u6, @intCast(index)));
        }
        if (next_whitespace_size > self.leading_whitespace_size) {
            const common_mask = leastSignificantBits(self.leading_whitespace_size);
            if (self.leading_whitespace_chars & common_mask != next_whitespace_chars & common_mask) {
                return YamlParserError.invalid_indent;
            }
            self.indentation_level += 1;
            const indentation_diff: u6 = next_whitespace_size - self.leading_whitespace_size;
            self.leading_whitespace_levels = self.leading_whitespace_levels >> indentation_diff;
            self.leading_whitespace_levels = self.leading_whitespace_levels | (most_significant_bit >> (indentation_diff - 1));
        } else if (next_whitespace_size < self.leading_whitespace_size) {
            const common_mask = leastSignificantBits(next_whitespace_size);
            if (self.leading_whitespace_chars & common_mask != next_whitespace_chars & common_mask) {
                return YamlParserError.invalid_outdent;
            }
            var indentation_diff: u6 = self.leading_whitespace_size - next_whitespace_size;
            var next_indentation_level = self.indentation_level;
            var next_levels = self.leading_whitespace_levels;
            while (true) {
                const expected_outdent: u6 = @intCast(@clz(next_levels) + 1);
                next_levels = next_levels << @intCast(@clz(next_levels) + 1);
                if (indentation_diff >= expected_outdent) {
                    next_indentation_level -= 1;
                    indentation_diff -= expected_outdent;
                    if (indentation_diff == 0) {
                        break;
                    } else {
                        if (next_levels == 0) {
                            return YamlParserError.invalid_outdent;
                        }
                    }
                } else {
                    return YamlParserError.invalid_outdent;
                }
            }
            self.leading_whitespace_levels = next_levels;
            self.indentation_level = next_indentation_level;
        } else {
            const common_mask = leastSignificantBits(self.leading_whitespace_size);
            if (self.leading_whitespace_chars & common_mask != next_whitespace_chars & common_mask) {
                return YamlParserError.invalid_indentation;
            }
        }
        // self.leading_whitespace_levels |= level_bit;
        self.leading_whitespace_size = next_whitespace_size;
        self.leading_whitespace_chars = next_whitespace_chars;
    }
};
fn leastSignificantBits(n: u6) u64 {
    if (n >= 64) return 0xFFFFFFFFFFFFFFFF;
    return (@as(u64, 1) << n) - 1;
}
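// ----- utils.zig (file name inferred from @import("utils.zig") above) -----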
const std = @import("std");
pub const StatickBitStackError = error{
    exceeded_capacity,
    popped_empty,
};
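// A fixed-capacity stack of single bits packed into one u64 (at most 63 entries):
// push() shifts the word left and sets bit 0; pop() shifts it right.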
pub const StaticBitStack = struct {
    stack: u64 = 0,
    size: u6 = 0,
    pub fn push(self: *StaticBitStack, value: u1) !void {
        if (self.size == 63) {
            return StatickBitStackError.exceeded_capacity;
        }
        self.size += 1;
        self.stack = self.stack << 1;
        if (self.stack & 1 != value) {
            self.stack ^= 1;
        }
    }
    pub fn get(self: StaticBitStack) ?u1 {
        if (self.size > 0) {
            return @intCast(self.stack & 1);
        } else {
            return null;
        }
    }
    pub fn pop(self: *StaticBitStack) !void {
        if (self.size == 0) {
            return StatickBitStackError.popped_empty;
        }
        self.size -= 1;
        self.stack = self.stack >> 1;
    }
};
test "BitStack" {
    var bit_stack = StaticBitStack{};
    try bit_stack.push(1);
    try bit_stack.push(0);
    try bit_stack.push(0);
    try bit_stack.push(1);
    try std.testing.expectEqual(1, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(0, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(0, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(1, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(null, bit_stack.get());
    try bit_stack.push(1);
    try std.testing.expectEqual(1, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(null, bit_stack.get());
    try bit_stack.push(0);
    try std.testing.expectEqual(0, bit_stack.get());
    try bit_stack.pop();
    try std.testing.expectEqual(null, bit_stack.get());
}
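// ----- lexer tests (separate file; imports the above as yaml/lexer.zig) -----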
const std = @import("std");
const lexer = @import("yaml/lexer.zig");
const testing = @import("utils/testing.zig");
const pretty = @import("external/pretty.zig");
const ShortExpectedLexerToken = struct {
    length: lexer.YamlLexerTokenIndex,
    content: lexer.YamlLexerTokenContent,
    start_offset: lexer.YamlLexerTokenIndex = 0,
    end_offset: lexer.YamlLexerTokenIndex = 0,
};
fn testLexing(short_expected: []const ShortExpectedLexerToken, source: []const u8) !void {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    const allocator = arena.allocator();
    defer arena.deinit();
    var expected = lexer.YamlLexerTokenList.init(allocator);
    var start_index: lexer.YamlLexerTokenIndex = 0;
    for (short_expected) |short| {
        start_index += short.start_offset;
        try expected.append(.{
            .start_index = start_index,
            .end_index = start_index + short.length,
            .content = short.content,
        });
        start_index += short.length;
        start_index += short.end_offset;
    }
    var tokenizer = lexer.YamlLexerTokenInterator.init(source);
    var tokens = lexer.YamlLexerTokenList.init(allocator);
    var parsing_time: u64 = 0;
    var timer = try std.time.Timer.start();
    while (try tokenizer.next()) |token| {
        parsing_time += timer.read();
        try tokens.append(token);
        timer.reset();
    }
    // std.debug.print("parsing time: {d}ns\n", .{parsing_time});
    std.testing.expectEqualDeep(expected.items, tokens.items) catch |err| {
        var slices = std.ArrayList([]const u8).init(allocator);
        for (tokens.items) |token| {
            try slices.append(source[token.start_index..token.end_index]);
        }
        std.debug.print("\nactual slices:\n\n", .{});
        try pretty.print(allocator, slices.items, .{});
        std.debug.print("\nactual:\n\n", .{});
        try pretty.print(allocator, tokens.items, .{});
        std.debug.print("\nexpected:\n\n", .{});
        try pretty.print(allocator, expected.items, .{});
        std.debug.print("\n", .{});
        return err;
    };
}
test "lexer: top-level true" {
    try testLexing(&.{
        .{ .length = 4, .content = .{ .bool = true } },
    }, "true");
}
test "lexer: top-level false" {
    try testLexing(&.{
        .{ .length = 5, .content = .{ .bool = false } },
    }, "false");
}
test "lexer: top-level one-entry object" {
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 5, .content = .{ .bool = false } },
    }, "key: false");
}
test "lexer: top-level two-entry object" {
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 5, .content = .{ .bool = false } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .bool = true } },
    }, "key: false\nkey2: true");
}
test "lexer: nested objects" {
    const source =
        \\parent:
        \\  child: true
        \\key: false
    ;
    try testLexing(&.{
        .{ .length = 6, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 2, .content = .{ .whitespace = {} } },
        .{ .length = 5, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .bool = true } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 5, .content = .{ .bool = false } },
    }, source);
}
test "lexer: single-line strings" {
    const source =
        \\key1: "test key1 value"
        \\key2: 'test key2 value'
        \\key3: "test\"key3 value"
        \\key4: 'test\'key4 value'
        \\key5: test key5 value
    ;
    try testLexing(&.{
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 15, .start_offset = 1, .end_offset = 1, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 15, .start_offset = 1, .end_offset = 1, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 16, .start_offset = 1, .end_offset = 1, .content = .{ .string_single_line = .{ .has_escapes = true } } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 16, .start_offset = 1, .end_offset = 1, .content = .{ .string_single_line = .{ .has_escapes = true } } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 15, .content = .{ .string_single_line = .{ .has_escapes = false } } },
    }, source);
}
test "lexer: numbers" {
    const source =
        \\int: 221
        \\flt: 23.12
    ;
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 3, .content = .{ .integer = 221 } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 5, .content = .{ .float = 23.12 } },
    }, source);
}
test "lexer: inline object" {
    const source =
        \\obj: { key1: true, key2: value2 space }
    ;
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .object_start = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .bool = true } },
        .{ .length = 1, .content = .{ .comma = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 12, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .object_end = {} } },
    }, source);
}
test "lexer: inline array" {
    const source =
        \\obj: [ value1, value2 ]
    ;
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_start = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 6, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .comma = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 6, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_end = {} } },
    }, source);
}
test "lexer: list array" {
    const source =
        \\obj:
        \\  - value1
        \\  - true
        \\  - key: 12
        \\  - key: test test
    ;
    try testLexing(&.{
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 2, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_item = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 6, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 2, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_item = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 4, .content = .{ .bool = true } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 2, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_item = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 2, .content = .{ .integer = 12 } },
        .{ .length = 1, .content = .{ .newline = {} } },
        .{ .length = 2, .content = .{ .whitespace = {} } },
        .{ .length = 1, .content = .{ .array_item = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 3, .content = .{ .identifier = {} } },
        .{ .length = 1, .content = .{ .colon = {} } },
        .{ .length = 1, .content = .{ .whitespace = {} } },
        .{ .length = 9, .content = .{ .string_single_line = .{ .has_escapes = false } } },
    }, source);
}
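// ----- parser tests (separate file; imports the above as yaml/parser.zig) -----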
const std = @import("std");
const lexer = @import("yaml/lexer.zig");
const parser = @import("yaml/parser.zig");
const testing = @import("utils/testing.zig");
const pretty = @import("external/pretty.zig");
const ShortExpectedParserToken = struct {
    length: i16,
    content: parser.YamlParserTokenContent,
    start_offset: i8,
    end_offset: i8 = 0,
};
fn testParsing(short_expected: []const ShortExpectedParserToken, source: []const u8) !void {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    const allocator = arena.allocator();
    defer arena.deinit();
    var expected = parser.YamlParserTokenList.init(allocator);
    var expected_start_index: i16 = 0;
    for (short_expected) |short| {
        expected_start_index += @intCast(short.start_offset);
        try expected.append(.{
            .start_index = @intCast(expected_start_index),
            .end_index = @intCast(expected_start_index + short.length),
            .content = short.content,
        });
        expected_start_index += @intCast(short.length);
        expected_start_index += @intCast(short.end_offset);
    }
    var tokenizer = parser.YamlParserIterator.init(source, lexer.YamlLexerTokenInterator.init(source));
    var tokens = parser.YamlParserTokenList.init(allocator);
    var parsing_time: u64 = 0;
    var timer = try std.time.Timer.start();
    while (try tokenizer.next()) |token| {
        parsing_time += timer.read();
        try tokens.append(token);
        // const text = source[token.start_index..token.end_index];
        // std.debug.print("OUT token {s}: \"{s}\"\n\n", .{
        //     @tagName(token.content),
        //     if (text.len == 1 and text[0] == '\n') "\\n" else text,
        // });
    }
    // std.debug.print("parsing time: {d}ns\n", .{parsing_time});
    std.testing.expectEqualDeep(expected.items, tokens.items) catch |err| {
        var slices = std.ArrayList([]const u8).init(allocator);
        for (tokens.items) |token| {
            const text = source[token.start_index..token.end_index];
            if (text.len == 1 and text[0] == '\n') {
                try slices.append(try std.fmt.allocPrint(
                    allocator,
                    "\\n ({s})",
                    .{@tagName(token.content)},
                ));
            } else {
                try slices.append(try std.fmt.allocPrint(
                    allocator,
                    "{s} ({s})",
                    .{ text, @tagName(token.content) },
                ));
            }
        }
        try pretty.print(allocator, slices.items, .{});
        std.debug.print("\nactual:\n\n", .{});
        try pretty.print(allocator, tokens.items, .{});
        std.debug.print("\nexpected:\n\n", .{});
        try pretty.print(allocator, expected.items, .{});
        std.debug.print("\n", .{});
        var short_actual = std.ArrayList(ShortExpectedParserToken).init(allocator);
        var actual_start_index: i32 = 0;
        for (tokens.items) |token| {
            try short_actual.append(.{
                .start_offset = @intCast(@as(i32, @intCast(token.start_index)) - actual_start_index),
                .length = @intCast(token.end_index - token.start_index),
                .content = token.content,
            });
            actual_start_index = @intCast(token.end_index);
        }
        std.debug.print("\nactual short: {any}\n", .{short_actual.items});
        return err;
    };
}
test "parser: nesting detection" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    const allocator = arena.allocator();
    _ = allocator; // autofix
    defer arena.deinit();
    var tokenizer = parser.YamlParserIterator.init("", lexer.YamlLexerTokenInterator.init(""));
    try tokenizer.detectLeadingWhitespace("");
    try std.testing.expectEqual(0, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(0, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(0, tokenizer.indentation_level);
    try tokenizer.detectLeadingWhitespace(" \t");
    try std.testing.expectEqual(0b10, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(2, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0100000000000000000000000000000000000000000000000000000000000000, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(1, tokenizer.indentation_level);
    try tokenizer.detectLeadingWhitespace(" \t  \t");
    try std.testing.expectEqual(0b10010, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(5, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0010100000000000000000000000000000000000000000000000000000000000, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(2, tokenizer.indentation_level);
    try std.testing.expectError(
        parser.YamlParserError.invalid_indent,
        tokenizer.detectLeadingWhitespace("      "),
    );
    try std.testing.expectError(
        parser.YamlParserError.invalid_outdent,
        tokenizer.detectLeadingWhitespace(" \t "),
    );
    try std.testing.expectError(
        parser.YamlParserError.invalid_indentation,
        tokenizer.detectLeadingWhitespace("     "),
    );
    try tokenizer.detectLeadingWhitespace(" \t");
    try std.testing.expectEqual(0b10, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(2, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0100000000000000000000000000000000000000000000000000000000000000, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(1, tokenizer.indentation_level);
    try tokenizer.detectLeadingWhitespace(" \t  \t");
    try tokenizer.detectLeadingWhitespace(" \t");
    // std.debug.print("\nact:{s}\nexp:{s}\n", .{
    //     try fmt.allocFormatBits(allocator, tokenizer.leading_whitespace_levels),
    //     try fmt.allocFormatBits(allocator, 0b0100000000000000000000000000000000000000000000000000000000000000),
    // });
    try std.testing.expectEqual(0b10, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(2, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0100000000000000000000000000000000000000000000000000000000000000, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(1, tokenizer.indentation_level);
    try tokenizer.appendLeadingWhitespace("  ");
    try std.testing.expectEqual(0b0010, tokenizer.leading_whitespace_chars);
    try std.testing.expectEqual(4, tokenizer.leading_whitespace_size);
    try std.testing.expectEqual(0b0101000000000000000000000000000000000000000000000000000000000000, tokenizer.leading_whitespace_levels);
    try std.testing.expectEqual(2, tokenizer.indentation_level);
}
// test "parser: block nesting" {
// var nesting = parser.YamlComplexNesting{};
// try nesting.push(.array);
// try nesting.push(.object);
// try std.testing.expectEqual(.object, nesting.get());
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(.array, nesting.get());
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(null, nesting.get());
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.push(.object);
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.push(.object_and_array);
// try std.testing.expectEqual(.object, nesting.get());
// try std.testing.expectEqual(true, nesting.pending_array_get);
// try std.testing.expectEqual(true, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(.array, nesting.get());
// try std.testing.expectEqual(true, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(.object, nesting.get());
// try nesting.pop();
// try std.testing.expectEqual(null, nesting.get());
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.push(.object_and_array);
// try std.testing.expectEqual(.object, nesting.get());
// try std.testing.expectEqual(true, nesting.pending_array_get);
// try std.testing.expectEqual(true, nesting.pending_object_pop);
// try nesting.push(.object);
// try std.testing.expectEqual(.object, nesting.get());
// try std.testing.expectEqual(false, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(.object, nesting.get());
// try std.testing.expectEqual(true, nesting.pending_array_get);
// try std.testing.expectEqual(true, nesting.pending_object_pop);
// try nesting.pop();
// try std.testing.expectEqual(.array, nesting.get());
// try std.testing.expectEqual(true, nesting.pending_array_get);
// try std.testing.expectEqual(false, nesting.pending_object_pop);
// }
test "parser: top-level true" {
    try testParsing(&.{
        .{ .length = 4, .start_offset = 0, .content = .{ .bool = true } },
    }, "true");
}
test "parser: top-level false" {
    try testParsing(&.{
        .{ .length = 5, .start_offset = 0, .content = .{ .bool = false } },
    }, "false");
}
test "parser: top-level one-entry object" {
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 2, .content = .{ .bool = false } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, "key: false");
}
test "parser: top-level two-entry object" {
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 2, .content = .{ .bool = false } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 4, .start_offset = 2, .content = .{ .bool = true } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, "key: false\nkey2: true");
}
test "parser: nested objects" {
    const source =
        \\parent:
        \\  child: 0
        \\key: 1
        \\parent2:
        \\  child2:
        \\    child3: 2
        \\key2: 3
    ;
    try testParsing(&.{
        // parent - start
        .{ .length = 6, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 0 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // parent - end
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        // key
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 1 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // parent2
        .{ .length = 7, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        // child2
        .{ .length = 6, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        // child3
        .{ .length = 6, .start_offset = 6, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 2 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        // key2
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 3 } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: comments" {
    const source =
        \\parent:
        \\  child: 0
        \\  # comment
        \\# comment
        \\key: 1
        \\parent2:
        \\  child2:
        \\    child3: 2
        \\key2: 3
    ;
    try testParsing(&.{
        // parent - start
        .{ .length = 6, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 0 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // parent - end
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        // key
        .{ .length = 3, .start_offset = 22, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 1 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // parent2
        .{ .length = 7, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        // child2
        .{ .length = 6, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        // child3
        .{ .length = 6, .start_offset = 6, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 2 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        // key2
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 1, .start_offset = 2, .content = .{ .integer = 3 } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: single-line strings" {
    const source =
        \\key1: "test key1 value"
        \\key2: 'test key2 value'
        \\key3: "test\"key3 value"
        \\key4: 'test\'key4 value'
        \\key5: test key5 value
    ;
    try testParsing(&.{
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 15, .start_offset = 3, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 15, .start_offset = 3, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 16, .start_offset = 3, .content = .{ .string_single_line = .{ .has_escapes = true } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 16, .start_offset = 3, .content = .{ .string_single_line = .{ .has_escapes = true } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 15, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: numbers" {
    const source =
        \\int: 221
        \\flt: 23.12
    ;
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 3, .start_offset = 2, .content = .{ .integer = 221 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 2, .content = .{ .float = 23.12 } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: inline object" {
    const source =
        \\obj: { key1: true, key2: value2 space }
    ;
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 4, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 4, .start_offset = 2, .content = .{ .bool = true } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 4, .start_offset = 1, .content = .{ .object_entry_start = {} } },
        .{ .length = 12, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: inline array" {
    const source =
        \\obj: [ value1, true, [123, test test] ]
    ;
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .array_item_start = {} } },
        .{ .length = 6, .start_offset = 0, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 1, .content = .{ .array_item_start = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .bool = true } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 2, .content = .{ .array_item_start = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .array_item_start = {} } },
        .{ .length = 3, .start_offset = 0, .content = .{ .integer = 123 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 1, .content = .{ .array_item_start = {} } },
        .{ .length = 9, .start_offset = 0, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 1, .start_offset = 1, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: list array" {
    const source =
        \\obj:
        \\  - value1
        \\  - true
        \\  - key: 12
        \\  - key:
        \\      foo: 34
        \\  - key: 56
        \\    foo: 78
    ;
    try testParsing(&.{
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 0, .start_offset = 6, .content = .{ .array_item_start = {} } },
        .{ .length = 6, .start_offset = 0, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .array_item_start = {} } },
        .{ .length = 4, .start_offset = 0, .content = .{ .bool = true } },
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .array_item_start = {} } },
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 2, .start_offset = 2, .content = .{ .integer = 12 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .array_item_start = {} } },
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 3, .start_offset = 8, .content = .{ .object_entry_start = {} } },
        .{ .length = 2, .start_offset = 2, .content = .{ .integer = 34 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        .{ .length = 1, .start_offset = -1, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .array_item_start = {} } },
        .{ .length = 3, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 2, .start_offset = 2, .content = .{ .integer = 56 } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 3, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 2, .start_offset = 2, .content = .{ .integer = 78 } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .array_item_end = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: bug #1" {
    const source =
        \\user:
        \\  home_base_set: { type: boolean, default: false }
        \\connections:
        \\  - { source: frontend.app.services.auth, target: backend.auth }
        \\  - { source: frontend.app.screens.login, target: frontend.app.services.auth }
    ;
    try testParsing(&.{
        // user
        .{ .length = 4, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        // homebase
        .{ .length = 13, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        // type
        .{ .length = 4, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 7, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // default
        .{ .length = 7, .start_offset = 1, .content = .{ .object_entry_start = {} } },
        .{ .length = 5, .start_offset = 2, .content = .{ .bool = false } },
        .{ .length = 1, .start_offset = 1, .content = .{ .object_entry_end = {} } },
        // homebase - end
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // user - end
        .{ .length = 1, .start_offset = -1, .content = .{ .object_entry_end = {} } },
        // connections
        .{ .length = 11, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        // connections[0]
        .{ .length = 0, .start_offset = 8, .content = .{ .array_item_start = {} } },
        // source
        .{ .length = 6, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 26, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // target
        .{ .length = 6, .start_offset = 1, .content = .{ .object_entry_start = {} } },
        .{ .length = 12, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 1, .content = .{ .object_entry_end = {} } },
        // connections[0] - end
        .{ .length = 1, .start_offset = 0, .content = .{ .array_item_end = {} } },
        // connections[1]
        .{ .length = 0, .start_offset = 6, .content = .{ .array_item_start = {} } },
        // source
        .{ .length = 6, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 26, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 0, .content = .{ .object_entry_end = {} } },
        // target
        .{ .length = 6, .start_offset = 1, .content = .{ .object_entry_start = {} } },
        .{ .length = 26, .start_offset = 2, .content = .{ .string_single_line = .{ .has_escapes = false } } },
        .{ .length = 1, .start_offset = 1, .content = .{ .object_entry_end = {} } },
        // connections[1] - end
        .{ .length = 0, .start_offset = 0, .content = .{ .array_item_end = {} } },
        // connections - end
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}
test "parser: bug #2" {
    const source =
        \\components:
        \\  children: {}
    ;
    try testParsing(&.{
        .{ .length = 10, .start_offset = 0, .content = .{ .object_entry_start = {} } },
        .{ .length = 8, .start_offset = 4, .content = .{ .object_entry_start = {} } },
        .{ .length = 0, .start_offset = 4, .content = .{ .object_entry_end = {} } },
        .{ .length = 0, .start_offset = 0, .content = .{ .object_entry_end = {} } },
    }, source);
}