Skip to content

Instantly share code, notes, and snippets.

@archaistvolts
Last active March 12, 2021 05:39
Show Gist options
  • Select an option

  • Save archaistvolts/ef723e57874d30b7bbc3f4fb3667ae1e to your computer and use it in GitHub Desktop.

Select an option

Save archaistvolts/ef723e57874d30b7bbc3f4fb3667ae1e to your computer and use it in GitHub Desktop.
Parser combinators in zig
// adapted from https://gist.github.com/slimsag/db2dd2c49aa038e23b654120e70c9b00
const std = @import("std");
const Allocator = std.mem.Allocator;
pub const Error = error{
EndOfStream,
Utf8InvalidStartByte,
} || std.fs.File.ReadError || std.fs.File.SeekError || std.mem.Allocator.Error;
pub fn Parser(comptime Value: type, comptime Reader: type) type {
return struct {
const Self = @This();
pub const ReaderT = Reader;
pub const ValueT = Value;
_parse: fn (self: *Self, allocator: *Allocator, src: *Reader) Error!?Value,
pub fn parse(self: *Self, allocator: *Allocator, src: *Reader) Error!?Value {
return self._parse(self, allocator, src);
}
};
}
pub fn String(comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
want: []const u8,
const Self = @This();
pub const P = Parser([]const u8, Reader);
pub fn init(want: []const u8) Self {
return Self{ .want = want };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?[]const u8 {
const self = @fieldParentPtr(Self, "parser", parser);
const buf = try allocator.alloc(u8, self.want.len);
const read = try src.reader().readAll(buf);
std.log.debug("Literal read {} buf {s} want.len {}", .{ read, buf[0..read], self.want.len });
if (read < self.want.len or !std.mem.eql(u8, buf, self.want)) {
try src.seekableStream().seekBy(-@intCast(i64, read));
allocator.free(buf);
return null;
}
return buf;
}
};
}
pub fn OneOf(comptime Value: type, comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
parsers: []*P,
const Self = @This();
const P = Parser(Value, Reader);
pub fn init(parsers: []*P) Self {
return Self{ .parsers = parsers };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value {
const self = @fieldParentPtr(Self, "parser", parser);
for (self.parsers) |one_of_parser| {
const result = try one_of_parser.parse(allocator, src);
if (result != null) {
return result;
}
}
return null;
}
};
}
pub fn Seq(comptime Value: type, comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
parsers: []*P,
const Self = @This();
const P = Parser(Value, Reader);
pub fn init(parsers: []*P) Self {
return Self{ .parsers = parsers };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value {
const self = @fieldParentPtr(Self, "parser", parser);
var results = std.ArrayList(std.meta.Child(Value)).init(allocator);
const pos = try src.seekableStream().getPos();
for (self.parsers) |seq_parser| {
const result = try seq_parser.parse(allocator, src);
std.log.debug("result {s}\n", .{result});
if (result == null) {
try src.seekableStream().seekTo(pos);
results.deinit();
return null;
}
try results.appendSlice(result.?);
allocator.free(result.?);
}
return results.toOwnedSlice();
}
};
}
pub fn Opt(comptime P: type) type {
return struct {
parser: P = .{ ._parse = parse },
base_parser: *P,
const Self = @This();
pub fn init(base_parser: *P) Self {
return Self{ .base_parser = base_parser };
}
fn parse(parser: *P, allocator: *Allocator, src: *P.ReaderT) Error!?P.ValueT {
const self = @fieldParentPtr(Self, "parser", parser);
const result = if (try self.base_parser.parse(allocator, src)) |res| res else std.mem.zeroes(P.ValueT);
std.log.debug("opt result {s}", .{result});
return result;
}
};
}
pub fn ParserFamily(comptime Value: type, comptime Reader: type) type {
return struct {
const P = Parser(Value, Reader);
fn wrap(p: anytype) fn (anytype) callconv(.Inline) *P {
return struct {
fn func(args: anytype) callconv(.Inline) *P {
return &p(args).parser;
}
}.func;
}
fn wrap2(p: anytype) fn ([]*P) callconv(.Inline) *P {
return struct {
fn func(args: []*P) callconv(.Inline) *P {
return &p(args).parser;
}
}.func;
}
pub const str = wrap(String(Reader).init);
pub const oneof = wrap2(OneOf(Value, Reader).init);
pub const seq = wrap2(Seq(Value, Reader).init);
pub const opt = wrap(Opt(P).init);
};
}
// ------------------------------
// Tests
// ------------------------------
const t = std.testing;
fn testCase(input: []const u8, parser: anytype, expected: ?[]const u8, remainder: ?[]const u8) !void {
const allocator = t.allocator;
var reader = std.io.fixedBufferStream(input);
var result = try parser.parse(allocator, &reader);
std.log.debug("result {s}", .{result});
if (expected == null) {
if (result != null) std.log.err("expected null but got {s}", .{result});
t.expect(result == null);
} else {
t.expect(result != null);
t.expectEqualStrings(expected.?, result.?);
allocator.free(result.?);
}
if (remainder) |rem| {
var buf: [0x100]u8 = undefined;
const bytes_read = try reader.read(&buf);
t.expectEqualStrings(rem, buf[0..bytes_read]);
}
}
const Fbs = std.io.FixedBufferStream([]const u8);
const Str = String(Fbs);
const Pf = ParserFamily([]const u8, Fbs);
test "literal" {
try testCase("abcdef", Pf.str("abc"), "abc", "def");
try testCase("abcdef", Pf.str("abce"), null, "abcdef");
try testCase("def", Pf.str("abc"), null, "def");
}
test "oneof" {
var p = Pf.oneof(&.{
Pf.str("cat"),
Pf.str("dog"),
Pf.str("sheep"),
});
try testCase("catdogsheep", p, "cat", "dogsheep");
try testCase("dogsheep", p, "dog", "sheep");
try testCase("sheep", p, "sheep", "");
try testCase("beep", p, null, "beep");
}
test "seq" {
var seq = Pf.seq(&.{
Pf.str("cat"),
Pf.str("dog"),
Pf.str("sheep"),
});
try testCase("catdogsheep", seq, "catdogsheep", "");
try testCase("catdogshee", seq, null, "catdogshee");
try testCase("catdog", seq, null, "catdog");
try testCase("cat", seq, null, "cat");
}
test "seq oneof" {
var p = Pf.seq(&.{
Pf.oneof(&.{
Pf.str("cat"),
Pf.str("dog"),
}),
Pf.str("sheep"),
});
try testCase("catdogsheep", p, null, "catdogsheep");
try testCase("catsheep", p, "catsheep", "");
try testCase("dogsheep", p, "dogsheep", "");
try testCase("cat", p, null, "cat");
try testCase("dog", p, null, "dog");
try testCase("dogshee", p, null, "dogshee");
}
test "oneof seq" {
var p = Pf.oneof(&.{
Pf.seq(&.{
Pf.str("cat"),
Pf.str("dog"),
}),
Pf.str("sheep"),
});
try testCase("catdog", p, "catdog", "");
try testCase("sheep", p, "sheep", "");
try testCase("catsheep", p, null, "catsheep");
try testCase("dogsheep", p, null, "dogsheep");
try testCase("cat", p, null, "cat");
try testCase("dog", p, null, "dog");
try testCase("catdogshee", p, "catdog", "shee");
}
test "opt" {
{
var p = Pf.opt(Pf.str("cat"));
try testCase("catdog", p, "cat", "dog");
try testCase("dog", p, "", "dog");
}
{
var p = Pf.opt(Pf.str("cat"));
try testCase("catdog", p, "cat", "dog");
try testCase("dog", p, "", "dog");
}
{
var p = Pf.opt(Pf.seq(&.{ Pf.str("cat"), Pf.str("dog") }));
try testCase("catdog", p, "catdog", "");
try testCase("cat", p, "", "cat");
try testCase("dog", p, "", "dog");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment