Last active
March 12, 2021 05:39
-
-
Save archaistvolts/ef723e57874d30b7bbc3f4fb3667ae1e to your computer and use it in GitHub Desktop.
Parser combinators in zig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // adapted from https://gist.github.com/slimsag/db2dd2c49aa038e23b654120e70c9b00 | |
| const std = @import("std"); | |
| const Allocator = std.mem.Allocator; | |
| pub const Error = error{ | |
| EndOfStream, | |
| Utf8InvalidStartByte, | |
| } || std.fs.File.ReadError || std.fs.File.SeekError || std.mem.Allocator.Error; | |
| pub fn Parser(comptime Value: type, comptime Reader: type) type { | |
| return struct { | |
| const Self = @This(); | |
| pub const ReaderT = Reader; | |
| pub const ValueT = Value; | |
| _parse: fn (self: *Self, allocator: *Allocator, src: *Reader) Error!?Value, | |
| pub fn parse(self: *Self, allocator: *Allocator, src: *Reader) Error!?Value { | |
| return self._parse(self, allocator, src); | |
| } | |
| }; | |
| } | |
| pub fn String(comptime Reader: type) type { | |
| return struct { | |
| parser: P = .{ ._parse = parse }, | |
| want: []const u8, | |
| const Self = @This(); | |
| pub const P = Parser([]const u8, Reader); | |
| pub fn init(want: []const u8) Self { | |
| return Self{ .want = want }; | |
| } | |
| fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?[]const u8 { | |
| const self = @fieldParentPtr(Self, "parser", parser); | |
| const buf = try allocator.alloc(u8, self.want.len); | |
| const read = try src.reader().readAll(buf); | |
| std.log.debug("Literal read {} buf {s} want.len {}", .{ read, buf[0..read], self.want.len }); | |
| if (read < self.want.len or !std.mem.eql(u8, buf, self.want)) { | |
| try src.seekableStream().seekBy(-@intCast(i64, read)); | |
| allocator.free(buf); | |
| return null; | |
| } | |
| return buf; | |
| } | |
| }; | |
| } | |
| pub fn OneOf(comptime Value: type, comptime Reader: type) type { | |
| return struct { | |
| parser: P = .{ ._parse = parse }, | |
| parsers: []*P, | |
| const Self = @This(); | |
| const P = Parser(Value, Reader); | |
| pub fn init(parsers: []*P) Self { | |
| return Self{ .parsers = parsers }; | |
| } | |
| fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value { | |
| const self = @fieldParentPtr(Self, "parser", parser); | |
| for (self.parsers) |one_of_parser| { | |
| const result = try one_of_parser.parse(allocator, src); | |
| if (result != null) { | |
| return result; | |
| } | |
| } | |
| return null; | |
| } | |
| }; | |
| } | |
| pub fn Seq(comptime Value: type, comptime Reader: type) type { | |
| return struct { | |
| parser: P = .{ ._parse = parse }, | |
| parsers: []*P, | |
| const Self = @This(); | |
| const P = Parser(Value, Reader); | |
| pub fn init(parsers: []*P) Self { | |
| return Self{ .parsers = parsers }; | |
| } | |
| fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value { | |
| const self = @fieldParentPtr(Self, "parser", parser); | |
| var results = std.ArrayList(std.meta.Child(Value)).init(allocator); | |
| const pos = try src.seekableStream().getPos(); | |
| for (self.parsers) |seq_parser| { | |
| const result = try seq_parser.parse(allocator, src); | |
| std.log.debug("result {s}\n", .{result}); | |
| if (result == null) { | |
| try src.seekableStream().seekTo(pos); | |
| results.deinit(); | |
| return null; | |
| } | |
| try results.appendSlice(result.?); | |
| allocator.free(result.?); | |
| } | |
| return results.toOwnedSlice(); | |
| } | |
| }; | |
| } | |
| pub fn Opt(comptime P: type) type { | |
| return struct { | |
| parser: P = .{ ._parse = parse }, | |
| base_parser: *P, | |
| const Self = @This(); | |
| pub fn init(base_parser: *P) Self { | |
| return Self{ .base_parser = base_parser }; | |
| } | |
| fn parse(parser: *P, allocator: *Allocator, src: *P.ReaderT) Error!?P.ValueT { | |
| const self = @fieldParentPtr(Self, "parser", parser); | |
| const result = if (try self.base_parser.parse(allocator, src)) |res| res else std.mem.zeroes(P.ValueT); | |
| std.log.debug("opt result {s}", .{result}); | |
| return result; | |
| } | |
| }; | |
| } | |
| pub fn ParserFamily(comptime Value: type, comptime Reader: type) type { | |
| return struct { | |
| const P = Parser(Value, Reader); | |
| fn wrap(p: anytype) fn (anytype) callconv(.Inline) *P { | |
| return struct { | |
| fn func(args: anytype) callconv(.Inline) *P { | |
| return &p(args).parser; | |
| } | |
| }.func; | |
| } | |
| fn wrap2(p: anytype) fn ([]*P) callconv(.Inline) *P { | |
| return struct { | |
| fn func(args: []*P) callconv(.Inline) *P { | |
| return &p(args).parser; | |
| } | |
| }.func; | |
| } | |
| pub const str = wrap(String(Reader).init); | |
| pub const oneof = wrap2(OneOf(Value, Reader).init); | |
| pub const seq = wrap2(Seq(Value, Reader).init); | |
| pub const opt = wrap(Opt(P).init); | |
| }; | |
| } | |
| // ------------------------------ | |
| // Tests | |
| // ------------------------------ | |
| const t = std.testing; | |
| fn testCase(input: []const u8, parser: anytype, expected: ?[]const u8, remainder: ?[]const u8) !void { | |
| const allocator = t.allocator; | |
| var reader = std.io.fixedBufferStream(input); | |
| var result = try parser.parse(allocator, &reader); | |
| std.log.debug("result {s}", .{result}); | |
| if (expected == null) { | |
| if (result != null) std.log.err("expected null but got {s}", .{result}); | |
| t.expect(result == null); | |
| } else { | |
| t.expect(result != null); | |
| t.expectEqualStrings(expected.?, result.?); | |
| allocator.free(result.?); | |
| } | |
| if (remainder) |rem| { | |
| var buf: [0x100]u8 = undefined; | |
| const bytes_read = try reader.read(&buf); | |
| t.expectEqualStrings(rem, buf[0..bytes_read]); | |
| } | |
| } | |
| const Fbs = std.io.FixedBufferStream([]const u8); | |
| const Str = String(Fbs); | |
| const Pf = ParserFamily([]const u8, Fbs); | |
| test "literal" { | |
| try testCase("abcdef", Pf.str("abc"), "abc", "def"); | |
| try testCase("abcdef", Pf.str("abce"), null, "abcdef"); | |
| try testCase("def", Pf.str("abc"), null, "def"); | |
| } | |
| test "oneof" { | |
| var p = Pf.oneof(&.{ | |
| Pf.str("cat"), | |
| Pf.str("dog"), | |
| Pf.str("sheep"), | |
| }); | |
| try testCase("catdogsheep", p, "cat", "dogsheep"); | |
| try testCase("dogsheep", p, "dog", "sheep"); | |
| try testCase("sheep", p, "sheep", ""); | |
| try testCase("beep", p, null, "beep"); | |
| } | |
| test "seq" { | |
| var seq = Pf.seq(&.{ | |
| Pf.str("cat"), | |
| Pf.str("dog"), | |
| Pf.str("sheep"), | |
| }); | |
| try testCase("catdogsheep", seq, "catdogsheep", ""); | |
| try testCase("catdogshee", seq, null, "catdogshee"); | |
| try testCase("catdog", seq, null, "catdog"); | |
| try testCase("cat", seq, null, "cat"); | |
| } | |
| test "seq oneof" { | |
| var p = Pf.seq(&.{ | |
| Pf.oneof(&.{ | |
| Pf.str("cat"), | |
| Pf.str("dog"), | |
| }), | |
| Pf.str("sheep"), | |
| }); | |
| try testCase("catdogsheep", p, null, "catdogsheep"); | |
| try testCase("catsheep", p, "catsheep", ""); | |
| try testCase("dogsheep", p, "dogsheep", ""); | |
| try testCase("cat", p, null, "cat"); | |
| try testCase("dog", p, null, "dog"); | |
| try testCase("dogshee", p, null, "dogshee"); | |
| } | |
| test "oneof seq" { | |
| var p = Pf.oneof(&.{ | |
| Pf.seq(&.{ | |
| Pf.str("cat"), | |
| Pf.str("dog"), | |
| }), | |
| Pf.str("sheep"), | |
| }); | |
| try testCase("catdog", p, "catdog", ""); | |
| try testCase("sheep", p, "sheep", ""); | |
| try testCase("catsheep", p, null, "catsheep"); | |
| try testCase("dogsheep", p, null, "dogsheep"); | |
| try testCase("cat", p, null, "cat"); | |
| try testCase("dog", p, null, "dog"); | |
| try testCase("catdogshee", p, "catdog", "shee"); | |
| } | |
| test "opt" { | |
| { | |
| var p = Pf.opt(Pf.str("cat")); | |
| try testCase("catdog", p, "cat", "dog"); | |
| try testCase("dog", p, "", "dog"); | |
| } | |
| { | |
| var p = Pf.opt(Pf.str("cat")); | |
| try testCase("catdog", p, "cat", "dog"); | |
| try testCase("dog", p, "", "dog"); | |
| } | |
| { | |
| var p = Pf.opt(Pf.seq(&.{ Pf.str("cat"), Pf.str("dog") })); | |
| try testCase("catdog", p, "catdog", ""); | |
| try testCase("cat", p, "", "cat"); | |
| try testCase("dog", p, "", "dog"); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment