andrewrk · January 5, 2023 03:24
diff --git a/bench.zig b/bench.zig
 const std = @import("std");

 const KiB = 1024;
 const MiB = 1024 * KiB;

 pub fn main() !void {
    var buffer: [(10000 / example.len) * example.len]u8 = undefined;
    const total_bytes = 500000;
    {
        var i: usize = 0;
        while (i < buffer.len) {
            buffer[i..][0..example.len].* = example.*;
            i += example.len;
        }
    }

    var timer = try std.time.Timer.start();

    {
        var rng = std.rand.DefaultPrng.init(1);
        var bytes_left: usize = total_bytes;
        const start = timer.read();
        var result: usize = 0;
        while (bytes_left > 0) {
            const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
            result += noop(&.{}, buffer[0..amt]);
            bytes_left -= amt;
        }
        const end = timer.read();
        std.mem.doNotOptimizeAway(&result);
        try printResults("noop", total_bytes, end - start);
    }

    {
        var rng = std.rand.DefaultPrng.init(1);
        var bytes_left: usize = total_bytes;
        var h: Headers = .{};

        const start = timer.read();
        var result: usize = 0;
        while (bytes_left > 0) {
            const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
            result += h.feed(buffer[0..amt]);
            bytes_left -= amt;
        }
        const end = timer.read();
        std.mem.doNotOptimizeAway(&result);
        try printResults("naive", total_bytes, end - start);
    }

    {
        var rng = std.rand.DefaultPrng.init(1);
        var bytes_left: usize = total_bytes;
        var r: Response = .{};

        const start = timer.read();
        var result: usize = 0;
        while (bytes_left > 0) {
            const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
            result += r.findHeadersEnd(buffer[0..amt]);
            bytes_left -= amt;
        }
        const end = timer.read();
        std.mem.doNotOptimizeAway(&result);
        try printResults("fancy", total_bytes, end - start);
    }
 }

 fn printResults(name: []const u8, bytes_int: usize, elapsed: u64) !void {
    const bytes: f64 = @intToFloat(f64, bytes_int);
    const elapsed_s = @intToFloat(f64, elapsed) / std.time.ns_per_s;
    const throughput = @floatToInt(u64, bytes / elapsed_s);
    try std.io.getStdOut().writer().print("{s:>17}: {:10} MiB/s\n", .{
        name,
        throughput / (1 * MiB),
    });
 }

 const Context = struct {};

 fn noop(c: *Context, bytes: []const u8) usize {
    _ = c;
    return bytes.len;
 }

 pub const Headers = struct {
    state: State = .start,

    pub const State = enum { invalid, start, line, nl_r, nl_n, nl2_r, finished };

    /// Returns how many bytes are processed into headers. Always less than or
    /// equal to bytes.len. If the amount returned is less than bytes.len, it
    /// means the headers ended and the first byte after the double \r\n\r\n is
    /// located at `bytes[result]`.
    pub fn feed(h: *Headers, bytes: []const u8) usize {
        for (bytes) |b, i| {
            switch (h.state) {
                .start => switch (b) {
                    '\r' => h.state = .nl_r,
                    else => {},
                },
                .nl_r => switch (b) {
                    '\n' => h.state = .nl_n,
                    '\r' => h.state = .nl_r,
                    else => {},
                },
                .nl_n => switch (b) {
                    '\r' => h.state = .nl2_r,
                    else => h.state = .line,
                },
                .nl2_r => switch (b) {
                    '\n' => h.state = .finished,
                    '\r' => h.state = .nl_r,
                    else => {},
                },
                .line => switch (b) {
                    '\r' => h.state = .nl_r,
                    else => {},
                },
                .invalid => return i,
                .finished => return i,
            }
        }
        return bytes.len;
    }
 };

 const Response = struct {
    state: State = .start,

    pub const State = enum {
        invalid,
        finished,
        start,
        seen_r,
        seen_rn,
        seen_rnr,
    };

    pub fn findHeadersEnd(r: *Response, bytes: []const u8) usize {
        var index: usize = 0;

        // TODO: https://github.com/ziglang/zig/issues/8220
        state: while (true) {
            switch (r.state) {
                .invalid => unreachable,
                .finished => unreachable,
                .start => while (true) {
                    switch (bytes.len - index) {
                        0 => return index,
                        1 => {
                            if (bytes[index] == '\r')
                                r.state = .seen_r;
                            return index + 1;
                        },
                        2 => {
                            if (int16(bytes[index..][0..2]) == int16("\r\n")) {
                                r.state = .seen_rn;
                            } else if (bytes[index + 1] == '\r') {
                                r.state = .seen_r;
                            }
                            return index + 2;
                        },
                        3 => {
                            if (int16(bytes[index..][0..2]) == int16("\r\n") and
                                bytes[index + 2] == '\r')
                            {
                                r.state = .seen_rnr;
                            } else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n")) {
                                r.state = .seen_rn;
                            } else if (bytes[index + 2] == '\r') {
                                r.state = .seen_r;
                            }
                            return index + 3;
                        },
                        4...15 => {
                            if (int32(bytes[index..][0..4]) == int32("\r\n\r\n")) {
                                r.state = .finished;
                                return index + 4;
                            } else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n") and
                                bytes[index + 3] == '\r')
                            {
                                r.state = .seen_rnr;
                                index += 4;
                                continue :state;
                            } else if (int16(bytes[index + 2 ..][0..2]) == int16("\r\n")) {
                                r.state = .seen_rn;
                                index += 4;
                                continue :state;
                            } else if (bytes[index + 3] == '\r') {
                                r.state = .seen_r;
                                index += 4;
                                continue :state;
                            }
                            index += 4;
                            continue;
                        },
                        else => {
                            const chunk = bytes[index..][0..16];
                            const v: @Vector(16, u8) = chunk.*;
                            const matches_r = v == @splat(16, @as(u8, '\r'));
                            const iota = std.simd.iota(u8, 16);
                            const default = @splat(16, @as(u8, 16));
                            const sub_index = @reduce(.Min, @select(u8, matches_r, iota, default));
                            switch (sub_index) {
                                0...12 => {
                                    index += sub_index + 4;
                                    if (int32(chunk[sub_index..][0..4]) == int32("\r\n\r\n")) {
                                        r.state = .finished;
                                        return index;
                                    }
                                    continue;
                                },
                                13 => {
                                    index += 16;
                                    if (int16(chunk[14..][0..2]) == int16("\n\r")) {
                                        r.state = .seen_rnr;
                                        continue :state;
                                    }
                                    continue;
                                },
                                14 => {
                                    index += 16;
                                    if (chunk[15] == '\n') {
                                        r.state = .seen_rn;
                                        continue :state;
                                    }
                                    continue;
                                },
                                15 => {
                                    r.state = .seen_r;
                                    index += 16;
                                    continue :state;
                                },
                                16 => {
                                    index += 16;
                                    continue;
                                },
                                else => unreachable,
                            }
                        },
                    }
                },

                .seen_r => switch (bytes.len - index) {
                    0 => return index,
                    1 => {
                        switch (bytes[index]) {
                            '\n' => r.state = .seen_rn,
                            '\r' => r.state = .seen_r,
                            else => r.state = .start,
                        }
                        return index + 1;
                    },
                    2 => {
                        if (int16(bytes[index..][0..2]) == int16("\n\r")) {
                            r.state = .seen_rnr;
                            return index + 2;
                        }
                        r.state = .start;
                        return index + 2;
                    },
                    else => {
                        if (int16(bytes[index..][0..2]) == int16("\n\r") and
                            bytes[index + 2] == '\n')
                        {
                            r.state = .finished;
                            return index + 3;
                        }
                        index += 3;
                        r.state = .start;
                        continue :state;
                    },
                },
                .seen_rn => switch (bytes.len - index) {
                    0 => return index,
                    1 => {
                        switch (bytes[index]) {
                            '\r' => r.state = .seen_rnr,
                            else => r.state = .start,
                        }
                        return index + 1;
                    },
                    else => {
                        if (int16(bytes[index..][0..2]) == int16("\r\n")) {
                            r.state = .finished;
                            return index + 2;
                        }
                        index += 2;
                        r.state = .start;
                        continue :state;
                    },
                },
                .seen_rnr => switch (bytes.len - index) {
                    0 => return index,
                    else => {
                        if (bytes[index] == '\n') {
                            r.state = .finished;
                            return index + 1;
                        }
                        index += 1;
                        r.state = .start;
                        continue :state;
                    },
                },
            }

            return index;
        }
    }

    inline fn int16(array: *const [2]u8) u16 {
        return @bitCast(u16, array.*);
    }

    inline fn int32(array: *const [4]u8) u32 {
        return @bitCast(u32, array.*);
    }

    inline fn int64(array: *const [8]u8) u64 {
        return @bitCast(u64, array.*);
    }
 };

 const example =
    "HTTP/1.1 302 Found\r\n" ++
    "Server: GitHub.com\r\n" ++
    "Date: Thu, 05 Jan 2023 03:17:48 GMT\r\n" ++
    "Content-Type: text/html; charset=utf-8\r\n" ++
    "Vary: X-PJAX, X-PJAX-Container, Turbo-Visit, Turbo-Frame, Accept-Encoding, Accept, X-Requested-With\r\n" ++
    "Location: https://codeload.github.com/ziglang/zig/tar.gz/refs/tags/0.10.0\r\n" ++
    "Cache-Control: max-age=0, private\r\n" ++
    "Strict-Transport-Security: max-age=31536000; includeSubdomains; preload\r\n" ++
    "X-Frame-Options: deny\r\n" ++
    "X-Content-Type-Options: nosniff\r\n" ++
    "X-XSS-Protection: 0\r\n" ++
    "Referrer-Policy: no-referrer-when-downgrade\r\n" ++
    "Content-Security-Policy: default-src 'none'; base-uri 'self'; block-all-mixed-content; child-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/; connect-src 'self' uploads.github.com objects-origin.githubusercontent.com www.githubstatus.com collector.github.com raw.githubusercontent.com api.github.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com cdn.optimizely.com logx.optimizely.com/v1/events *.actions.githubusercontent.com wss://*.actions.githubusercontent.com online.visualstudio.com/api/v1/locations github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com insights.github.com wss://alive.github.com; font-src github.githubassets.com; form-action 'self' github.com gist.github.com objects-origin.githubusercontent.com; frame-ancestors 'none'; frame-src viewscreen.githubusercontent.com notebooks.githubusercontent.com; img-src 'self' data: github.githubassets.com media.githubusercontent.com camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com github-cloud.s3.amazonaws.com objects.githubusercontent.com objects-origin.githubusercontent.com secured-user-images.githubusercontent.com/ opengraph.githubassets.com github-production-user-asset-6210df.s3.amazonaws.com customer-stories-feed.github.com spotlights-feed.github.com *.githubusercontent.com; manifest-src 'self'; media-src github.com user-images.githubusercontent.com/ secured-user-images.githubusercontent.com/; script-src github.githubassets.com; style-src 'unsafe-inline' github.githubassets.com; worker-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/\r\n" ++
    "Content-Length: 0\r\n" ++
    "X-GitHub-Request-Id: A8D8:666F:271DFF9:2960BF4:63B6415C\r\n";
diff --git a/output.txt b/output.txt
 [nix-shell:~/dev/zig/build-release]$ lscpu | grep Model
 Model name:                      Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Model:                           158
 [nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast 
             noop:    1374170 MiB/s
            naive:       1094 MiB/s
            fancy:       5082 MiB/s

 [nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast -mcpu=baseline
             noop:    1288749 MiB/s
            naive:        859 MiB/s
            fancy:       5743 MiB/s
	const std = @import("std");

	const KiB = 1024;
	const MiB = 1024 * KiB;

	pub fn main() !void {
	var buffer: [(10000 / example.len) * example.len]u8 = undefined;
	const total_bytes = 500000;
	{
	var i: usize = 0;
	while (i < buffer.len) {
	buffer[i..][0..example.len].* = example.*;
	i += example.len;
	}
	}

	var timer = try std.time.Timer.start();

	{
	var rng = std.rand.DefaultPrng.init(1);
	var bytes_left: usize = total_bytes;
	const start = timer.read();
	var result: usize = 0;
	while (bytes_left > 0) {
	const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
	result += noop(&.{}, buffer[0..amt]);
	bytes_left -= amt;
	}
	const end = timer.read();
	std.mem.doNotOptimizeAway(&result);
	try printResults("noop", total_bytes, end - start);
	}

	{
	var rng = std.rand.DefaultPrng.init(1);
	var bytes_left: usize = total_bytes;
	var h: Headers = .{};

	const start = timer.read();
	var result: usize = 0;
	while (bytes_left > 0) {
	const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
	result += h.feed(buffer[0..amt]);
	bytes_left -= amt;
	}
	const end = timer.read();
	std.mem.doNotOptimizeAway(&result);
	try printResults("naive", total_bytes, end - start);
	}

	{
	var rng = std.rand.DefaultPrng.init(1);
	var bytes_left: usize = total_bytes;
	var r: Response = .{};

	const start = timer.read();
	var result: usize = 0;
	while (bytes_left > 0) {
	const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
	result += r.findHeadersEnd(buffer[0..amt]);
	bytes_left -= amt;
	}
	const end = timer.read();
	std.mem.doNotOptimizeAway(&result);
	try printResults("fancy", total_bytes, end - start);
	}
	}

	fn printResults(name: []const u8, bytes_int: usize, elapsed: u64) !void {
	const bytes: f64 = @intToFloat(f64, bytes_int);
	const elapsed_s = @intToFloat(f64, elapsed) / std.time.ns_per_s;
	const throughput = @floatToInt(u64, bytes / elapsed_s);
	try std.io.getStdOut().writer().print("{s:>17}: {:10} MiB/s\n", .{
	name,
	throughput / (1 * MiB),
	});
	}

	const Context = struct {};

	fn noop(c: *Context, bytes: []const u8) usize {
	_ = c;
	return bytes.len;
	}

	pub const Headers = struct {
	state: State = .start,

	pub const State = enum { invalid, start, line, nl_r, nl_n, nl2_r, finished };

	/// Returns how many bytes are processed into headers. Always less than or
	/// equal to bytes.len. If the amount returned is less than bytes.len, it
	/// means the headers ended and the first byte after the double \r\n\r\n is
	/// located at `bytes[result]`.
	pub fn feed(h: *Headers, bytes: []const u8) usize {
	for (bytes) \|b, i\| {
	switch (h.state) {
	.start => switch (b) {
	'\r' => h.state = .nl_r,
	else => {},
	},
	.nl_r => switch (b) {
	'\n' => h.state = .nl_n,
	'\r' => h.state = .nl_r,
	else => {},
	},
	.nl_n => switch (b) {
	'\r' => h.state = .nl2_r,
	else => h.state = .line,
	},
	.nl2_r => switch (b) {
	'\n' => h.state = .finished,
	'\r' => h.state = .nl_r,
	else => {},
	},
	.line => switch (b) {
	'\r' => h.state = .nl_r,
	else => {},
	},
	.invalid => return i,
	.finished => return i,
	}
	}
	return bytes.len;
	}
	};

	const Response = struct {
	state: State = .start,

	pub const State = enum {
	invalid,
	finished,
	start,
	seen_r,
	seen_rn,
	seen_rnr,
	};

	pub fn findHeadersEnd(r: *Response, bytes: []const u8) usize {
	var index: usize = 0;

	// TODO: https://github.com/ziglang/zig/issues/8220
	state: while (true) {
	switch (r.state) {
	.invalid => unreachable,
	.finished => unreachable,
	.start => while (true) {
	switch (bytes.len - index) {
	0 => return index,
	1 => {
	if (bytes[index] == '\r')
	r.state = .seen_r;
	return index + 1;
	},
	2 => {
	if (int16(bytes[index..][0..2]) == int16("\r\n")) {
	r.state = .seen_rn;
	} else if (bytes[index + 1] == '\r') {
	r.state = .seen_r;
	}
	return index + 2;
	},
	3 => {
	if (int16(bytes[index..][0..2]) == int16("\r\n") and
	bytes[index + 2] == '\r')
	{
	r.state = .seen_rnr;
	} else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n")) {
	r.state = .seen_rn;
	} else if (bytes[index + 2] == '\r') {
	r.state = .seen_r;
	}
	return index + 3;
	},
	4...15 => {
	if (int32(bytes[index..][0..4]) == int32("\r\n\r\n")) {
	r.state = .finished;
	return index + 4;
	} else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n") and
	bytes[index + 3] == '\r')
	{
	r.state = .seen_rnr;
	index += 4;
	continue :state;
	} else if (int16(bytes[index + 2 ..][0..2]) == int16("\r\n")) {
	r.state = .seen_rn;
	index += 4;
	continue :state;
	} else if (bytes[index + 3] == '\r') {
	r.state = .seen_r;
	index += 4;
	continue :state;
	}
	index += 4;
	continue;
	},
	else => {
	const chunk = bytes[index..][0..16];
	const v: @Vector(16, u8) = chunk.*;
	const matches_r = v == @splat(16, @as(u8, '\r'));
	const iota = std.simd.iota(u8, 16);
	const default = @splat(16, @as(u8, 16));
	const sub_index = @reduce(.Min, @select(u8, matches_r, iota, default));
	switch (sub_index) {
	0...12 => {
	index += sub_index + 4;
	if (int32(chunk[sub_index..][0..4]) == int32("\r\n\r\n")) {
	r.state = .finished;
	return index;
	}
	continue;
	},
	13 => {
	index += 16;
	if (int16(chunk[14..][0..2]) == int16("\n\r")) {
	r.state = .seen_rnr;
	continue :state;
	}
	continue;
	},
	14 => {
	index += 16;
	if (chunk[15] == '\n') {
	r.state = .seen_rn;
	continue :state;
	}
	continue;
	},
	15 => {
	r.state = .seen_r;
	index += 16;
	continue :state;
	},
	16 => {
	index += 16;
	continue;
	},
	else => unreachable,
	}
	},
	}
	},

	.seen_r => switch (bytes.len - index) {
	0 => return index,
	1 => {
	switch (bytes[index]) {
	'\n' => r.state = .seen_rn,
	'\r' => r.state = .seen_r,
	else => r.state = .start,
	}
	return index + 1;
	},
	2 => {
	if (int16(bytes[index..][0..2]) == int16("\n\r")) {
	r.state = .seen_rnr;
	return index + 2;
	}
	r.state = .start;
	return index + 2;
	},
	else => {
	if (int16(bytes[index..][0..2]) == int16("\n\r") and
	bytes[index + 2] == '\n')
	{
	r.state = .finished;
	return index + 3;
	}
	index += 3;
	r.state = .start;
	continue :state;
	},
	},
	.seen_rn => switch (bytes.len - index) {
	0 => return index,
	1 => {
	switch (bytes[index]) {
	'\r' => r.state = .seen_rnr,
	else => r.state = .start,
	}
	return index + 1;
	},
	else => {
	if (int16(bytes[index..][0..2]) == int16("\r\n")) {
	r.state = .finished;
	return index + 2;
	}
	index += 2;
	r.state = .start;
	continue :state;
	},
	},
	.seen_rnr => switch (bytes.len - index) {
	0 => return index,
	else => {
	if (bytes[index] == '\n') {
	r.state = .finished;
	return index + 1;
	}
	index += 1;
	r.state = .start;
	continue :state;
	},
	},
	}

	return index;
	}
	}

	inline fn int16(array: *const [2]u8) u16 {
	return @bitCast(u16, array.*);
	}

	inline fn int32(array: *const [4]u8) u32 {
	return @bitCast(u32, array.*);
	}

	inline fn int64(array: *const [8]u8) u64 {
	return @bitCast(u64, array.*);
	}
	};

	const example =
	"HTTP/1.1 302 Found\r\n" ++
	"Server: GitHub.com\r\n" ++
	"Date: Thu, 05 Jan 2023 03:17:48 GMT\r\n" ++
	"Content-Type: text/html; charset=utf-8\r\n" ++
	"Vary: X-PJAX, X-PJAX-Container, Turbo-Visit, Turbo-Frame, Accept-Encoding, Accept, X-Requested-With\r\n" ++
	"Location: https://codeload.github.com/ziglang/zig/tar.gz/refs/tags/0.10.0\r\n" ++
	"Cache-Control: max-age=0, private\r\n" ++
	"Strict-Transport-Security: max-age=31536000; includeSubdomains; preload\r\n" ++
	"X-Frame-Options: deny\r\n" ++
	"X-Content-Type-Options: nosniff\r\n" ++
	"X-XSS-Protection: 0\r\n" ++
	"Referrer-Policy: no-referrer-when-downgrade\r\n" ++
	"Content-Security-Policy: default-src 'none'; base-uri 'self'; block-all-mixed-content; child-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/; connect-src 'self' uploads.github.com objects-origin.githubusercontent.com www.githubstatus.com collector.github.com raw.githubusercontent.com api.github.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com cdn.optimizely.com logx.optimizely.com/v1/events .actions.githubusercontent.com wss://.actions.githubusercontent.com online.visualstudio.com/api/v1/locations github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com insights.github.com wss://alive.github.com; font-src github.githubassets.com; form-action 'self' github.com gist.github.com objects-origin.githubusercontent.com; frame-ancestors 'none'; frame-src viewscreen.githubusercontent.com notebooks.githubusercontent.com; img-src 'self' data: github.githubassets.com media.githubusercontent.com camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com github-cloud.s3.amazonaws.com objects.githubusercontent.com objects-origin.githubusercontent.com secured-user-images.githubusercontent.com/ opengraph.githubassets.com github-production-user-asset-6210df.s3.amazonaws.com customer-stories-feed.github.com spotlights-feed.github.com *.githubusercontent.com; manifest-src 'self'; media-src github.com user-images.githubusercontent.com/ secured-user-images.githubusercontent.com/; script-src github.githubassets.com; style-src 'unsafe-inline' github.githubassets.com; worker-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/\r\n" ++
	"Content-Length: 0\r\n" ++
	"X-GitHub-Request-Id: A8D8:666F:271DFF9:2960BF4:63B6415C\r\n";
	[nix-shell:~/dev/zig/build-release]$ lscpu \| grep Model
	Model name: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
	Model: 158
	[nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast
	noop: 1374170 MiB/s
	naive: 1094 MiB/s
	fancy: 5082 MiB/s

	[nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast -mcpu=baseline
	noop: 1288749 MiB/s
	naive: 859 MiB/s
	fancy: 5743 MiB/s