Skip to content

Instantly share code, notes, and snippets.

@andrewrk
Last active January 5, 2023 03:24
Show Gist options
  • Select an option

  • Save andrewrk/3998fdf38c4c1f771444381f6b8d263f to your computer and use it in GitHub Desktop.

Select an option

Save andrewrk/3998fdf38c4c1f771444381f6b8d263f to your computer and use it in GitHub Desktop.
benchmarking different versions of finding \r\n\r\n in a stream
const std = @import("std");
const KiB = 1024;
const MiB = 1024 * KiB;
pub fn main() !void {
var buffer: [(10000 / example.len) * example.len]u8 = undefined;
const total_bytes = 500000;
{
var i: usize = 0;
while (i < buffer.len) {
buffer[i..][0..example.len].* = example.*;
i += example.len;
}
}
var timer = try std.time.Timer.start();
{
var rng = std.rand.DefaultPrng.init(1);
var bytes_left: usize = total_bytes;
const start = timer.read();
var result: usize = 0;
while (bytes_left > 0) {
const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
result += noop(&.{}, buffer[0..amt]);
bytes_left -= amt;
}
const end = timer.read();
std.mem.doNotOptimizeAway(&result);
try printResults("noop", total_bytes, end - start);
}
{
var rng = std.rand.DefaultPrng.init(1);
var bytes_left: usize = total_bytes;
var h: Headers = .{};
const start = timer.read();
var result: usize = 0;
while (bytes_left > 0) {
const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
result += h.feed(buffer[0..amt]);
bytes_left -= amt;
}
const end = timer.read();
std.mem.doNotOptimizeAway(&result);
try printResults("naive", total_bytes, end - start);
}
{
var rng = std.rand.DefaultPrng.init(1);
var bytes_left: usize = total_bytes;
var r: Response = .{};
const start = timer.read();
var result: usize = 0;
while (bytes_left > 0) {
const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000));
result += r.findHeadersEnd(buffer[0..amt]);
bytes_left -= amt;
}
const end = timer.read();
std.mem.doNotOptimizeAway(&result);
try printResults("fancy", total_bytes, end - start);
}
}
fn printResults(name: []const u8, bytes_int: usize, elapsed: u64) !void {
const bytes: f64 = @intToFloat(f64, bytes_int);
const elapsed_s = @intToFloat(f64, elapsed) / std.time.ns_per_s;
const throughput = @floatToInt(u64, bytes / elapsed_s);
try std.io.getStdOut().writer().print("{s:>17}: {:10} MiB/s\n", .{
name,
throughput / (1 * MiB),
});
}
const Context = struct {};
fn noop(c: *Context, bytes: []const u8) usize {
_ = c;
return bytes.len;
}
pub const Headers = struct {
state: State = .start,
pub const State = enum { invalid, start, line, nl_r, nl_n, nl2_r, finished };
/// Returns how many bytes are processed into headers. Always less than or
/// equal to bytes.len. If the amount returned is less than bytes.len, it
/// means the headers ended and the first byte after the double \r\n\r\n is
/// located at `bytes[result]`.
pub fn feed(h: *Headers, bytes: []const u8) usize {
for (bytes) |b, i| {
switch (h.state) {
.start => switch (b) {
'\r' => h.state = .nl_r,
else => {},
},
.nl_r => switch (b) {
'\n' => h.state = .nl_n,
'\r' => h.state = .nl_r,
else => {},
},
.nl_n => switch (b) {
'\r' => h.state = .nl2_r,
else => h.state = .line,
},
.nl2_r => switch (b) {
'\n' => h.state = .finished,
'\r' => h.state = .nl_r,
else => {},
},
.line => switch (b) {
'\r' => h.state = .nl_r,
else => {},
},
.invalid => return i,
.finished => return i,
}
}
return bytes.len;
}
};
const Response = struct {
state: State = .start,
pub const State = enum {
invalid,
finished,
start,
seen_r,
seen_rn,
seen_rnr,
};
pub fn findHeadersEnd(r: *Response, bytes: []const u8) usize {
var index: usize = 0;
// TODO: https://github.com/ziglang/zig/issues/8220
state: while (true) {
switch (r.state) {
.invalid => unreachable,
.finished => unreachable,
.start => while (true) {
switch (bytes.len - index) {
0 => return index,
1 => {
if (bytes[index] == '\r')
r.state = .seen_r;
return index + 1;
},
2 => {
if (int16(bytes[index..][0..2]) == int16("\r\n")) {
r.state = .seen_rn;
} else if (bytes[index + 1] == '\r') {
r.state = .seen_r;
}
return index + 2;
},
3 => {
if (int16(bytes[index..][0..2]) == int16("\r\n") and
bytes[index + 2] == '\r')
{
r.state = .seen_rnr;
} else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n")) {
r.state = .seen_rn;
} else if (bytes[index + 2] == '\r') {
r.state = .seen_r;
}
return index + 3;
},
4...15 => {
if (int32(bytes[index..][0..4]) == int32("\r\n\r\n")) {
r.state = .finished;
return index + 4;
} else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n") and
bytes[index + 3] == '\r')
{
r.state = .seen_rnr;
index += 4;
continue :state;
} else if (int16(bytes[index + 2 ..][0..2]) == int16("\r\n")) {
r.state = .seen_rn;
index += 4;
continue :state;
} else if (bytes[index + 3] == '\r') {
r.state = .seen_r;
index += 4;
continue :state;
}
index += 4;
continue;
},
else => {
const chunk = bytes[index..][0..16];
const v: @Vector(16, u8) = chunk.*;
const matches_r = v == @splat(16, @as(u8, '\r'));
const iota = std.simd.iota(u8, 16);
const default = @splat(16, @as(u8, 16));
const sub_index = @reduce(.Min, @select(u8, matches_r, iota, default));
switch (sub_index) {
0...12 => {
index += sub_index + 4;
if (int32(chunk[sub_index..][0..4]) == int32("\r\n\r\n")) {
r.state = .finished;
return index;
}
continue;
},
13 => {
index += 16;
if (int16(chunk[14..][0..2]) == int16("\n\r")) {
r.state = .seen_rnr;
continue :state;
}
continue;
},
14 => {
index += 16;
if (chunk[15] == '\n') {
r.state = .seen_rn;
continue :state;
}
continue;
},
15 => {
r.state = .seen_r;
index += 16;
continue :state;
},
16 => {
index += 16;
continue;
},
else => unreachable,
}
},
}
},
.seen_r => switch (bytes.len - index) {
0 => return index,
1 => {
switch (bytes[index]) {
'\n' => r.state = .seen_rn,
'\r' => r.state = .seen_r,
else => r.state = .start,
}
return index + 1;
},
2 => {
if (int16(bytes[index..][0..2]) == int16("\n\r")) {
r.state = .seen_rnr;
return index + 2;
}
r.state = .start;
return index + 2;
},
else => {
if (int16(bytes[index..][0..2]) == int16("\n\r") and
bytes[index + 2] == '\n')
{
r.state = .finished;
return index + 3;
}
index += 3;
r.state = .start;
continue :state;
},
},
.seen_rn => switch (bytes.len - index) {
0 => return index,
1 => {
switch (bytes[index]) {
'\r' => r.state = .seen_rnr,
else => r.state = .start,
}
return index + 1;
},
else => {
if (int16(bytes[index..][0..2]) == int16("\r\n")) {
r.state = .finished;
return index + 2;
}
index += 2;
r.state = .start;
continue :state;
},
},
.seen_rnr => switch (bytes.len - index) {
0 => return index,
else => {
if (bytes[index] == '\n') {
r.state = .finished;
return index + 1;
}
index += 1;
r.state = .start;
continue :state;
},
},
}
return index;
}
}
inline fn int16(array: *const [2]u8) u16 {
return @bitCast(u16, array.*);
}
inline fn int32(array: *const [4]u8) u32 {
return @bitCast(u32, array.*);
}
inline fn int64(array: *const [8]u8) u64 {
return @bitCast(u64, array.*);
}
};
const example =
"HTTP/1.1 302 Found\r\n" ++
"Server: GitHub.com\r\n" ++
"Date: Thu, 05 Jan 2023 03:17:48 GMT\r\n" ++
"Content-Type: text/html; charset=utf-8\r\n" ++
"Vary: X-PJAX, X-PJAX-Container, Turbo-Visit, Turbo-Frame, Accept-Encoding, Accept, X-Requested-With\r\n" ++
"Location: https://codeload.github.com/ziglang/zig/tar.gz/refs/tags/0.10.0\r\n" ++
"Cache-Control: max-age=0, private\r\n" ++
"Strict-Transport-Security: max-age=31536000; includeSubdomains; preload\r\n" ++
"X-Frame-Options: deny\r\n" ++
"X-Content-Type-Options: nosniff\r\n" ++
"X-XSS-Protection: 0\r\n" ++
"Referrer-Policy: no-referrer-when-downgrade\r\n" ++
"Content-Security-Policy: default-src 'none'; base-uri 'self'; block-all-mixed-content; child-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/; connect-src 'self' uploads.github.com objects-origin.githubusercontent.com www.githubstatus.com collector.github.com raw.githubusercontent.com api.github.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com cdn.optimizely.com logx.optimizely.com/v1/events *.actions.githubusercontent.com wss://*.actions.githubusercontent.com online.visualstudio.com/api/v1/locations github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com insights.github.com wss://alive.github.com; font-src github.githubassets.com; form-action 'self' github.com gist.github.com objects-origin.githubusercontent.com; frame-ancestors 'none'; frame-src viewscreen.githubusercontent.com notebooks.githubusercontent.com; img-src 'self' data: github.githubassets.com media.githubusercontent.com camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com github-cloud.s3.amazonaws.com objects.githubusercontent.com objects-origin.githubusercontent.com secured-user-images.githubusercontent.com/ opengraph.githubassets.com github-production-user-asset-6210df.s3.amazonaws.com customer-stories-feed.github.com spotlights-feed.github.com *.githubusercontent.com; manifest-src 'self'; media-src github.com user-images.githubusercontent.com/ secured-user-images.githubusercontent.com/; script-src github.githubassets.com; style-src 'unsafe-inline' github.githubassets.com; worker-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/\r\n" ++
"Content-Length: 0\r\n" ++
"X-GitHub-Request-Id: A8D8:666F:271DFF9:2960BF4:63B6415C\r\n";
[nix-shell:~/dev/zig/build-release]$ lscpu | grep Model
Model name: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
Model: 158
[nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast
noop: 1374170 MiB/s
naive: 1094 MiB/s
fancy: 5082 MiB/s
[nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast -mcpu=baseline
noop: 1288749 MiB/s
naive: 859 MiB/s
fancy: 5743 MiB/s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment