Last active
January 5, 2023 03:24
-
-
Save andrewrk/3998fdf38c4c1f771444381f6b8d263f to your computer and use it in GitHub Desktop.
benchmarking different versions of finding \r\n\r\n in a stream
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const std = @import("std"); | |
| const KiB = 1024; | |
| const MiB = 1024 * KiB; | |
| pub fn main() !void { | |
| var buffer: [(10000 / example.len) * example.len]u8 = undefined; | |
| const total_bytes = 500000; | |
| { | |
| var i: usize = 0; | |
| while (i < buffer.len) { | |
| buffer[i..][0..example.len].* = example.*; | |
| i += example.len; | |
| } | |
| } | |
| var timer = try std.time.Timer.start(); | |
| { | |
| var rng = std.rand.DefaultPrng.init(1); | |
| var bytes_left: usize = total_bytes; | |
| const start = timer.read(); | |
| var result: usize = 0; | |
| while (bytes_left > 0) { | |
| const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000)); | |
| result += noop(&.{}, buffer[0..amt]); | |
| bytes_left -= amt; | |
| } | |
| const end = timer.read(); | |
| std.mem.doNotOptimizeAway(&result); | |
| try printResults("noop", total_bytes, end - start); | |
| } | |
| { | |
| var rng = std.rand.DefaultPrng.init(1); | |
| var bytes_left: usize = total_bytes; | |
| var h: Headers = .{}; | |
| const start = timer.read(); | |
| var result: usize = 0; | |
| while (bytes_left > 0) { | |
| const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000)); | |
| result += h.feed(buffer[0..amt]); | |
| bytes_left -= amt; | |
| } | |
| const end = timer.read(); | |
| std.mem.doNotOptimizeAway(&result); | |
| try printResults("naive", total_bytes, end - start); | |
| } | |
| { | |
| var rng = std.rand.DefaultPrng.init(1); | |
| var bytes_left: usize = total_bytes; | |
| var r: Response = .{}; | |
| const start = timer.read(); | |
| var result: usize = 0; | |
| while (bytes_left > 0) { | |
| const amt = @min(bytes_left, rng.random().uintLessThanBiased(usize, 4000)); | |
| result += r.findHeadersEnd(buffer[0..amt]); | |
| bytes_left -= amt; | |
| } | |
| const end = timer.read(); | |
| std.mem.doNotOptimizeAway(&result); | |
| try printResults("fancy", total_bytes, end - start); | |
| } | |
| } | |
| fn printResults(name: []const u8, bytes_int: usize, elapsed: u64) !void { | |
| const bytes: f64 = @intToFloat(f64, bytes_int); | |
| const elapsed_s = @intToFloat(f64, elapsed) / std.time.ns_per_s; | |
| const throughput = @floatToInt(u64, bytes / elapsed_s); | |
| try std.io.getStdOut().writer().print("{s:>17}: {:10} MiB/s\n", .{ | |
| name, | |
| throughput / (1 * MiB), | |
| }); | |
| } | |
| const Context = struct {}; | |
| fn noop(c: *Context, bytes: []const u8) usize { | |
| _ = c; | |
| return bytes.len; | |
| } | |
| pub const Headers = struct { | |
| state: State = .start, | |
| pub const State = enum { invalid, start, line, nl_r, nl_n, nl2_r, finished }; | |
| /// Returns how many bytes are processed into headers. Always less than or | |
| /// equal to bytes.len. If the amount returned is less than bytes.len, it | |
| /// means the headers ended and the first byte after the double \r\n\r\n is | |
| /// located at `bytes[result]`. | |
| pub fn feed(h: *Headers, bytes: []const u8) usize { | |
| for (bytes) |b, i| { | |
| switch (h.state) { | |
| .start => switch (b) { | |
| '\r' => h.state = .nl_r, | |
| else => {}, | |
| }, | |
| .nl_r => switch (b) { | |
| '\n' => h.state = .nl_n, | |
| '\r' => h.state = .nl_r, | |
| else => {}, | |
| }, | |
| .nl_n => switch (b) { | |
| '\r' => h.state = .nl2_r, | |
| else => h.state = .line, | |
| }, | |
| .nl2_r => switch (b) { | |
| '\n' => h.state = .finished, | |
| '\r' => h.state = .nl_r, | |
| else => {}, | |
| }, | |
| .line => switch (b) { | |
| '\r' => h.state = .nl_r, | |
| else => {}, | |
| }, | |
| .invalid => return i, | |
| .finished => return i, | |
| } | |
| } | |
| return bytes.len; | |
| } | |
| }; | |
| const Response = struct { | |
| state: State = .start, | |
| pub const State = enum { | |
| invalid, | |
| finished, | |
| start, | |
| seen_r, | |
| seen_rn, | |
| seen_rnr, | |
| }; | |
| pub fn findHeadersEnd(r: *Response, bytes: []const u8) usize { | |
| var index: usize = 0; | |
| // TODO: https://github.com/ziglang/zig/issues/8220 | |
| state: while (true) { | |
| switch (r.state) { | |
| .invalid => unreachable, | |
| .finished => unreachable, | |
| .start => while (true) { | |
| switch (bytes.len - index) { | |
| 0 => return index, | |
| 1 => { | |
| if (bytes[index] == '\r') | |
| r.state = .seen_r; | |
| return index + 1; | |
| }, | |
| 2 => { | |
| if (int16(bytes[index..][0..2]) == int16("\r\n")) { | |
| r.state = .seen_rn; | |
| } else if (bytes[index + 1] == '\r') { | |
| r.state = .seen_r; | |
| } | |
| return index + 2; | |
| }, | |
| 3 => { | |
| if (int16(bytes[index..][0..2]) == int16("\r\n") and | |
| bytes[index + 2] == '\r') | |
| { | |
| r.state = .seen_rnr; | |
| } else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n")) { | |
| r.state = .seen_rn; | |
| } else if (bytes[index + 2] == '\r') { | |
| r.state = .seen_r; | |
| } | |
| return index + 3; | |
| }, | |
| 4...15 => { | |
| if (int32(bytes[index..][0..4]) == int32("\r\n\r\n")) { | |
| r.state = .finished; | |
| return index + 4; | |
| } else if (int16(bytes[index + 1 ..][0..2]) == int16("\r\n") and | |
| bytes[index + 3] == '\r') | |
| { | |
| r.state = .seen_rnr; | |
| index += 4; | |
| continue :state; | |
| } else if (int16(bytes[index + 2 ..][0..2]) == int16("\r\n")) { | |
| r.state = .seen_rn; | |
| index += 4; | |
| continue :state; | |
| } else if (bytes[index + 3] == '\r') { | |
| r.state = .seen_r; | |
| index += 4; | |
| continue :state; | |
| } | |
| index += 4; | |
| continue; | |
| }, | |
| else => { | |
| const chunk = bytes[index..][0..16]; | |
| const v: @Vector(16, u8) = chunk.*; | |
| const matches_r = v == @splat(16, @as(u8, '\r')); | |
| const iota = std.simd.iota(u8, 16); | |
| const default = @splat(16, @as(u8, 16)); | |
| const sub_index = @reduce(.Min, @select(u8, matches_r, iota, default)); | |
| switch (sub_index) { | |
| 0...12 => { | |
| index += sub_index + 4; | |
| if (int32(chunk[sub_index..][0..4]) == int32("\r\n\r\n")) { | |
| r.state = .finished; | |
| return index; | |
| } | |
| continue; | |
| }, | |
| 13 => { | |
| index += 16; | |
| if (int16(chunk[14..][0..2]) == int16("\n\r")) { | |
| r.state = .seen_rnr; | |
| continue :state; | |
| } | |
| continue; | |
| }, | |
| 14 => { | |
| index += 16; | |
| if (chunk[15] == '\n') { | |
| r.state = .seen_rn; | |
| continue :state; | |
| } | |
| continue; | |
| }, | |
| 15 => { | |
| r.state = .seen_r; | |
| index += 16; | |
| continue :state; | |
| }, | |
| 16 => { | |
| index += 16; | |
| continue; | |
| }, | |
| else => unreachable, | |
| } | |
| }, | |
| } | |
| }, | |
| .seen_r => switch (bytes.len - index) { | |
| 0 => return index, | |
| 1 => { | |
| switch (bytes[index]) { | |
| '\n' => r.state = .seen_rn, | |
| '\r' => r.state = .seen_r, | |
| else => r.state = .start, | |
| } | |
| return index + 1; | |
| }, | |
| 2 => { | |
| if (int16(bytes[index..][0..2]) == int16("\n\r")) { | |
| r.state = .seen_rnr; | |
| return index + 2; | |
| } | |
| r.state = .start; | |
| return index + 2; | |
| }, | |
| else => { | |
| if (int16(bytes[index..][0..2]) == int16("\n\r") and | |
| bytes[index + 2] == '\n') | |
| { | |
| r.state = .finished; | |
| return index + 3; | |
| } | |
| index += 3; | |
| r.state = .start; | |
| continue :state; | |
| }, | |
| }, | |
| .seen_rn => switch (bytes.len - index) { | |
| 0 => return index, | |
| 1 => { | |
| switch (bytes[index]) { | |
| '\r' => r.state = .seen_rnr, | |
| else => r.state = .start, | |
| } | |
| return index + 1; | |
| }, | |
| else => { | |
| if (int16(bytes[index..][0..2]) == int16("\r\n")) { | |
| r.state = .finished; | |
| return index + 2; | |
| } | |
| index += 2; | |
| r.state = .start; | |
| continue :state; | |
| }, | |
| }, | |
| .seen_rnr => switch (bytes.len - index) { | |
| 0 => return index, | |
| else => { | |
| if (bytes[index] == '\n') { | |
| r.state = .finished; | |
| return index + 1; | |
| } | |
| index += 1; | |
| r.state = .start; | |
| continue :state; | |
| }, | |
| }, | |
| } | |
| return index; | |
| } | |
| } | |
| inline fn int16(array: *const [2]u8) u16 { | |
| return @bitCast(u16, array.*); | |
| } | |
| inline fn int32(array: *const [4]u8) u32 { | |
| return @bitCast(u32, array.*); | |
| } | |
| inline fn int64(array: *const [8]u8) u64 { | |
| return @bitCast(u64, array.*); | |
| } | |
| }; | |
| const example = | |
| "HTTP/1.1 302 Found\r\n" ++ | |
| "Server: GitHub.com\r\n" ++ | |
| "Date: Thu, 05 Jan 2023 03:17:48 GMT\r\n" ++ | |
| "Content-Type: text/html; charset=utf-8\r\n" ++ | |
| "Vary: X-PJAX, X-PJAX-Container, Turbo-Visit, Turbo-Frame, Accept-Encoding, Accept, X-Requested-With\r\n" ++ | |
| "Location: https://codeload.github.com/ziglang/zig/tar.gz/refs/tags/0.10.0\r\n" ++ | |
| "Cache-Control: max-age=0, private\r\n" ++ | |
| "Strict-Transport-Security: max-age=31536000; includeSubdomains; preload\r\n" ++ | |
| "X-Frame-Options: deny\r\n" ++ | |
| "X-Content-Type-Options: nosniff\r\n" ++ | |
| "X-XSS-Protection: 0\r\n" ++ | |
| "Referrer-Policy: no-referrer-when-downgrade\r\n" ++ | |
| "Content-Security-Policy: default-src 'none'; base-uri 'self'; block-all-mixed-content; child-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/; connect-src 'self' uploads.github.com objects-origin.githubusercontent.com www.githubstatus.com collector.github.com raw.githubusercontent.com api.github.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com cdn.optimizely.com logx.optimizely.com/v1/events *.actions.githubusercontent.com wss://*.actions.githubusercontent.com online.visualstudio.com/api/v1/locations github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com insights.github.com wss://alive.github.com; font-src github.githubassets.com; form-action 'self' github.com gist.github.com objects-origin.githubusercontent.com; frame-ancestors 'none'; frame-src viewscreen.githubusercontent.com notebooks.githubusercontent.com; img-src 'self' data: github.githubassets.com media.githubusercontent.com camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com github-cloud.s3.amazonaws.com objects.githubusercontent.com objects-origin.githubusercontent.com secured-user-images.githubusercontent.com/ opengraph.githubassets.com github-production-user-asset-6210df.s3.amazonaws.com customer-stories-feed.github.com spotlights-feed.github.com *.githubusercontent.com; manifest-src 'self'; media-src github.com user-images.githubusercontent.com/ secured-user-images.githubusercontent.com/; script-src github.githubassets.com; style-src 'unsafe-inline' github.githubassets.com; worker-src github.com/assets-cdn/worker/ gist.github.com/assets-cdn/worker/\r\n" ++ | |
| "Content-Length: 0\r\n" ++ | |
| "X-GitHub-Request-Id: A8D8:666F:271DFF9:2960BF4:63B6415C\r\n"; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [nix-shell:~/dev/zig/build-release]$ lscpu | grep Model | |
| Model name: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz | |
| Model: 158 | |
| [nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast | |
| noop: 1374170 MiB/s | |
| naive: 1094 MiB/s | |
| fancy: 5082 MiB/s | |
| [nix-shell:~/dev/zig/build-release]$ stage3/bin/zig run bench.zig -OReleaseFast -mcpu=baseline | |
| noop: 1288749 MiB/s | |
| naive: 859 MiB/s | |
| fancy: 5743 MiB/s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment