Last active
August 15, 2025 07:03
-
-
Save Rexicon226/1e45ea2086283fb668948fb83f235e23 to your computer and use it in GitHub Desktop.
countScalar benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const testing = std.testing; | |
const iterations_per_byte = 1000; | |
const warmup_iterations = 10; | |
pub fn main() !void { | |
const allocator = std.heap.smp_allocator; | |
// Pin the process to a single core (1) | |
const cpu0001: std.os.linux.cpu_set_t = [1]usize{0b0001} ++ ([_]usize{0} ** (16 - 1)); | |
try std.os.linux.sched_setaffinity(0, &cpu0001); | |
const stdout = std.io.getStdOut().writer(); | |
const loops = try std.process.argsAlloc(allocator); | |
defer std.process.argsFree(allocator, loops); | |
var prng = std.Random.DefaultPrng.init(0); | |
const random = prng.random(); | |
const max_bytes = try std.fmt.parseInt(usize, loops[1], 10); | |
const pow_max_bytes = try std.math.powi(usize, 2, max_bytes); | |
const buffer = try allocator.alloc(u8, pow_max_bytes); | |
for (1..max_bytes) |N| { | |
const index = try std.math.powi(usize, 2, N); | |
const slice = buffer[0..index]; | |
random.bytes(slice); | |
try stdout.print("{},", .{index}); | |
inline for (.{ | |
.{ countScalarButGood, "sinon idea new" }, | |
.{ countScalar, "old" }, | |
.{ countScalarIndex, "index of scalar" }, | |
}) |impl| { | |
const func, const name = impl; | |
_ = name; | |
clflush(u8, slice); | |
const element = random.int(u8); | |
var i: u32 = 0; | |
var cycles: u64 = 0; | |
while (i < iterations_per_byte + warmup_iterations) : (i += 1) { | |
const start = rdtsc(); | |
std.mem.doNotOptimizeAway(func(slice, element)); | |
const end = rdtsc(); | |
if (i > warmup_iterations) cycles += (end - start); | |
} | |
const cycles_per_byte = cycles / iterations_per_byte; | |
try stdout.print("{d},", .{cycles_per_byte}); | |
} | |
try stdout.writeAll("\n"); | |
} | |
} | |
/// X86 cycle counter | |
inline fn rdtsc() usize { | |
var a: u32 = undefined; | |
var b: u32 = undefined; | |
asm volatile ("rdtscp" | |
: [a] "={edx}" (a), | |
[b] "={eax}" (b), | |
: | |
: "ecx" | |
); | |
return (@as(u64, a) << 32) | b; | |
} | |
inline fn clflush(comptime T: type, slice: []const T) void { | |
for (0..(slice.len) / @bitSizeOf(T) * 8) |chunk| { | |
const offset = slice.ptr + (chunk * @bitSizeOf(T) * 8); | |
asm volatile ("clflush %[ptr]" | |
: | |
: [ptr] "m" (offset), | |
: "memory" | |
); | |
} | |
} | |
pub fn countScalarButGood(haystack: []const u8, needle: u8) usize { | |
const N = std.simd.suggestVectorLength(u8) orelse @sizeOf(u8); | |
const V = @Vector(N, u8); | |
const Int = std.meta.Int(.unsigned, N); | |
var found: usize = 0; | |
if (haystack.len < N) { | |
for (haystack) |item| { | |
if (item == needle) found += 1; | |
} | |
return found; | |
} | |
const broad: V = @splat(needle); | |
for (0..haystack.len / N) |i| { | |
const h: V = @bitCast(haystack[i * N ..][0..N].*); | |
const integer: Int = @bitCast(h == broad); | |
found += @popCount(integer); | |
} | |
const remaining = (haystack.len % N); | |
if (remaining == 0) return found; | |
const overlapped: std.math.Log2Int(Int) = @intCast(N - remaining); | |
const mask: Int = (@as(Int, 1) << overlapped) - 1; | |
const last: V = @bitCast(haystack[haystack.len - N ..][0..N].*); | |
const integer: Int = @bitCast(last == broad); | |
found += @popCount(integer & ~mask); | |
return found; | |
} | |
pub fn countScalar(haystack: []const u8, needle: u8) usize { | |
var found: usize = 0; | |
for (haystack) |item| { | |
if (item == needle) { | |
found += 1; | |
} | |
} | |
return found; | |
} | |
pub fn countScalarIndex(haystack: []const u8, needle: u8) usize { | |
var i: usize = 0; | |
var found: usize = 0; | |
while (std.mem.indexOfScalarPos(u8, haystack, i, needle)) |idx| { | |
i = idx + 1; | |
found += 1; | |
} | |
return found; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
filename = "data.txt" | |
sizes = [] | |
new_cycles = [] | |
old_cycles = [] | |
index_cycles = [] | |
with open(filename) as f: | |
for line in f: | |
if not line.strip(): | |
continue | |
parts = line.strip().split(",") | |
if len(parts) < 4: | |
continue | |
size, x, y, index = map(int, parts[:4]) | |
sizes.append(size) | |
new_cycles.append(x) | |
old_cycles.append(y) | |
index_cycles.append(index) | |
# Plot | |
plt.figure(figsize=(10, 6)) | |
plt.plot(sizes, new_cycles, label="New", marker="o") | |
plt.plot(sizes, old_cycles, label="Old", marker="s") | |
plt.plot(sizes, index_cycles, label="Index", marker="*") | |
plt.xlabel("Input size") | |
plt.ylabel("Cycles") | |
plt.grid(True, which="both", ls="--", alpha=0.5) | |
plt.legend() | |
plt.tight_layout() | |
plt.savefig("test.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment