Skip to content

Instantly share code, notes, and snippets.

@Rexicon226
Last active August 15, 2025 07:03
Show Gist options
  • Save Rexicon226/1e45ea2086283fb668948fb83f235e23 to your computer and use it in GitHub Desktop.
Save Rexicon226/1e45ea2086283fb668948fb83f235e23 to your computer and use it in GitHub Desktop.
countScalar benchmark
const std = @import("std");
const testing = std.testing;
const iterations_per_byte = 1000;
const warmup_iterations = 10;
pub fn main() !void {
const allocator = std.heap.smp_allocator;
// Pin the process to a single core (1)
const cpu0001: std.os.linux.cpu_set_t = [1]usize{0b0001} ++ ([_]usize{0} ** (16 - 1));
try std.os.linux.sched_setaffinity(0, &cpu0001);
const stdout = std.io.getStdOut().writer();
const loops = try std.process.argsAlloc(allocator);
defer std.process.argsFree(allocator, loops);
var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
const max_bytes = try std.fmt.parseInt(usize, loops[1], 10);
const pow_max_bytes = try std.math.powi(usize, 2, max_bytes);
const buffer = try allocator.alloc(u8, pow_max_bytes);
for (1..max_bytes) |N| {
const index = try std.math.powi(usize, 2, N);
const slice = buffer[0..index];
random.bytes(slice);
try stdout.print("{},", .{index});
inline for (.{
.{ countScalarButGood, "sinon idea new" },
.{ countScalar, "old" },
.{ countScalarIndex, "index of scalar" },
}) |impl| {
const func, const name = impl;
_ = name;
clflush(u8, slice);
const element = random.int(u8);
var i: u32 = 0;
var cycles: u64 = 0;
while (i < iterations_per_byte + warmup_iterations) : (i += 1) {
const start = rdtsc();
std.mem.doNotOptimizeAway(func(slice, element));
const end = rdtsc();
if (i > warmup_iterations) cycles += (end - start);
}
const cycles_per_byte = cycles / iterations_per_byte;
try stdout.print("{d},", .{cycles_per_byte});
}
try stdout.writeAll("\n");
}
}
/// X86 cycle counter
inline fn rdtsc() usize {
var a: u32 = undefined;
var b: u32 = undefined;
asm volatile ("rdtscp"
: [a] "={edx}" (a),
[b] "={eax}" (b),
:
: "ecx"
);
return (@as(u64, a) << 32) | b;
}
inline fn clflush(comptime T: type, slice: []const T) void {
for (0..(slice.len) / @bitSizeOf(T) * 8) |chunk| {
const offset = slice.ptr + (chunk * @bitSizeOf(T) * 8);
asm volatile ("clflush %[ptr]"
:
: [ptr] "m" (offset),
: "memory"
);
}
}
pub fn countScalarButGood(haystack: []const u8, needle: u8) usize {
const N = std.simd.suggestVectorLength(u8) orelse @sizeOf(u8);
const V = @Vector(N, u8);
const Int = std.meta.Int(.unsigned, N);
var found: usize = 0;
if (haystack.len < N) {
for (haystack) |item| {
if (item == needle) found += 1;
}
return found;
}
const broad: V = @splat(needle);
for (0..haystack.len / N) |i| {
const h: V = @bitCast(haystack[i * N ..][0..N].*);
const integer: Int = @bitCast(h == broad);
found += @popCount(integer);
}
const remaining = (haystack.len % N);
if (remaining == 0) return found;
const overlapped: std.math.Log2Int(Int) = @intCast(N - remaining);
const mask: Int = (@as(Int, 1) << overlapped) - 1;
const last: V = @bitCast(haystack[haystack.len - N ..][0..N].*);
const integer: Int = @bitCast(last == broad);
found += @popCount(integer & ~mask);
return found;
}
pub fn countScalar(haystack: []const u8, needle: u8) usize {
var found: usize = 0;
for (haystack) |item| {
if (item == needle) {
found += 1;
}
}
return found;
}
pub fn countScalarIndex(haystack: []const u8, needle: u8) usize {
var i: usize = 0;
var found: usize = 0;
while (std.mem.indexOfScalarPos(u8, haystack, i, needle)) |idx| {
i = idx + 1;
found += 1;
}
return found;
}
import matplotlib.pyplot as plt
filename = "data.txt"
sizes = []
new_cycles = []
old_cycles = []
index_cycles = []
with open(filename) as f:
for line in f:
if not line.strip():
continue
parts = line.strip().split(",")
if len(parts) < 4:
continue
size, x, y, index = map(int, parts[:4])
sizes.append(size)
new_cycles.append(x)
old_cycles.append(y)
index_cycles.append(index)
# Plot
plt.figure(figsize=(10, 6))
plt.plot(sizes, new_cycles, label="New", marker="o")
plt.plot(sizes, old_cycles, label="Old", marker="s")
plt.plot(sizes, index_cycles, label="Index", marker="*")
plt.xlabel("Input size")
plt.ylabel("Cycles")
plt.grid(True, which="both", ls="--", alpha=0.5)
plt.legend()
plt.tight_layout()
plt.savefig("test.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment