Skip to content

Instantly share code, notes, and snippets.

@notcancername
Last active November 13, 2025 07:32
Show Gist options
  • Select an option

  • Save notcancername/45529101cdbbea2bb0a19e1b120c5931 to your computer and use it in GitHub Desktop.

Select an option

Save notcancername/45529101cdbbea2bb0a19e1b120c5931 to your computer and use it in GitHub Desktop.
Reads a list of newline-delimited 64-bit unsigned integer strings from stdin and outputs the bitwise probability as well as the Shannon entropy.
const std = @import("std");
pub const Result = struct {
total: u64 = 0,
counts: [64]u64 = @splat(0),
/// atomic
pub fn add(to: *Result, from: *const Result) void {
_ = @atomicRmw(u64, &to.total, .Add, from.total, .monotonic);
for (&to.counts, &from.counts) |*d, s| {
_ = @atomicRmw(u64, d, .Add, s, .monotonic);
}
}
/// update total separately
pub fn updateCounts(result: *Result, with: u64) void {
inline for (0..64) |i| {
result.counts[i] += @as(u1, @truncate(with >> i));
}
}
};
const batch_size = 128 << 20;
pub const Batch = struct {
buffer: [batch_size]u64 = undefined,
len: usize,
out: *Result,
io: std.Io,
done_queue: *std.Io.Queue(*Batch),
pub fn slice(batch: *const Batch) []const u64 {
return batch.buffer[0..batch.len];
}
pub fn process(batch: *Batch) void {
const ints = batch.slice();
var r: Result = .{ .total = ints.len };
for (batch.slice()) |i| {
r.updateCounts(i);
}
batch.out.add(&r);
batch.done_queue.putOneUncancelable(batch.io, batch);
}
};
pub fn main() !void {
var thr: std.Io.Threaded = .init(std.heap.smp_allocator);
defer thr.deinit();
thr.cpu_count = @max(thr.cpu_count catch 1, 2);
const io = thr.io();
var buffer: [1 << 20]u8 = undefined;
var reader = std.fs.File.stdin().reader(io, &buffer);
var stdout_writer = std.fs.File.stdout().writer(&.{});
const batches_buffer = try std.heap.smp_allocator.alloc(u8, 1 << 30);
defer std.heap.smp_allocator.free(batches_buffer);
var batches_fba = std.heap.FixedBufferAllocator.init(batches_buffer);
var batches_pool: std.heap.MemoryPool(Batch) = .init(batches_fba.allocator());
var processing: std.Io.Group = .init;
defer processing.cancel(io);
var completed_batches_buffer: [1 << 10]*Batch = undefined;
var completed: std.Io.Queue(*Batch) = .init(&completed_batches_buffer);
var result: Result = .{};
while (true) {
const new_batch: *Batch = batches_pool.create() catch b: {
break :b try completed.getOne(io);
};
new_batch.* = .{
.buffer = undefined,
.len = 0,
.io = io,
.out = &result,
.done_queue = &completed,
};
var batched = std.ArrayListUnmanaged(u64).initBuffer(&new_batch.buffer);
while (try reader.interface.takeDelimiter('\n')) |line| {
const int = std.fmt.parseInt(u64, line, 10) catch continue;
batched.appendBounded(int) catch break;
}
if (batched.items.len == 0) break;
new_batch.len = batched.items.len;
processing.async(io, Batch.process, .{new_batch});
}
processing.wait(io);
try stdout_writer.interface.print("{any}\n", .{result});
var probabilities: [64]f64 = @splat(0);
for (result.counts, &probabilities, 0..) |count, *p, i| {
p.* = @as(f64, @floatFromInt(count)) / @as(f64, @floatFromInt(result.total));
std.debug.print("P(int >> {d} & 1) = {d:.2}%\n", .{ i, p.* * 100 });
}
var shannon: f64 = 0;
for (probabilities) |probability| {
if (probability == 0) continue;
shannon += probability * @log2(probability);
}
shannon = -shannon;
std.debug.print("Shannon entropy: {d} bit\n", .{shannon});
try stdout_writer.interface.print("entropy: {} bit\n", .{shannon});
try stdout_writer.interface.flush();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment