Last active
November 13, 2025 07:32
-
-
Save notcancername/45529101cdbbea2bb0a19e1b120c5931 to your computer and use it in GitHub Desktop.
Reads a list of newline-delimited 64-bit unsigned integer strings from stdin and outputs the bitwise probability as well as the Shannon entropy.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const std = @import("std"); | |
| pub const Result = struct { | |
| total: u64 = 0, | |
| counts: [64]u64 = @splat(0), | |
| /// atomic | |
| pub fn add(to: *Result, from: *const Result) void { | |
| _ = @atomicRmw(u64, &to.total, .Add, from.total, .monotonic); | |
| for (&to.counts, &from.counts) |*d, s| { | |
| _ = @atomicRmw(u64, d, .Add, s, .monotonic); | |
| } | |
| } | |
| /// update total separately | |
| pub fn updateCounts(result: *Result, with: u64) void { | |
| inline for (0..64) |i| { | |
| result.counts[i] += @as(u1, @truncate(with >> i)); | |
| } | |
| } | |
| }; | |
| const batch_size = 128 << 20; | |
| pub const Batch = struct { | |
| buffer: [batch_size]u64 = undefined, | |
| len: usize, | |
| out: *Result, | |
| io: std.Io, | |
| done_queue: *std.Io.Queue(*Batch), | |
| pub fn slice(batch: *const Batch) []const u64 { | |
| return batch.buffer[0..batch.len]; | |
| } | |
| pub fn process(batch: *Batch) void { | |
| const ints = batch.slice(); | |
| var r: Result = .{ .total = ints.len }; | |
| for (batch.slice()) |i| { | |
| r.updateCounts(i); | |
| } | |
| batch.out.add(&r); | |
| batch.done_queue.putOneUncancelable(batch.io, batch); | |
| } | |
| }; | |
| pub fn main() !void { | |
| var thr: std.Io.Threaded = .init(std.heap.smp_allocator); | |
| defer thr.deinit(); | |
| thr.cpu_count = @max(thr.cpu_count catch 1, 2); | |
| const io = thr.io(); | |
| var buffer: [1 << 20]u8 = undefined; | |
| var reader = std.fs.File.stdin().reader(io, &buffer); | |
| var stdout_writer = std.fs.File.stdout().writer(&.{}); | |
| const batches_buffer = try std.heap.smp_allocator.alloc(u8, 1 << 30); | |
| defer std.heap.smp_allocator.free(batches_buffer); | |
| var batches_fba = std.heap.FixedBufferAllocator.init(batches_buffer); | |
| var batches_pool: std.heap.MemoryPool(Batch) = .init(batches_fba.allocator()); | |
| var processing: std.Io.Group = .init; | |
| defer processing.cancel(io); | |
| var completed_batches_buffer: [1 << 10]*Batch = undefined; | |
| var completed: std.Io.Queue(*Batch) = .init(&completed_batches_buffer); | |
| var result: Result = .{}; | |
| while (true) { | |
| const new_batch: *Batch = batches_pool.create() catch b: { | |
| break :b try completed.getOne(io); | |
| }; | |
| new_batch.* = .{ | |
| .buffer = undefined, | |
| .len = 0, | |
| .io = io, | |
| .out = &result, | |
| .done_queue = &completed, | |
| }; | |
| var batched = std.ArrayListUnmanaged(u64).initBuffer(&new_batch.buffer); | |
| while (try reader.interface.takeDelimiter('\n')) |line| { | |
| const int = std.fmt.parseInt(u64, line, 10) catch continue; | |
| batched.appendBounded(int) catch break; | |
| } | |
| if (batched.items.len == 0) break; | |
| new_batch.len = batched.items.len; | |
| processing.async(io, Batch.process, .{new_batch}); | |
| } | |
| processing.wait(io); | |
| try stdout_writer.interface.print("{any}\n", .{result}); | |
| var probabilities: [64]f64 = @splat(0); | |
| for (result.counts, &probabilities, 0..) |count, *p, i| { | |
| p.* = @as(f64, @floatFromInt(count)) / @as(f64, @floatFromInt(result.total)); | |
| std.debug.print("P(int >> {d} & 1) = {d:.2}%\n", .{ i, p.* * 100 }); | |
| } | |
| var shannon: f64 = 0; | |
| for (probabilities) |probability| { | |
| if (probability == 0) continue; | |
| shannon += probability * @log2(probability); | |
| } | |
| shannon = -shannon; | |
| std.debug.print("Shannon entropy: {d} bit\n", .{shannon}); | |
| try stdout_writer.interface.print("entropy: {} bit\n", .{shannon}); | |
| try stdout_writer.interface.flush(); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment