Skip to content

Instantly share code, notes, and snippets.

@Rexicon226
Last active July 20, 2024 06:00
Show Gist options
  • Save Rexicon226/b533e0f1ec317b873cff691f54e63364 to your computer and use it in GitHub Desktop.
Save Rexicon226/b533e0f1ec317b873cff691f54e63364 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import pandas as pd
def sizeof_fmt(x, pos):
    """Format an axis tick value as a human-readable size string.

    Intended for use with ``matplotlib.ticker.FuncFormatter``.

    Args:
        x: Tick value, interpreted as a count that is scaled down by
            successive factors of 1024.
        pos: Tick position passed by the formatter; unused.

    Returns:
        An empty string for negative values, otherwise the scaled value
        followed by its unit (for example ``"  4 kB"``).
    """
    if x < 0:
        return ""
    for x_unit in ['bytes', 'kB', 'MB', 'GB', 'TB']:
        if x < 1024:
            return "%3.0f %s" % (x, x_unit)
        x /= 1024
    # Values of 1024 TB and above used to fall off the end of the loop and
    # return None; report them in the next unit up instead.
    return "%3.0f %s" % (x, 'PB')
def main():
    """Plot the benchmark results stored in ``data.txt``.

    Each non-blank line of ``data.txt`` is expected to hold at least three
    comma-separated integers: the size, the "new" timing and the "old"
    timing. Both timing series are smoothed with a 100-sample rolling mean,
    plotted against size, written to ``graph.png`` and shown on screen.

    Raises:
        OSError: If ``data.txt`` cannot be opened.
        ValueError: If a field is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    data = []
    # The context manager closes the file even when a line fails to parse.
    with open("data.txt", "r") as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            fields = line.split(",")
            size = int(fields[0])
            new_time = int(fields[1])
            old_time = int(fields[2])
            data.append([size, new_time, old_time])

    df = pd.DataFrame(data, columns=["size", "new", "old"])

    plt.plot(df["size"], df["new"].rolling(100).mean(), label="new")
    plt.plot(df["size"], df["old"].rolling(100).mean(), label="old")

    # Render x-axis ticks as human-readable sizes.
    plt.gca().xaxis.set_major_formatter(tkr.FuncFormatter(sizeof_fmt))
    plt.xlabel("Size")
    plt.ylabel("Cycles")
    plt.title("-title-")
    plt.legend()

    # Save before show(): showing may clear the current figure.
    plt.savefig("graph.png")
    plt.show()
main()
//! zig build-exe bench.zig -OReleaseFast -lc
const std = @import("std");
const allocator = std.heap.c_allocator;
const iterations_per_byte = 1000;
const warmup_iterations = 10;
// Local wrapper around the raw syscall; see issue #20357.
/// Applies the CPU affinity mask `set` to `pid` via the Linux
/// `sched_setaffinity` syscall.
///
/// Any non-success errno is reported through `std.posix.unexpectedErrno`.
pub fn sched_setaffinity(pid: std.os.linux.pid_t, set: *const std.os.linux.cpu_set_t) !void {
    const linux = std.os.linux;

    // Sign-extend the pid to register width, then reinterpret the bits as
    // the unsigned word the raw syscall interface takes.
    const pid_word: usize = @bitCast(@as(isize, pid));
    const mask_bytes = @sizeOf(linux.cpu_set_t);

    const rc = linux.syscall3(.sched_setaffinity, pid_word, mask_bytes, @intFromPtr(set));

    const code = std.posix.errno(rc);
    if (code == .SUCCESS) return;
    return std.posix.unexpectedErrno(code);
}
/// Benchmark driver.
///
/// Usage: `bench <max>`. For every element count `N` in `1..max`
/// (upper bound exclusive) it allocates `N` `u32` values, times
/// `new_reverse` and `old_reverse` on them, and prints one CSV line
/// `N,new,old` to stdout, where `new`/`old` are the mean timestamp-counter
/// ticks per call. Note that `N` is an element count, not a byte count.
pub fn main() !void {
    // Pin the process to CPU 0 (bit 0 of the mask) so that every timestamp
    // is read on the same core. Zero-initialising the whole set avoids
    // hard-coding how many words `cpu_set_t` has.
    var cpu_set = std.mem.zeroes(std.os.linux.cpu_set_t);
    cpu_set[0] = 0b0001;
    try sched_setaffinity(0, &cpu_set);

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    // Indexing args[1] unconditionally is out of bounds when the argument
    // is missing, which is undefined behaviour in ReleaseFast.
    if (args.len < 2) {
        const exe_name: []const u8 = if (args.len > 0) args[0] else "bench";
        std.debug.print("usage: {s} <max>\n", .{exe_name});
        return error.MissingArgument;
    }
    const max_bytes = try std.fmt.parseInt(usize, args[1], 10);

    const stdout = std.io.getStdOut().writer();

    const T = u32;
    for (1..max_bytes) |N| {
        const buffer = try allocator.alloc(T, N);
        // Released at the end of every iteration, including when printing
        // fails and the error propagates.
        defer allocator.free(buffer);

        const new_cycles_per_call = averageCycles(T, new_reverse, buffer);
        const old_cycles_per_call = averageCycles(T, old_reverse, buffer);

        try stdout.print("{},{d},{d}\n", .{
            N,
            new_cycles_per_call,
            old_cycles_per_call,
        });
    }
}

/// Returns the mean number of timestamp-counter ticks one call of
/// `reverse_fn(T, buffer)` takes.
///
/// The buffer is refilled with `0, 1, 2, ...` and passed to `clflush`
/// before timing. The first `warmup_iterations` calls are executed but not
/// counted; the following `iterations_per_byte` calls are summed and
/// averaged.
fn averageCycles(comptime T: type, comptime reverse_fn: anytype, buffer: []T) usize {
    for (buffer, 0..) |*item, i| item.* = @intCast(i);
    clflush(T, buffer);

    var total_cycles: usize = 0;
    for (0..warmup_iterations + iterations_per_byte) |iteration| {
        const start = rdtsc();
        std.mem.doNotOptimizeAway(reverse_fn(T, buffer));
        const end = rdtsc();

        // `>=` so that exactly `iterations_per_byte` samples are summed;
        // with `>` one sample was dropped while the divisor stayed the same.
        if (iteration >= warmup_iterations) total_cycles += end - start;
    }
    return total_cycles / iterations_per_byte;
}
/// Returns the first `N` elements of `a` as an array in reverse order.
///
/// `a` is only read. The loop is unrolled at compile time, so every index
/// is a constant.
inline fn reverseVector(comptime N: usize, comptime T: type, a: []T) [N]T {
    var reversed: [N]T = undefined;
    // Walk the source front to back and fill the result back to front.
    inline for (0..N) |src_index| {
        reversed[N - 1 - src_index] = a[src_index];
    }
    return reversed;
}
/// Reverses `items` in place.
///
/// When `std.simd.suggestVectorLength(T)` yields a length, mirrored chunks
/// of that many elements are reversed and exchanged at a time; whatever is
/// left in the middle is finished with element-wise swaps.
noinline fn new_reverse(comptime T: type, items: []T) void {
    // Nothing to move for zero-sized element types.
    if (@sizeOf(T) == 0) return;

    const half = items.len / 2;
    var front: usize = 0;

    if (std.simd.suggestVectorLength(T)) |lanes| {
        // Continue while a full chunk still fits inside the first half; this
        // also skips the loop entirely when the half is shorter than a chunk.
        while (front + lanes <= half) : (front += lanes) {
            const back = items.len - front;
            const head = items[front .. front + lanes];
            const tail = items[back - lanes .. back];

            // Take reversed copies of both chunks before overwriting either.
            const head_reversed: [lanes]T = reverseVector(lanes, T, head);
            const tail_reversed: [lanes]T = reverseVector(lanes, T, tail);

            @memcpy(tail, &head_reversed);
            @memcpy(head, &tail_reversed);
        }
    }

    // Remaining elements nearest the middle are swapped one pair at a time.
    while (front < half) : (front += 1) {
        std.mem.swap(T, &items[front], &items[items.len - 1 - front]);
    }
}
/// Reverses `items` in place by swapping each element of the first half
/// with its mirror in the second half. Serves as the baseline the
/// benchmark compares against.
noinline fn old_reverse(comptime T: type, items: []T) void {
    const len = items.len;
    for (0..len / 2) |front| {
        const back = len - 1 - front;
        std.mem.swap(T, &items[front], &items[back]);
    }
}
/// Reads the x86 time-stamp counter with the `rdtscp` instruction and
/// returns it as a single 64-bit value.
///
/// `rdtscp` delivers the counter in EDX:EAX and additionally writes ECX,
/// which is therefore listed as clobbered. The `"memory"` clobber stops the
/// compiler from moving loads and stores of the measured code across the
/// timestamp read.
inline fn rdtsc() usize {
    var hi: u32 = undefined;
    var lo: u32 = undefined;
    asm volatile ("rdtscp"
        : [hi] "={edx}" (hi),
          [lo] "={eax}" (lo),
        :
        : "ecx", "memory"
    );
    return (@as(u64, hi) << 32) | lo;
}
/// Flushes every cache line that overlaps the memory backing `slice`.
///
/// The byte range of the slice is walked in cache-line steps starting from
/// the line containing the first byte, so short slices and a partially
/// covered last line are flushed too. An empty slice is a no-op.
inline fn clflush(comptime T: type, slice: []const T) void {
    // x86-64 cache lines are 64 bytes. `std.atomic.cache_line` is not used
    // because it is a false-sharing padding size, which may be larger.
    const cache_line_bytes = 64;

    const bytes = std.mem.sliceAsBytes(slice);
    if (bytes.len == 0) return;

    const first_byte = @intFromPtr(bytes.ptr);
    const last_byte = first_byte + bytes.len - 1;

    // Round down to the start of the line holding the first byte.
    var line = first_byte & ~@as(usize, cache_line_bytes - 1);
    while (line <= last_byte) : (line += cache_line_bytes) {
        // Register-indirect addressing: the flushed address is exactly the
        // value held in `line`.
        asm volatile ("clflush (%[addr])"
            :
            : [addr] "r" (line),
            : "memory"
        );
    }

    // Wait for the flushes to complete before the caller starts timing.
    asm volatile ("mfence" ::: "memory");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment