Last active
July 20, 2024 06:00
-
-
Save Rexicon226/b533e0f1ec317b873cff691f54e63364 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import matplotlib.ticker as tkr | |
import pandas as pd | |
def sizeof_fmt(x, pos):
    """Format a tick value as a human-readable size using 1024-based units.

    Written to be wrapped in ``matplotlib.ticker.FuncFormatter``, which calls
    the formatter with ``(value, position)``.

    Args:
        x: Tick value to format.
        pos: Tick position supplied by the formatter protocol; unused.

    Returns:
        An empty string for negative values, otherwise ``"<value> <unit>"``
        with the value rounded to a whole number.
    """
    if x < 0:
        return ""
    for x_unit in ['bytes', 'kB', 'MB', 'GB', 'TB']:
        if x < 1024:
            return "%3.0f %s" % (x, x_unit)
        x /= 1024
    # Values of 1024 TB and above used to fall off the loop and return None,
    # which is not a valid tick label. `x` has been divided five times here,
    # so it is expressed in PB.
    return "%3.0f %s" % (x, 'PB')
def main():
    """Plot the benchmark results stored in ``data.txt``.

    Each non-blank line of ``data.txt`` is read as ``size,new,old`` (three
    comma-separated integers; any further fields are ignored). The "new" and
    "old" columns are smoothed with a 100-sample rolling mean, plotted against
    "size", saved to ``graph.png`` and then shown interactively.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    data = []
    # `with` closes the file even when a malformed line raises while parsing.
    with open("data.txt", "r") as file:
        for line in file:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # on int('').
            if not line.strip():
                continue
            fields = line.split(",")
            size = int(fields[0])
            new_time = int(fields[1])
            old_time = int(fields[2])
            data.append([size, new_time, old_time])

    df = pd.DataFrame(data, columns=["size", "new", "old"])

    # The rolling mean smooths per-size noise; the first 99 points are NaN and
    # therefore not drawn.
    plt.plot(df["size"], df["new"].rolling(100).mean(), label="new")
    plt.plot(df["size"], df["old"].rolling(100).mean(), label="old")

    plt.gca().xaxis.set_major_formatter(tkr.FuncFormatter(sizeof_fmt))
    plt.xlabel("Size")
    plt.ylabel("Cycles")
    plt.title("-title-")
    plt.legend()
    plt.savefig("graph.png")
    plt.show()


main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! zig build-exe bench.zig -OReleaseFast -lc | |
const std = @import("std"); | |
const allocator = std.heap.c_allocator; | |
const iterations_per_byte = 1000; | |
const warmup_iterations = 10; | |
// #20357
/// Sets the CPU affinity mask of `pid` to `set` by issuing the raw
/// `sched_setaffinity` syscall with the full size of `cpu_set_t`.
/// The `#20357` note above is presumably a Zig issue number explaining why a
/// local wrapper is used — TODO confirm.
///
/// Any non-success errno is reported through `std.posix.unexpectedErrno`.
pub fn sched_setaffinity(pid: std.os.linux.pid_t, set: *const std.os.linux.cpu_set_t) !void {
    const linux = std.os.linux;

    // The syscall ABI takes register-sized arguments: sign-extend the pid and
    // reinterpret its bits as unsigned.
    const pid_arg: usize = @bitCast(@as(isize, pid));
    const mask_size: usize = @sizeOf(linux.cpu_set_t);

    const rc = linux.syscall3(.sched_setaffinity, pid_arg, mask_size, @intFromPtr(set));
    const err = std.posix.errno(rc);
    if (err == .SUCCESS) return;
    return std.posix.unexpectedErrno(err);
}
/// Benchmarks `new_reverse` against `old_reverse` for every slice length in
/// `1..max`, where `max` is the first command-line argument, and prints one
/// CSV line per length to stdout: `length,new_cycles,old_cycles`.
///
/// The length is the number of `u32` elements in the buffer (not bytes), and
/// the cycle columns are the average `rdtsc` delta per call.
pub fn main() !void {
    // Pin the process to CPU 0 (bit 0 of the affinity mask) so every
    // timestamp read happens on the same core. Zero-initialising the set
    // avoids hard-coding the number of words in `cpu_set_t`.
    var cpu_set = std.mem.zeroes(std.os.linux.cpu_set_t);
    cpu_set[0] = 0b0001;
    try sched_setaffinity(0, &cpu_set);

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    // Indexing `args[1]` without this check is out of bounds when no argument
    // is given (undefined behaviour in ReleaseFast).
    if (args.len < 2) {
        std.debug.print("usage: bench <max_length>\n", .{});
        return error.MissingArgument;
    }
    const max_length = try std.fmt.parseInt(usize, args[1], 10);
    // The range `1..0` is invalid, so there is nothing to measure for 0.
    if (max_length == 0) return;

    const stdout = std.io.getStdOut().writer();
    const T = u32;

    for (1..max_length) |N| {
        const buffer = try allocator.alloc(T, N);
        // Released at the end of every iteration, including when printing fails.
        defer allocator.free(buffer);

        const new_cycles_per_call = measureAverageCycles(T, new_reverse, buffer);
        const old_cycles_per_call = measureAverageCycles(T, old_reverse, buffer);

        try stdout.print("{},{d},{d}\n", .{
            N,
            new_cycles_per_call,
            old_cycles_per_call,
        });
    }
}

/// Refills `buffer` with `0, 1, 2, ...`, flushes it with `clflush`, then
/// times `warmup_iterations + iterations_per_byte` calls of
/// `reverseFn(T, buffer)` and returns the average `rdtsc` delta over the
/// non-warm-up calls.
fn measureAverageCycles(comptime T: type, comptime reverseFn: anytype, buffer: []T) usize {
    for (buffer, 0..) |*item, i| item.* = @intCast(i);
    clflush(T, buffer);

    var total_cycles: usize = 0;
    for (0..warmup_iterations + iterations_per_byte) |iteration| {
        const start = rdtsc();
        std.mem.doNotOptimizeAway(reverseFn(T, buffer));
        const end = rdtsc();
        // `>=` discards exactly `warmup_iterations` runs, so the sum holds
        // exactly `iterations_per_byte` samples and matches the divisor below.
        if (iteration >= warmup_iterations) total_cycles += end - start;
    }
    return total_cycles / iterations_per_byte;
}
/// Returns an array holding the first `N` elements of `a` in reverse order:
/// `result[i] == a[N - 1 - i]`.
///
/// `a` must contain at least `N` elements. It is only read, so it is taken as
/// a `[]const T`; mutable slices coerce to it, so existing callers still work.
inline fn reverseVector(comptime N: usize, comptime T: type, a: []const T) [N]T {
    var res: [N]T = undefined;
    // `inline for` unrolls the copy because `N` is known at compile time.
    inline for (0..N) |i| {
        res[i] = a[N - i - 1];
    }
    return res;
}
/// Reverses `items` in place.
///
/// When `std.simd.suggestVectorLength(T)` suggests a vector length and each
/// half of the slice holds at least that many elements, whole windows of that
/// length are exchanged between the front and the back, each reversed by
/// `reverseVector`. The remaining middle elements are swapped one pair at a
/// time.
noinline fn new_reverse(comptime T: type, items: []T) void {
    if (@sizeOf(T) == 0) return;

    const half = items.len / 2;
    var front: usize = 0;

    if (std.simd.suggestVectorLength(T)) |lanes| {
        if (lanes <= half) {
            // Last start index (exclusive) at which a full window still fits
            // inside the front half.
            const window_limit = half - (lanes - 1);
            while (front < window_limit) : (front += lanes) {
                const back_end = items.len - front;
                const front_window = items[front .. front + lanes];
                const back_window = items[back_end - lanes .. back_end];

                // Reverse both windows into temporaries before writing either
                // one back, so neither source is overwritten early.
                const front_reversed: [lanes]T = reverseVector(lanes, T, front_window);
                const back_reversed: [lanes]T = reverseVector(lanes, T, back_window);

                @memcpy(back_window, &front_reversed);
                @memcpy(front_window, &back_reversed);
            }
        }
    }

    // Scalar tail: whatever the windowed loop did not reach.
    for (front..half) |index| {
        std.mem.swap(T, &items[index], &items[items.len - index - 1]);
    }
}
/// Reverses `items` in place by swapping mirrored element pairs, one pair
/// per step. This is the baseline the benchmark compares `new_reverse` to.
noinline fn old_reverse(comptime T: type, items: []T) void {
    const half = items.len / 2;
    for (0..half) |front| {
        const back = items.len - front - 1;
        std.mem.swap(T, &items[front], &items[back]);
    }
}
/// Reads the processor time-stamp counter with the `rdtscp` instruction and
/// returns the two 32-bit halves combined into one value.
inline fn rdtsc() usize {
    var hi: u32 = undefined;
    var lo: u32 = undefined;
    // The counter comes back split across edx (upper half) and eax (lower
    // half); ecx is declared as clobbered.
    asm volatile ("rdtscp"
        : [hi] "={edx}" (hi),
          [lo] "={eax}" (lo),
        :
        : "ecx"
    );
    const upper = @as(u64, hi) << 32;
    return upper | lo;
}
/// Flushes every cache line that overlaps the memory of `slice` using the
/// x86 `clflush` instruction. Does nothing for an empty slice.
inline fn clflush(comptime T: type, slice: []const T) void {
    // Line size assumed by this benchmark; 64 bytes is typical for x86-64 —
    // TODO confirm for the target CPU.
    const line_size: usize = 64;

    const byte_len = slice.len * @sizeOf(T);
    if (byte_len == 0) return;

    const start = @intFromPtr(slice.ptr);
    const end = start + byte_len;

    // Start at the beginning of the line containing the first byte so the
    // walk also covers a buffer that is not line-aligned, and step in bytes
    // (not elements) so no line and no tail element is skipped.
    var line = start & ~(line_size - 1);
    while (line < end) : (line += line_size) {
        // The address is passed in a register and dereferenced in the
        // instruction. An "m" operand built from the pointer variable would
        // name the local that stores the address rather than the buffer.
        asm volatile ("clflush (%[ptr])"
            :
            : [ptr] "r" (line),
            : "memory"
        );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment