Last active
June 22, 2024 14:43
-
-
Save matu3ba/eb52a8132cd4fc39b1836275341b8ce5 to your computer and use it in GitHub Desktop.
benchmarks for muloti
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Benchmark 1: ./mulo_fast | |
Time (mean ± σ): 1.008 s ± 0.009 s [User: 1.006 s, System: 0.001 s] | |
Range (min … max): 0.996 s … 1.027 s 10 runs | |
Warning: The first benchmarking run for this command was significantly slower than the | |
rest (1.027 s). This could be caused by (filesystem) caches that were not filled until af | |
ter the first run. You should consider using the '--warmup' option to fill those caches b | |
efore the actual benchmark. Alternatively, use the '--prepare' option to clear the caches | |
before each timing run. | |
Benchmark 2: ./mulo_hack | |
Time (mean ± σ): 1.256 s ± 0.004 s [User: 1.250 s, System: 0.001 s] | |
Range (min … max): 1.248 s … 1.260 s 10 runs | |
Benchmark 3: ./mulo_llvm | |
Time (mean ± σ): 17.403 s ± 0.155 s [User: 17.363 s, System: 0.005 s] | |
Range (min … max): 17.314 s … 17.836 s 10 runs | |
Summary | |
'./mulo_fast' ran | |
1.25 ± 0.01 times faster than './mulo_hack' | |
17.26 ± 0.22 times faster than './mulo_llvm' | |
With loop size of 1_000_000_000 | |
Benchmark 1: ./mulo_fast | |
Time (mean ± σ): 21.826 s ± 1.182 s [User: 21.791 s, System: 0.001 s] | |
Range (min … max): 20.844 s … 24.191 s 10 runs | |
Benchmark 2: ./mulo_hack | |
⠙ Current estimate: 26.246 s ██████████████░░░░░░░░░░░░░░░░░░░░░░ ETA 00:02:39 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const builtin = @import("builtin"); | |
const math = std.math; | |
inline fn muloXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { | |
@setRuntimeSafety(builtin.is_test); | |
const BSIZE = @bitSizeOf(ST); | |
comptime var UT = switch (ST) { | |
i32 => u32, | |
i64 => u64, | |
i128 => u128, | |
else => unreachable, | |
}; | |
const min = @bitCast(ST, @as(UT, 1 << (BSIZE - 1))); | |
const max = ~min; | |
overflow.* = 0; | |
const result = a *% b; | |
// edge cases | |
if (a == min) { | |
if (b != 0 and b != 1) overflow.* = 1; | |
return result; | |
} | |
if (b == min) { | |
if (a != 0 and a != 1) overflow.* = 1; | |
return result; | |
} | |
// take sign of x sx | |
const sa = a >> (BSIZE - 1); | |
const sb = b >> (BSIZE - 1); | |
// take absolute value of a and b via | |
// abs(x) = (x^sx)) - sx | |
const abs_a = (a ^ sa) -% sa; | |
const abs_b = (b ^ sb) -% sb; | |
// unitary magnitude, cannot have overflow | |
if (abs_a < 2 or abs_b < 2) return result; | |
// compare the signs of operands | |
if ((a ^ b) >> (BSIZE - 1) != 0) { | |
if (abs_a > @divTrunc(max, abs_b)) overflow.* = 1; | |
} else { | |
if (abs_a > @divTrunc(min, -abs_b)) overflow.* = 1; | |
} | |
return result; | |
} | |
fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { | |
return muloXi4_generic(i128, a, b, overflow); | |
} | |
inline fn mulvXi_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { | |
@setRuntimeSafety(builtin.is_test); | |
const min = math.minInt(ST); | |
var res: ST = a *% b; | |
// Hacker's Delight section Overflow subsection Multiplication | |
// case a=-2^{31}, b=-1 problem, because | |
// on some machines a*b = -2^{31} with overflow | |
// Then -2^{31}/-1 overflows and any result is possible. | |
// => check with a<0 and b=-2^{31} | |
if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b)) | |
overflow.* = 1; | |
return @truncate(ST, res); | |
} | |
// adjusted mulv | |
fn __mulvti3(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { | |
return mulvXi_genericSmall(i128, a, b, overflow); | |
} | |
inline fn mulvXi_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { | |
@setRuntimeSafety(builtin.is_test); | |
overflow.* = 0; | |
const EST = switch (ST) { | |
i32 => i64, | |
i64 => i128, | |
i128 => i256, | |
else => unreachable, | |
}; | |
const min = math.minInt(ST); | |
const max = math.maxInt(ST); | |
var res: EST = @as(EST, a) * @as(EST, b); | |
//invariant: -2^{bitwidth(EST)} <= res <= 2^{bitwidth(EST)-1} | |
if (res < min or max < res) | |
overflow.* = 1; | |
return @truncate(ST, res); | |
} | |
fn __mulvti3fast(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { | |
return mulvXi_genericFast(i128, a, b, overflow); | |
} | |
pub fn main() !void { | |
var x: i128 = 0; | |
var y: i128 = 0; | |
var ov: c_int = 0; | |
var res: i128 = 0; | |
var sum: i128 = 0; | |
var sum2: i128 = 0; | |
const stdout = std.io.getStdOut(); | |
stdout.writeAll("starting\n") catch unreachable; | |
while (x < 50_000_000) { | |
//res = __muloti4(x, y, &ov); | |
//res = __mulvti3(x, y, &ov); | |
res = __mulvti3fast(x, y, &ov); | |
x += 1; | |
y += 1; | |
sum += res; | |
if (sum > 1_000_000) { | |
sum2 += 1; | |
sum = 0; | |
} | |
//std.debug.assert(ov != 1); | |
} | |
if (ov == 1) stdout.writeAll("error: overflow happened\n") catch unreachable; | |
//std.debug.print("sum2: {d}\n", .{sum2}); | |
if (sum2 > 0) stdout.writeAll("finished\n") catch unreachable; | |
std.process.exit(0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment