Created
February 1, 2023 21:48
-
-
Save matu3ba/3c9cbefc8907d45543dffd40133791d4 to your computer and use it in GitHub Desktop.
Returning a C struct (sum plus overflow flag) instead of returning the sum and writing the overflow flag through a pointer yields the same performance.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
128bit | |
(ins)[misterspoon@pc tryzig]$ hyperfine ./addo_crt ./addo_cstruct | |
Benchmark 1: ./addo_crt | |
Time (mean ± σ): 1.083 s ± 0.006 s [User: 1.082 s, System: 0.001 s] | |
Range (min … max): 1.072 s … 1.094 s 10 runs | |
Benchmark 2: ./addo_cstruct | |
Time (mean ± σ): 1.077 s ± 0.003 s [User: 1.075 s, System: 0.001 s] | |
Range (min … max): 1.073 s … 1.082 s 10 runs | |
Summary | |
'./addo_cstruct' ran | |
1.01 ± 0.01 times faster than './addo_crt' | |
64bit | |
(ins)[misterspoon@pc tryzig]$ hyperfine --warmup 5 ./addo64_crt ./addo64_cstruct | |
Benchmark 1: ./addo64_crt | |
Time (mean ± σ): 701.9 ms ± 4.9 ms [User: 700.3 ms, System: 0.6 ms] | |
Range (min … max): 696.9 ms … 710.8 ms 10 runs | |
Benchmark 2: ./addo64_cstruct | |
Time (mean ± σ): 707.8 ms ± 4.3 ms [User: 705.8 ms, System: 1.1 ms] | |
Range (min … max): 701.1 ms … 715.5 ms 10 runs | |
Summary | |
'./addo64_crt' ran | |
1.01 ± 0.01 times faster than './addo64_cstruct' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const builtin = @import("builtin"); | |
const math = std.math; | |
/// Adds `a` and `b` with two's-complement wraparound and reports signed overflow
/// through an out-parameter.
///
/// `ST` is expected to be a signed integer type: the overflow test below relies on
/// the sign bit (`< 0`).
///
/// Returns the wrapped sum `a +% b`. `overflow.*` is always written: 1 if the exact
/// sum does not fit in `ST`, 0 otherwise.
inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    // Safety checks are only kept when building tests.
    @setRuntimeSafety(builtin.is_test);
    const sum: ST = a +% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a +% b (wraparound addition).
    // Overflow occurs iff a and b have the same sign and the sign of sum differs
    // from it.
    // Slower routine: res = ~(a ^ b) & (sum ^ a)
    // Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff (res < 0), i.e. the sign bit of res is set.
    overflow.* = if (((sum ^ a) & (sum ^ b)) < 0) 1 else 0;
    return sum;
}
/// Returns the `extern struct` type used to hand back both the wrapped sum and the
/// overflow flag for integer type `ST` in a single return value.
fn AddoXi5T(comptime ST: type) type {
    return extern struct {
        /// Wrapped (two's-complement) sum of the operands.
        result: ST,
        /// 1 if the exact sum did not fit in `ST`, 0 otherwise.
        overflow: u8,
    };
}

/// Adds `a` and `b` with two's-complement wraparound and returns the wrapped sum
/// together with an overflow flag.
///
/// `ST` is expected to be a signed integer type: the overflow test below relies on
/// the sign bit (`< 0`).
inline fn addoXi5_generic(comptime ST: type, a: ST, b: ST) AddoXi5T(ST) {
    // Safety checks are only kept when building tests.
    @setRuntimeSafety(builtin.is_test);
    const sum: ST = a +% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a +% b (wraparound addition).
    // Overflow occurs iff a and b have the same sign and the sign of sum differs
    // from it.
    // Slower routine: res = ~(a ^ b) & (sum ^ a)
    // Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff (res < 0), i.e. the sign bit of res is set.
    return .{
        .result = sum,
        .overflow = if (((sum ^ a) & (sum ^ b)) < 0) 1 else 0,
    };
}
/// C-calling-convention entry point for 128-bit signed addition that returns the
/// wrapped sum and the overflow flag together in one struct.
pub fn __addoti5(a: i128, b: i128) callconv(.C) AddoXi5T(i128) {
    const outcome = addoXi5_generic(i128, a, b);
    return outcome;
}
/// C-calling-convention entry point for 64-bit signed addition that returns the
/// wrapped sum and the overflow flag together in one struct.
pub fn __addodi5(a: i64, b: i64) callconv(.C) AddoXi5T(i64) {
    const outcome = addoXi5_generic(i64, a, b);
    return outcome;
}
/// C-calling-convention entry point for 128-bit signed addition that returns the
/// wrapped sum and writes the overflow flag (0 or 1) through `overflow`.
pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const wrapped = addoXi4_generic(i128, a, b, overflow);
    return wrapped;
}
/// C-calling-convention entry point for 64-bit signed addition that returns the
/// wrapped sum and writes the overflow flag (0 or 1) through `overflow`.
pub fn __addodi4(a: i64, b: i64, overflow: *c_int) callconv(.C) i64 {
    const wrapped = addoXi4_generic(i64, a, b, overflow);
    return wrapped;
}
/// Straightforward range-check version of 128-bit signed addition with overflow
/// detection.
///
/// Returns the wrapped sum `a +% b`; `overflow.*` is set to 1 when the exact sum
/// would fall outside the i128 range and to 0 otherwise.
fn simple_addosi4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const lowest: i128 = math.minInt(i128);
    const highest: i128 = math.maxInt(i128);
    // For positive a, the sum exceeds the maximum iff b > highest - a.
    const above_range = (a > 0) and (b > highest - a);
    // For negative a, the sum drops below the minimum iff b < lowest - a.
    const below_range = (a < 0) and (b < lowest - a);
    overflow.* = if (above_range or below_range) 1 else 0;
    return a +% b;
}
/// Result of `addoti4`: the wrapped sum plus an overflow flag.
const Res = struct {
    /// Wrapped (two's-complement) sum of the operands.
    sum: i128,
    /// 1 if the exact sum did not fit in i128, 0 otherwise.
    overflow: u8,
};

/// Adds two i128 values with two's-complement wraparound and returns the wrapped
/// sum together with an overflow flag in a regular (non-extern) Zig struct.
fn addoti4(a: i128, b: i128) Res {
    // Safety checks are only kept when building tests.
    @setRuntimeSafety(builtin.is_test);
    const sum: i128 = a +% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a +% b (wraparound addition).
    // Overflow occurs iff a and b have the same sign and the sign of sum differs
    // from it.
    // Slower routine: res = ~(a ^ b) & (sum ^ a)
    // Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff (res < 0), i.e. the sign bit of res is set.
    return .{
        .sum = sum,
        .overflow = if (((sum ^ a) & (sum ^ b)) < 0) 1 else 0,
    };
}
// pub fn main() !void { | |
// var x: i64 = 0; | |
// var y: i64 = 0; | |
// var ov: c_int = 0; | |
// var res: i64 = 0; | |
// var sum: i64 = 0; | |
// var sum2: i64 = 0; | |
// const stdout = std.io.getStdOut(); | |
// | |
// stdout.writeAll("starting\n") catch unreachable; | |
// //while (x < 50_000_000) { | |
// while (x < 1_000_000_000) { | |
// //res = simple_addosi4(x, y, &ov); | |
// res = __addodi4(x, y, &ov); | |
// x += 1; | |
// y += 1; | |
// sum += res; | |
// if (sum > 1_000_000) { | |
// sum2 += 1; | |
// sum = 0; | |
// } | |
// //std.debug.assert(ov != 1); | |
// } | |
// if (ov == 1) stdout.writeAll("error: overflow happened\n") catch unreachable; | |
// //std.debug.print("sum2: {d}\n", .{sum2}); | |
// if (sum2 > 0) stdout.writeAll("finished\n") catch unreachable; | |
// std.process.exit(0); | |
// } | |
/// Benchmark driver for the struct-returning 64-bit variant (`__addodi5`).
///
/// Calls `__addodi5(x, y)` one billion times with x == y counting up from 0,
/// folds the results into `sum`/`sum2`, and prints progress markers to stdout.
/// Errors from writing to stdout are propagated to the caller.
pub fn main() !void {
    const stdout = std.io.getStdOut();
    try stdout.writeAll("starting\n");

    var x: i64 = 0;
    var y: i64 = 0;
    // Alternative for benchmarking `addoti4`:
    // var res = Res{
    //     .sum = 0,
    //     .overflow = 0,
    // };
    var res: AddoXi5T(i64) = .{
        .result = 0,
        .overflow = 0,
    };
    var sum: i64 = 0;
    var sum2: i64 = 0;

    //while (x < 50_000_000) {
    while (x < 1_000_000_000) {
        //res = simple_addosi4(x, y, &ov);
        // res = addoti4(x, y);
        res = __addodi5(x, y);
        x += 1;
        y += 1;
        sum += res.result;
        // Reset the accumulator once it passes the threshold and count the resets.
        if (sum > 1_000_000) {
            sum2 += 1;
            sum = 0;
        }
        //std.debug.assert(ov != 1);
    }

    // NOTE(review): `res` is overwritten on every iteration, so this only reports
    // an overflow from the final call, not from earlier ones.
    if (res.overflow == 1) try stdout.writeAll("error: overflow happened\n");
    //std.debug.print("sum2: {d}\n", .{sum2});
    // Presumably reading sum2 here keeps the loop's results observable so the
    // benchmark loop is not optimized away.
    if (sum2 > 0) try stdout.writeAll("finished\n");
    std.process.exit(0);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment