Last active
May 10, 2024 20:50
-
-
Save wilzbach/2b64e10dec66a3153c51fbd1e6848f72 to your computer and use it in GitHub Desktop.
std.math vs. core.stdc.math vs. intrinsics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun: pow | |
std.math.pow = 13 secs, 743 ms, 902 μs, and 7 hnsecs | |
core.stdc.pow = 12 secs, 490 ms, 213 μs, and 2 hnsecs | |
fun: exp | |
std.math.exp = 6 secs, 905 ms, and 644 μs | |
core.stdc.exp = 16 secs, 336 ms, 330 μs, and 4 hnsecs | |
fun: exp2 | |
std.math.exp2 = 3 secs, 338 ms, 447 μs, and 9 hnsecs | |
core.stdc.exp2 = 5 secs, 244 ms, 528 μs, and 6 hnsecs | |
fun: sin | |
std.math.sin = 7 secs, 18 ms, 574 μs, and 5 hnsecs | |
core.stdc.sin = 18 secs, 82 ms, 553 μs, and 5 hnsecs | |
fun: cos | |
std.math.cos = 8 secs, 242 ms, 836 μs, and 2 hnsecs | |
core.stdc.cos = 18 secs, 69 ms, 296 μs, and 5 hnsecs | |
fun: log | |
std.math.log = 4 secs, 798 ms, 570 μs, and 7 hnsecs | |
core.stdc.log = 16 secs, 755 ms, 19 μs, and 1 hnsec | |
fun: log2 | |
std.math.log2 = 4 secs, 950 ms, 840 μs, and 9 hnsecs | |
core.stdc.log2 = 7 secs, 822 ms, 165 μs, and 7 hnsecs | |
fun: sqrt | |
std.math.sqrt = 1 sec, 29 ms, 414 μs, and 4 hnsecs | |
core.stdc.sqrt = 2 secs, 121 ms, 935 μs, and 7 hnsecs | |
fun: ceil | |
std.math.ceil = 3 secs, 762 ms, 841 μs, and 8 hnsecs | |
core.stdc.ceil = 1 sec, 321 ms, 931 μs, and 4 hnsecs | |
fun: round | |
std.math.round = 3 secs, 575 ms, and 408 μs | |
core.stdc.round = 1 sec, 504 ms, 444 μs, and 4 hnsecs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fun: pow | |
std.math.pow = 15 secs, 914 ms, 102 μs, and 8 hnsecs | |
core.stdc.pow = 11 secs, 590 ms, 702 μs, and 5 hnsecs | |
llvm_pow = 13 secs, 570 ms, 439 μs, and 7 hnsecs | |
fun: exp | |
std.math.exp = 6 secs, 85 ms, 741 μs, and 7 hnsecs | |
core.stdc.exp = 16 secs, 267 ms, 997 μs, and 4 hnsecs | |
llvm_exp = 2 secs, 22 ms, and 876 μs | |
fun: exp2 | |
std.math.exp2 = 3 secs, 117 ms, 624 μs, and 2 hnsecs | |
core.stdc.exp2 = 2 secs, 973 ms, and 243 μs | |
llvm_exp2 = 2 secs, 451 ms, 628 μs, and 9 hnsecs | |
fun: sin | |
std.math.sin = 1 sec, 805 ms, 626 μs, and 7 hnsecs | |
core.stdc.sin = 17 secs, 743 ms, 33 μs, and 5 hnsecs | |
llvm_sin = 2 secs, 95 ms, and 178 μs | |
fun: cos | |
std.math.cos = 2 secs, 820 ms, 684 μs, and 5 hnsecs | |
core.stdc.cos = 17 secs, 626 ms, 78 μs, and 1 hnsec | |
llvm_cos = 2 secs, 814 ms, 60 μs, and 5 hnsecs | |
fun: log | |
std.math.log = 5 secs, 584 ms, 344 μs, and 5 hnsecs | |
core.stdc.log = 16 secs, 443 ms, 893 μs, and 3 hnsecs | |
llvm_log = 2 secs, 13 ms, 291 μs, and 1 hnsec | |
fun: log2 | |
std.math.log2 = 5 secs, 583 ms, 777 μs, and 7 hnsecs | |
core.stdc.log2 = 2 secs, 800 ms, 848 μs, and 5 hnsecs | |
llvm_log2 = 2 secs, 165 ms, 849 μs, and 6 hnsecs | |
fun: sqrt | |
std.math.sqrt = 799 ms and 917 μs | |
core.stdc.sqrt = 864 ms, 834 μs, and 7 hnsecs | |
llvm_sqrt = 439 ms, 469 μs, and 2 hnsecs | |
fun: ceil | |
std.math.ceil = 540 ms and 167 μs | |
core.stdc.ceil = 971 ms, 533 μs, and 6 hnsecs | |
llvm_ceil = 562 ms, 490 μs, and 2 hnsecs | |
fun: round | |
std.math.round = 3 secs, 52 ms, 567 μs, and 3 hnsecs | |
core.stdc.round = 958 ms and 217 μs | |
llvm_round = 590 ms, 742 μs, and 7 hnsecs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Accumulator that every benchmarked call adds its result to.
/// NOTE(review): presumably a global sink so the optimizer cannot discard the
/// math calls as dead code -- confirm against the generated assembly.
__gshared float r = 0.0;

/**
 * Times a fixed set of math functions as provided by `std.math`,
 * `core.stdc.math` and (on LDC only) `ldc.intrinsics`, then prints one
 * timing line per implementation for each function.
 *
 * Requires a Phobos that ships `std.datetime.stopwatch`; the legacy
 * `std.datetime.benchmark` this program originally used has been removed.
 */
void main()
{
    import std.datetime.stopwatch : benchmark;
    import std.meta : AliasSeq;
    import std.stdio : writefln;

    // 200 million calls per implementation (was spelled `20_000_0000`).
    enum uint iterations = 200_000_000;

    float a = 12.2;
    float b = 5.5;

    // `fun` is a compile-time string, so each iteration of this loop is
    // unrolled and the mixins below resolve to a concrete function name.
    foreach (fun; AliasSeq!("pow", "exp", "exp2", "sin", "cos", "log", "log2",
                            "sqrt", "ceil", "round"))
    {
        auto timings = benchmark!(
            {
                import std.math;
                // Use the unary form when it compiles, otherwise fall back
                // to the binary form (needed for `pow`).
                static if (__traits(compiles, mixin(fun)(a)))
                    r += mixin(fun)(a);
                else
                    r += mixin(fun)(a, b);
            },
            {
                import core.stdc.math;
                static if (__traits(compiles, mixin(fun)(a)))
                    r += mixin(fun)(a);
                else
                    r += mixin(fun)(a, b);
            },
            {
                // Empty on compilers other than LDC; its timing is then
                // skipped when printing because no name is registered for it.
                version (LDC)
                {
                    import ldc.intrinsics;
                    static if (__traits(compiles, mixin("llvm_" ~ fun)(a)))
                        r += mixin("llvm_" ~ fun)(a);
                    else
                        r += mixin("llvm_" ~ fun)(a, b);
                }
            },
        )(iterations);

        string[] names = ["std.math." ~ fun, "core.stdc." ~ fun];
        version (LDC)
            names ~= "llvm_" ~ fun;

        writefln("fun: %s", fun);
        // `benchmark` already yields `Duration` values, so they are printed
        // directly. The loop variable no longer shadows the global `r`.
        foreach (j, elapsed; timings)
        {
            if (j < names.length)
                writefln("%-14s = %s", names[j], elapsed);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, what CPU and OS did you run this on?
I'm on Windows and I'm getting significantly different results. Many functions (notably sin/cos) are much slower using std.math than core.stdc when compiled with DMD (but not with LDC). DMD simply emits `fsin` and `fcos` instructions, whereas LDC appears to call the stdc functions.
DMD:

LDC
