Last active
April 30, 2018 19:26
-
-
Save mossheim/405ddbd01daddc91de3ed0b7d1db0195 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
// Floating-point type exercised by every benchmark in this file. A type alias
// is used instead of a macro so the name obeys normal scoping rules; change
// the right-hand side to measure a different precision.
using FP = double;

using std::cout;
using std::endl;

// Number of samples fed to each implementation per timing run.
constexpr std::size_t num_FPs = 8 * 8192;
// Branch-free "scale negatives" built on std::copysign.
//
// For finite operands this returns a * b when a carries a negative sign and
// a itself otherwise. The choice is made arithmetically rather than with a
// conditional: both candidate results are computed and one is weighted by 0.
//
//   a - value whose sign is tested and which is (possibly) scaled
//   b - factor applied when a is negative
//
// Note: because the discarded term is multiplied by zero rather than skipped,
// non-finite operands can turn the result into NaN (inf * 0).
FP scaleneg_copysign(FP a, FP b)
{
    // copysign(1.0, a) is -1 or +1 depending on the sign of a.
    const FP sign = std::copysign(1.0, a);
    // Remap {-1, +1} to {0, 1}: 1 means "keep a", 0 means "scale a by b".
    const FP keep = (sign + 1) * 0.5;
    return (a * b * (1.0 - keep)) + (a * keep);
}
// Branch-free "scale negatives" built on std::signbit, with the selector held
// as an int.
//
// For finite operands this returns a * b when the sign bit of a is set and a
// itself otherwise. Both candidate results are computed and the unwanted one
// is multiplied by zero.
//
//   a - value whose sign bit is tested and which is (possibly) scaled
//   b - factor applied when the sign bit of a is set
FP scaleneg_signbit(FP a, FP b)
{
    // signbit returns bool; stored as int it is 1 for negative, 0 otherwise.
    const int negative = std::signbit(a);
    return (a * b * negative) + (a * (1.0 - negative));
}
// Same computation as scaleneg_signbit, except that the 0/1 selector is
// converted to the floating-point type once, up front, instead of being kept
// as an int and converted inside the arithmetic expression.
//
//   a - value whose sign bit is tested and which is (possibly) scaled
//   b - factor applied when the sign bit of a is set
FP scaleneg_signbit_float(FP a, FP b)
{
    // 1.0 when the sign bit of a is set, 0.0 otherwise.
    const FP negative = static_cast<FP>(std::signbit(a));
    return (a * b * negative) + (a * (1.0 - negative));
}
// Reference implementation using an ordinary comparison: returns a * b when
// a is less than zero and a unchanged otherwise.
//
// Unlike the sign-bit based variants, the test `a < 0` is false for -0.0 and
// for NaN, so those inputs are returned as-is.
//
//   a - value to test and (possibly) scale
//   b - factor applied when a < 0
FP scaleneg_naive(FP a, FP b)
{
    if (a < 0) {
        return a * b;
    }
    return a;
}
// Returns a pseudo-random value between -5 and 5 (both ends reachable),
// obtained by scaling the result of rand() from [0, RAND_MAX].
FP drand()
{
    // Convert before dividing so the division happens in floating point.
    return (static_cast<FP>(std::rand()) / RAND_MAX - 0.5) * 10;
}
FP nums[num_FPs]; | |
using scaleneg_t = FP (*)(FP, FP); | |
// Times one scaleneg implementation over the whole `nums` array and prints
// the elapsed std::clock() time in seconds together with the accumulated sum.
//
//   scaleneg - implementation to measure; it is called once per sample with
//              the sample passed as both arguments.
void time_func(scaleneg_t scaleneg)
{
    const std::clock_t begin = std::clock();

    // The running sum is printed below, so the calls have an observable
    // result (presumably to keep the optimizer from discarding the loop).
    FP accum = 0.;
    for (const FP sample : nums) {
        accum += scaleneg(sample, sample);
    }

    const std::clock_t end = std::clock();

    const FP seconds = static_cast<FP>(end - begin) / CLOCKS_PER_SEC;
    cout << "\tTime: " << seconds << " [accum=" << accum << "]\n";
}
int main() | |
{ | |
// fixed-point floating point | |
cout << std::fixed; | |
for (size_t i = 0; i < num_FPs; ++i) { | |
nums[i] = drand(); | |
} | |
for (size_t i = 0; i < 3; ++i) { | |
cout << "Run " << (i+1) << endl; | |
cout << "scaleneg_signbit:" << endl; | |
time_func(&scaleneg_signbit); | |
cout << "scaleneg_naive:" << endl; | |
time_func(&scaleneg_naive); | |
cout << "scaleneg_copysign:" << endl; | |
time_func(&scaleneg_copysign); | |
cout << "scaleneg_signbit_float:" << endl; | |
time_func(&scaleneg_signbit_float); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results:
gcc 7
AppleClang 9.0
VS 2017
num_FPs
was increased to 8192 * 8192 because of the lower precision of the default clock. Best:
scaleneg_signbit
scaleneg_naive
scaleneg_copysign
(though scaleneg_signbit
is nearly the same)