-
-
Save mrbid/1f25bfc27d97b81d5d9ec5e45f81a6e1 to your computer and use it in GitHub Desktop.
/*
    James William Fletcher (github.com/mrbid)
        August 2021 - March 2024

    Benchmarking sine wave functions.
    https://james-william-fletcher.medium.com/benchmarking-sine-functions-16b067bf63ce

    references:
    http://www.ee.ic.ac.uk/pcheung/teaching/ee3_Study_Project/Sinewave%20Generation(708).pdf
    https://demonstrations.wolfram.com/SineWaveGenerationUsingAnUnstableIIRFilter/
    https://en.wikipedia.org/wiki/Goertzel_algorithm

    compile: gcc sine_bench.c -Ofast -lm -o sine_bench
*/
// standard C
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// POSIX
#include <fcntl.h>
#include <sys/time.h>
#include <unistd.h>

// compiler intrinsics (__rdtsc)
#include <x86intrin.h>
// Benchmark configuration.
#define interval 1000000 // wall-clock budget of each timed throughput loop, in microseconds
#define AVGITER 3000000  // number of iterations averaged for each __rdtsc() cycle count
// State of the pseudo-random generator; replaced by srandf() and advanced by
// every call to randf().
int srandfq = 74235;

// Re-seeds the generator used by randf().
void srandf(const int seed)
{
    srandfq = seed;
}

// Returns the next pseudo-random float, nominally in the 0-1 range.
// https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html
float randf(void)
{
    // Advance the state by multiplying with 16807. The product is formed in
    // unsigned arithmetic because it routinely exceeds INT_MAX and signed
    // overflow is undefined behaviour; unsigned arithmetic wraps modulo 2^32.
    // (Converting the wrapped value back to int is implementation-defined;
    // GCC and Clang keep the bit pattern.)
    srandfq = (int)((unsigned int)srandfq * 16807u);

    // keep the low 31 bits and scale them by 2^-31
    return (float)(srandfq & 0x7FFFFFFF) * 4.6566129e-010f;
}
// source; James William Fletcher; this should be accurate to three decimal places | |
#define PIf 3.141592741f | |
#define x2PIf 6.283185482f // PI*2 | |
#define rx2PIf 0.1591549367f // 1/(PI*2) | |
#define wlerp(a, b, i) ((b - a) * i + a) | |
float lerp_sin(const float theta) | |
{ | |
// data for the wavetable | |
static int init = 0; | |
static float sine_wtable[256] = {0}; | |
// called once on first execution | |
// generates the wave table | |
if(init == 0) | |
{ | |
for(int i = 0; i < 256; i++) | |
sine_wtable[i] = sinf(i * 0.02454369329f); // 0.02454369329f = x2PIf / 256.f; | |
init = 1; | |
} | |
// normalise theta to a 0-1 range | |
float norm = theta * rx2PIf; | |
// preserve the fractional part that will be lopped off by the wrapping | |
const float a = norm * 256.f; // scales normal range to wave table range | |
const float fract = a-floor(a); | |
// wrap it | |
const unsigned char i = (unsigned char)(a); // this cast does the wrapping for us, it's fast. | |
// check for end of array case for wrap-around lerp. | |
if(i >= 255) | |
return wlerp(sine_wtable[255], sine_wtable[0], fract); | |
// do regular table lerp | |
return wlerp(sine_wtable[i], sine_wtable[i+1], fract); | |
} | |
float aliased_sin(const float theta) | |
{ | |
// data for the wavetable | |
static int init = 0; | |
static float sine_wtable[65536] = {0}; | |
// called once on first execution | |
if(init == 0) | |
{ | |
for(int i = 0; i < 65536; i++) | |
sine_wtable[i] = sinf(i * 9.587380191e-05f); // 9.587380191e-05f = x2PIf / 65536.f; | |
init = 1; | |
} | |
// return result | |
const unsigned short i = (unsigned short)(10430.37793f * theta); // 10430.37793f = 65536.f / x2PIf | |
return sine_wtable[i]; | |
} | |
// source; KVRAF; https://www.kvraudio.com/forum/viewtopic.php?t=321027 | |
// https://www.kvraudio.com/forum/viewtopic.php?p=4555831&sid=b366604bf9b3cfd7892f40e7b3fee4e4#p4555831 | |
// omega is passed as 2.0*PI*frequency/sampleRate | |
// Initialise as IIRSine(2, omega, 0); then call on tick as IIRSine(0, 0, 0); | |
float IIRSine(const int set, const float omega, const float phase) | |
{ | |
static float m_a1, m_y1, m_y2; | |
if(set > 0) | |
{ | |
m_a1 = omega; | |
if(set > 1) | |
{ | |
m_a1 = 2.0f * cosf(m_a1); | |
if(set > 2) | |
{ | |
m_y1 = sinf(phase - omega); | |
m_y2 = sinf(phase - 2.0f * omega); | |
} | |
} | |
} | |
const float y3 = m_a1 * m_y1 - m_y2; | |
m_y2 = m_y1; | |
m_y1 = y3; | |
return y3; | |
} | |
// source; analog.com (SHARC)
// ORIGINAL CODE FROM SHARC
// short output;
// main(){
//   int i; const short A=0x7e66;  /* A=(1.975/2 * 32768) */
//   short y[3]={0,0x1209,0};      /* (y0,y1,y2), y1=(0.1409*32768) */
//   for (i=0; i<40; i++) {
//     y[0] = (((A*y[1])>>15) + ((A*y[1])>>15)) - y[2];
//     y[2] = y[1];                /* y2 <-- y1 */
//     y[1] = y[0];                /* y1 <-- y0 */
//     output = y[0];
//   }}
//
// Float port of the fixed-point oscillator above. Each call advances the
// recurrence by one step and returns the new sample; frequency and amplitude
// are fixed by the constants below.
float IIRSine2(void)
{
    static float mv[3] = {0, 0.1409f, 0};
    static const float A = 1.975f/2.f;

    // The reference stores A as *half* the feedback coefficient and adds the
    // product to itself; the doubling is kept here so the effective
    // coefficient is 1.975, as in the reference.
    mv[0] = (A * mv[1] + A * mv[1]) - mv[2];
    mv[2] = mv[1];
    mv[1] = mv[0];
    return mv[0];
}
// source; https://stackoverflow.com/a/62425331 | |
float fast_sin1(float x) | |
{ | |
x /= 2.f * PIf; | |
x -= (int)x; | |
if(x <= 0.5f) | |
{ | |
float t = 2.f * x * (2.f * x - 1.f); | |
return (PIf * t) / ((PIf - 4.f) * t - 1.f); | |
} | |
else | |
{ | |
float t = 2.f * (1.f - x) * (1.f - 2.f * x); | |
return -(PIf * t) / ((PIf - 4.f) * t - 1.f); | |
} | |
} | |
// source; https://www.gamedev.net/forums/topic/621589-extremely-fast-sin-approximation/
//
// Approximates sin(x), x in radians: x is reduced by the nearest multiple of
// PI, a small odd polynomial is evaluated on the remainder, and the sign is
// flipped when that multiple was odd.
double fast_sin2(double x)
{
    // x / PI, i.e. the angle in half-turns
    double z = x * 0.3183098861837907;

    // Adding 1.5 * 2^52 pushes the fractional bits out of the mantissa, which
    // leaves the rounded half-turn count in the low 32 bits of the double.
    z += 6755399441055744.0;

    // Read those bits back with memcpy: dereferencing an int pointer aimed at
    // a double breaks the strict-aliasing rule, and picking the low word out of
    // the 64-bit pattern does not depend on byte order.
    uint64_t bits;
    memcpy(&bits, &z, sizeof bits);
    const uint32_t low = (uint32_t)bits;
    int32_t k;
    memcpy(&k, &low, sizeof k);

    // remove k half-turns from x
    x -= (double)k * 3.1415926535897932;

    // odd polynomial in the reduced x: x * (c0 + c1*x^2 + c2*x^4)
    const double y = x * x;
    const double poly = (0.0073524681968701 * y - 0.1652891139701474) * y + 0.9996919862959676;
    x *= poly;

    // an odd number of half-turns flips the sign: x - 2*x == -x
    const double flip = (double)((k & 1) * 2);
    return x - flip * x;
}
// source; https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html
//
// Degree-5 odd polynomial approximation of sin(fAngle), evaluated in Horner
// form on fAngle squared. No range reduction is performed.
// NOTE(review): presumably only intended for a limited interval around zero;
// confirm the valid range against the source above.
float fast_sin3(float fAngle)
{
    const float sq = fAngle * fAngle;
    return ((7.61e-03f * sq - 1.6605e-01f) * sq + 1.0f) * fAngle;
}
// Degree-11 odd polynomial approximation of sin(fAngle), evaluated in Horner
// form on fAngle squared. Same scheme as fast_sin3() with three more terms.
// No range reduction is performed.
// NOTE(review): presumably only intended for a limited interval around zero;
// confirm the valid range against the musicdsp source.
float fast_sin4(float fAngle)
{
    const float sq = fAngle * fAngle;
    return (((((-2.39e-08f * sq + 2.7526e-06f) * sq - 1.98409e-04f) * sq
                + 8.3333315e-03f) * sq - 1.666666664e-01f) * sq + 1.0f) * fAngle;
}
// source; https://stackoverflow.com/a/52693283 | |
float fast_sin5(float x) | |
{ | |
return (16.f * x * (PIf - x)) / (5.f * PIf * PIf - (4.f * x * (PIf - x))); | |
//return ( 4 * x * (180 - x)) / (40500 - x * (180 - x)); | |
} | |
// source; I don't know, I found these some ten years ago and failed to accredit the original authors.
//
// Parabolic approximation of sin(x), x in radians:
//     (4/PI) x - (4/PI^2) x |x|
// The parabola passes through 0 at x = 0 and x = +-PI and reaches +-1 at
// x = +-PI/2. No range reduction is done, so x must lie within -PI..PI.
float fast_sin6(float x)
{
    // 1.273239544f = 4/PI, 0.405284735f = 4/PI^2.
    // The second coefficient has to be 4/PI^2 for the curve to reach 1 at
    // PI/2; with 2/PI (0.636619772f) the peak is only about 0.43.
    return 1.273239544f * x - 0.405284735f * x * fabsf(x);
}
// First five terms of the Taylor series of sin(x) around zero, x in radians:
//     x - x^3/3! + x^5/5! - x^7/7! + x^9/9!
// No range reduction is done, so accuracy degrades as |x| grows.
float fast_sin7(float x)
{
    // Each term is the previous numerator times -x^2; the minus sign is what
    // makes the series alternate (with +x^2 the sum is sinh(x), not sin(x)).
    const float step = -(x * x);

    float res = 0.f;
    float numer = x;   // +-x^(2i+1)
    float fact = 1.f;  // (2i+1)!

    for(int i = 0; i < 5; ++i)
    {
        res += numer / fact;
        numer *= step;

        // extend (2i+1)! to (2i+3)!
        const float n = 2.f * (i + 1.f);
        fact *= n * (n + 1.f);
    }
    return res;
}
// main

// Current wall-clock time from gettimeofday(), in microseconds.
// Returns 0 if the clock cannot be read.
uint64_t microtime(void)
{
    struct timeval tv;

    // the timezone argument is obsolete; POSIX callers pass NULL
    if(gettimeofday(&tv, NULL) != 0)
        return 0;

    // widen before multiplying so the product is formed in 64 bits even where
    // time_t is narrower
    return (uint64_t)tv.tv_sec * 1000000u + (uint64_t)tv.tv_usec;
}
int main() | |
{ | |
srandf(time(0)); | |
// the big accuracy bench | |
const float test_max_increment = x2PIf/2; | |
const float test_max_max = x2PIf*2; // ssh, our secret. | |
for(float test_max = test_max_increment; test_max <= test_max_max; test_max += test_max_increment) | |
{ | |
printf("\n-------------------------------------------------\nTesting accuracy of functions from 0 to %.1f ...\n-------------------------------------------------\n\n", test_max); | |
// test the lerp_sin() accuracy | |
printf("Testing lerp_sin() accuracy...\n"); | |
float delta = 0.f; | |
float increment = 0.1f; | |
float samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(lerp_sin(f) - sinf(f)); | |
// printf("%.1f: %.7f - %.7f\n", f, lerp_sin(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
//exit(0); | |
// test the aliased_sin() accuracy | |
printf("\nTesting aliased_sin() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(aliased_sin(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", aliased_sin(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin1() accuracy | |
printf("\nTesting fast_sin1() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin1(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin1(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin2() accuracy | |
printf("\nTesting fast_sin2() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin2(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin2(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin3() accuracy | |
printf("\nTesting fast_sin3() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin3(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin3(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin4() accuracy | |
printf("\nTesting fast_sin4() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin4(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin4(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin5() accuracy | |
printf("\nTesting fast_sin5() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin5(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin5(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin6() accuracy | |
printf("\nTesting fast_sin6() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0.f; d < 6; d++) | |
{ | |
for(float f = 0; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin6(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin6(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
// test the fast_sin7() accuracy | |
printf("\nTesting fast_sin7() accuracy...\n"); | |
delta = 0.f; | |
increment = 0.1f; | |
samples = 0.f; | |
for(int d = 0; d < 6; d++) | |
{ | |
for(float f = 0.f; f < test_max; f += increment) | |
{ | |
delta += fabsf(fast_sin7(f) - sinf(f)); | |
// printf("%.7f - %.7f\n", fast_sin7(f), sinf(f)); | |
// usleep(333333); | |
samples += 1.f; | |
} | |
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
increment /= 10.f; | |
} | |
} | |
// | |
printf("\n-------------------------------------------------\nTesting speed of functions using random inputs.\n-------------------------------------------------\n\n"); | |
// prep benchmarks | |
setlocale(LC_NUMERIC, ""); | |
float ret = 0.f; | |
unsigned long e = 0, ui = 0; | |
uint64_t st = 0, et = 0, avg = 0; | |
//////// __rdtsc() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
avg += __rdtsc()-st; | |
} | |
printf("__rdtsc() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += __rdtsc(); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// sinf() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += sinf(st); | |
avg += __rdtsc()-st; | |
} | |
printf("sin() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += sinf(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// IIRSine() | |
IIRSine(2, 0, 0); | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += IIRSine(1, st, 0); | |
avg += __rdtsc()-st; | |
} | |
printf("IIRSine() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += IIRSine(1, 0, 0); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// IIRSine2() | |
IIRSine2(); | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += IIRSine2(); | |
avg += __rdtsc()-st; | |
} | |
printf("IIRSine2() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += IIRSine2(); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// lerp_sin() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += lerp_sin(randf()*x2PIf); | |
avg += __rdtsc()-st; | |
} | |
printf("lerp_sin() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += lerp_sin(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// aliased_sin() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += aliased_sin(randf()*x2PIf); | |
avg += __rdtsc()-st; | |
} | |
printf("aliased_sin() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += aliased_sin(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin1() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin1(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin1() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin1(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin2() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin2(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin2() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin2(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin3() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin3(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin3() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin3(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin4() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin4(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin4() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin4(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin5() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin5(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin5() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin5(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin6() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin6(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin6() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin6(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// fast_sin7() | |
avg = 0; | |
for(int i = 0; i < AVGITER; i++) | |
{ | |
st = __rdtsc(); | |
ret += fast_sin7(st); | |
avg += __rdtsc()-st; | |
} | |
printf("fast_sin7() Cycles: %'lu\n", avg / AVGITER); | |
e = 0; | |
st = microtime(); | |
while(microtime() - st <= interval) | |
{ | |
ret += fast_sin7(randf()*x2PIf); | |
e++; | |
} | |
ui = interval / 1000000; | |
printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
//////// | |
// done | |
printf("%c\n", (char)ret); // forces the compiler to not disregard the functions we are testing | |
return 0; | |
} |
Hi, I think that there is a bug in fast_sin6 (very nicely reported by clang):
    return 1.273239544f * x + -0.636619772f * x * abs(x);
Here `abs` is `int abs(int x)`; in C you want `fabs`.
Most likely this code came from a C++ codebase where the `float abs(float x)` overload exists in `<cmath>`, but when compiled as C... :)
Well spotted! thank you very much for pointing this out and I have now updated the gist.
I'm using this to push this project even a little bit further: https://github.com/define-private-public/PSRayTracing
Thanks!
I'm using this to push this project even a little bit further: https://github.com/define-private-public/PSRayTracing
Thanks!
That's really cool. I am just starting work on my own CPU RayTracer in C and this project you are working on looks very exciting, I am just playing with it now and I am very impressed.
Hi, I think that there is a bug in fast_sin6 (very nicely reported by clang):
Here `abs` is `int abs(int x)`; in C you want `fabs`.
Most likely this code came from a C++ codebase where the `float abs(float x)` overload exists in `<cmath>`, but when compiled as C... :)