-
-
Save mrbid/1f25bfc27d97b81d5d9ec5e45f81a6e1 to your computer and use it in GitHub Desktop.
| /* | |
| James William Fletcher (github.com/mrbid) | |
| August 2021 - March 2024 | |
| Benchmarking sine wave functions. | |
| https://james-william-fletcher.medium.com/benchmarking-sine-functions-16b067bf63ce | |
| references: | |
| http://www.ee.ic.ac.uk/pcheung/teaching/ee3_Study_Project/Sinewave%20Generation(708).pdf | |
| https://demonstrations.wolfram.com/SineWaveGenerationUsingAnUnstableIIRFilter/ | |
| https://en.wikipedia.org/wiki/Goertzel_algorithm | |
| compile: gcc sine_bench.c -Ofast -lm -o sine_bench | |
| */ | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <stdint.h> | |
| #include <math.h> | |
| #include <string.h> | |
| #include <locale.h> | |
| #include <time.h> | |
| #include <sys/time.h> | |
| #include <unistd.h> | |
| #include <fcntl.h> | |
| #include <x86intrin.h> | |
| #define interval 1000000 | |
| #define AVGITER 3000000 | |
| int srandfq = 74235; | |
| void srandf(const int seed) | |
| { | |
| srandfq = seed; | |
| } | |
| float randf() | |
| { | |
| // https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html | |
| // [email protected] | |
| srandfq *= 16807; | |
| return (float)(srandfq & 0x7FFFFFFF) * 4.6566129e-010f; | |
| } | |
| // source; James William Fletcher; this should be accurate to three decimal places | |
| #define PIf 3.141592741f | |
| #define x2PIf 6.283185482f // PI*2 | |
| #define rx2PIf 0.1591549367f // 1/(PI*2) | |
| #define wlerp(a, b, i) ((b - a) * i + a) | |
| float lerp_sin(const float theta) | |
| { | |
| // data for the wavetable | |
| static int init = 0; | |
| static float sine_wtable[256] = {0}; | |
| // called once on first execution | |
| // generates the wave table | |
| if(init == 0) | |
| { | |
| for(int i = 0; i < 256; i++) | |
| sine_wtable[i] = sinf(i * 0.02454369329f); // 0.02454369329f = x2PIf / 256.f; | |
| init = 1; | |
| } | |
| // normalise theta to a 0-1 range | |
| float norm = theta * rx2PIf; | |
| // preserve the fractional part that will be lopped off by the wrapping | |
| const float a = norm * 256.f; // scales normal range to wave table range | |
| const float fract = a-floor(a); | |
| // wrap it | |
| const unsigned char i = (unsigned char)(a); // this cast does the wrapping for us, it's fast. | |
| // check for end of array case for wrap-around lerp. | |
| if(i >= 255) | |
| return wlerp(sine_wtable[255], sine_wtable[0], fract); | |
| // do regular table lerp | |
| return wlerp(sine_wtable[i], sine_wtable[i+1], fract); | |
| } | |
| float aliased_sin(const float theta) | |
| { | |
| // data for the wavetable | |
| static int init = 0; | |
| static float sine_wtable[65536] = {0}; | |
| // called once on first execution | |
| if(init == 0) | |
| { | |
| for(int i = 0; i < 65536; i++) | |
| sine_wtable[i] = sinf(i * 9.587380191e-05f); // 9.587380191e-05f = x2PIf / 65536.f; | |
| init = 1; | |
| } | |
| // return result | |
| const unsigned short i = (unsigned short)(10430.37793f * theta); // 10430.37793f = 65536.f / x2PIf | |
| return sine_wtable[i]; | |
| } | |
| // source; KVRAF; https://www.kvraudio.com/forum/viewtopic.php?t=321027 | |
| // https://www.kvraudio.com/forum/viewtopic.php?p=4555831&sid=b366604bf9b3cfd7892f40e7b3fee4e4#p4555831 | |
| // omega is passed as 2.0*PI*frequency/sampleRate | |
| // Initialise as IIRSine(2, omega, 0); then call on tick as IIRSine(0, 0, 0); | |
| float IIRSine(const int set, const float omega, const float phase) | |
| { | |
| static float m_a1, m_y1, m_y2; | |
| if(set > 0) | |
| { | |
| m_a1 = omega; | |
| if(set > 1) | |
| { | |
| m_a1 = 2.0f * cosf(m_a1); | |
| if(set > 2) | |
| { | |
| m_y1 = sinf(phase - omega); | |
| m_y2 = sinf(phase - 2.0f * omega); | |
| } | |
| } | |
| } | |
| const float y3 = m_a1 * m_y1 - m_y2; | |
| m_y2 = m_y1; | |
| m_y1 = y3; | |
| return y3; | |
| } | |
| // source; analog.com (SHARC) | |
| // ORIGINAL CODE FROM SHARC | |
| // short output; | |
| // main(){ | |
| // int i; const short A=0x7e66; /* A=(1.975/2 * 32768) */ | |
| // short y[3]={0,0x1209,0}; /* (y0,y1,y2), y1=(0.1409*32768) */ | |
| // for (i=0; i<40; i++) { y[0] = (((A*y[1])>>15) + ((A*y[1])>>15)) – y[2]; y[2] = y[1]; /* y2 <–– y1 */ y[1] = y[0]; /* y1 <–– y0 */ | |
| // output = y[0]; | |
| // }} | |
| float IIRSine2() | |
| { | |
| static float mv[3] = {0, 0.1409f, 0}; | |
| static const float A = 1.975f/2.f; | |
| mv[0] = A * mv[1] - mv[2]; | |
| mv[2] = mv[1]; | |
| mv[1] = mv[0]; | |
| return mv[0]; | |
| } | |
| // source; https://stackoverflow.com/a/62425331 | |
| float fast_sin1(float x) | |
| { | |
| x /= 2.f * PIf; | |
| x -= (int)x; | |
| if(x <= 0.5f) | |
| { | |
| float t = 2.f * x * (2.f * x - 1.f); | |
| return (PIf * t) / ((PIf - 4.f) * t - 1.f); | |
| } | |
| else | |
| { | |
| float t = 2.f * (1.f - x) * (1.f - 2.f * x); | |
| return -(PIf * t) / ((PIf - 4.f) * t - 1.f); | |
| } | |
| } | |
| // source; https://www.gamedev.net/forums/topic/621589-extremely-fast-sin-approximation/ | |
| double fast_sin2(double x) | |
| { | |
| int k; | |
| double y; | |
| double z; | |
| z = x; | |
| z *= 0.3183098861837907; | |
| z += 6755399441055744.0; | |
| k = *((int *) &z); | |
| z = k; | |
| z *= 3.1415926535897932; | |
| x -= z; | |
| y = x; | |
| y *= x; | |
| z = 0.0073524681968701; | |
| z *= y; | |
| z -= 0.1652891139701474; | |
| z *= y; | |
| z += 0.9996919862959676; | |
| x *= z; | |
| k &= 1; | |
| k += k; | |
| z = k; | |
| z *= x; | |
| x -= z; | |
| return x; | |
| } | |
| // source; https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html | |
| float fast_sin3(float fAngle) | |
| { | |
| float fASqr = fAngle*fAngle; | |
| float fResult = 7.61e-03f; | |
| fResult *= fASqr; | |
| fResult -= 1.6605e-01f; | |
| fResult *= fASqr; | |
| fResult += 1.0f; | |
| fResult *= fAngle; | |
| return fResult; | |
| } | |
| float fast_sin4(float fAngle) | |
| { | |
| float fASqr = fAngle*fAngle; | |
| float fResult = -2.39e-08f; | |
| fResult *= fASqr; | |
| fResult += 2.7526e-06f; | |
| fResult *= fASqr; | |
| fResult -= 1.98409e-04f; | |
| fResult *= fASqr; | |
| fResult += 8.3333315e-03f; | |
| fResult *= fASqr; | |
| fResult -= 1.666666664e-01f; | |
| fResult *= fASqr; | |
| fResult += 1.0f; | |
| fResult *= fAngle; | |
| return fResult; | |
| } | |
| // source; https://stackoverflow.com/a/52693283 | |
| float fast_sin5(float x) | |
| { | |
| return (16.f * x * (PIf - x)) / (5.f * PIf * PIf - (4.f * x * (PIf - x))); | |
| //return ( 4 * x * (180 - x)) / (40500 - x * (180 - x)); | |
| } | |
| // source; I don't know, I found these some ten years ago and failed to accredit the original authors. | |
| float fast_sin6(float x) | |
| { | |
| return 1.273239544f * x + -0.636619772f * x * fabsf(x); | |
| } | |
| float fast_sin7(float x) | |
| { | |
| float res = 0.f, pow = x, fact = 1.f; | |
| for(int i = 0; i < 5; ++i) | |
| { | |
| res += pow / fact; | |
| pow *= x * x; | |
| fact *= (2.f*(i+1.f))*(2.f*(i+1.f)+1.f); | |
| } | |
| return res; | |
| } | |
| // main | |
| uint64_t microtime() | |
| { | |
| struct timeval tv; | |
| struct timezone tz; | |
| memset(&tz, 0, sizeof(struct timezone)); | |
| gettimeofday(&tv, &tz); | |
| return 1000000 * tv.tv_sec + tv.tv_usec; | |
| } | |
| int main() | |
| { | |
| srandf(time(0)); | |
| // the big accuracy bench | |
| const float test_max_increment = x2PIf/2; | |
| const float test_max_max = x2PIf*2; // ssh, our secret. | |
| for(float test_max = test_max_increment; test_max <= test_max_max; test_max += test_max_increment) | |
| { | |
| printf("\n-------------------------------------------------\nTesting accuracy of functions from 0 to %.1f ...\n-------------------------------------------------\n\n", test_max); | |
| // test the lerp_sin() accuracy | |
| printf("Testing lerp_sin() accuracy...\n"); | |
| float delta = 0.f; | |
| float increment = 0.1f; | |
| float samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(lerp_sin(f) - sinf(f)); | |
| // printf("%.1f: %.7f - %.7f\n", f, lerp_sin(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| //exit(0); | |
| // test the aliased_sin() accuracy | |
| printf("\nTesting aliased_sin() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(aliased_sin(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", aliased_sin(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin1() accuracy | |
| printf("\nTesting fast_sin1() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin1(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin1(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin2() accuracy | |
| printf("\nTesting fast_sin2() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin2(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin2(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin3() accuracy | |
| printf("\nTesting fast_sin3() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin3(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin3(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin4() accuracy | |
| printf("\nTesting fast_sin4() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin4(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin4(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin5() accuracy | |
| printf("\nTesting fast_sin5() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin5(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin5(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin6() accuracy | |
| printf("\nTesting fast_sin6() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0.f; d < 6; d++) | |
| { | |
| for(float f = 0; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin6(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin6(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| // test the fast_sin7() accuracy | |
| printf("\nTesting fast_sin7() accuracy...\n"); | |
| delta = 0.f; | |
| increment = 0.1f; | |
| samples = 0.f; | |
| for(int d = 0; d < 6; d++) | |
| { | |
| for(float f = 0.f; f < test_max; f += increment) | |
| { | |
| delta += fabsf(fast_sin7(f) - sinf(f)); | |
| // printf("%.7f - %.7f\n", fast_sin7(f), sinf(f)); | |
| // usleep(333333); | |
| samples += 1.f; | |
| } | |
| printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment); | |
| increment /= 10.f; | |
| } | |
| } | |
| // | |
| printf("\n-------------------------------------------------\nTesting speed of functions using random inputs.\n-------------------------------------------------\n\n"); | |
| // prep benchmarks | |
| setlocale(LC_NUMERIC, ""); | |
| float ret = 0.f; | |
| unsigned long e = 0, ui = 0; | |
| uint64_t st = 0, et = 0, avg = 0; | |
| //////// __rdtsc() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("__rdtsc() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += __rdtsc(); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// sinf() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += sinf(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("sin() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += sinf(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// IIRSine() | |
| IIRSine(2, 0, 0); | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += IIRSine(1, st, 0); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("IIRSine() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += IIRSine(1, 0, 0); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// IIRSine2() | |
| IIRSine2(); | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += IIRSine2(); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("IIRSine2() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += IIRSine2(); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// lerp_sin() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += lerp_sin(randf()*x2PIf); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("lerp_sin() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += lerp_sin(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// aliased_sin() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += aliased_sin(randf()*x2PIf); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("aliased_sin() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += aliased_sin(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin1() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin1(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin1() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin1(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin2() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin2(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin2() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin2(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin3() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin3(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin3() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin3(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin4() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin4(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin4() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin4(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin5() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin5(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin5() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin5(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin6() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin6(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin6() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin6(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// fast_sin7() | |
| avg = 0; | |
| for(int i = 0; i < AVGITER; i++) | |
| { | |
| st = __rdtsc(); | |
| ret += fast_sin7(st); | |
| avg += __rdtsc()-st; | |
| } | |
| printf("fast_sin7() Cycles: %'lu\n", avg / AVGITER); | |
| e = 0; | |
| st = microtime(); | |
| while(microtime() - st <= interval) | |
| { | |
| ret += fast_sin7(randf()*x2PIf); | |
| e++; | |
| } | |
| ui = interval / 1000000; | |
| printf("Executions in %'lu seconds: %'lu\n", ui, e); | |
| printf("Executions per millisecond: %'lu\n", e/(1000*ui)); | |
| printf("Executions per microsecond: %'lu\n", e/(1000000*ui)); | |
| printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui)); | |
| //////// | |
| // done | |
| printf("%c\n", (char)ret); // forces the compiler to not disregard the functions we are testing | |
| return 0; | |
| } |
Hi, I think that there is a bug in fast_sin6 (very nicely reported by clang):
return 1.273239544f * x + -0.636619772f * x * abs(x);here abs is
int abs(int x); in C you wantfabs.Most likely this code came from a C++ codebase where the
float abs(float x)overload exists in , but when compiled as C... :)
Well spotted! thank you very much for pointing this out and I have now updated the gist.
I'm using this to push this project even a little bit further: https://github.com/define-private-public/PSRayTracing
Thanks!
I'm using this to push this project even a little bit further: https://github.com/define-private-public/PSRayTracing
Thanks!
That's really cool. I am just starting work on my own CPU RayTracer in C and this project you are working on looks very exciting, I am just playing with it now and I am very impressed.
Hi, I think that there is a bug in fast_sin6 (very nicely reported by clang):
here abs is
int abs(int x); in C you wantfabs.Most likely this code came from a C++ codebase where the
float abs(float x)overload exists in , but when compiled as C... :)