mrbid · March 1, 2024 17:27 · jcelerier · Sep 17, 2021 · mrbid · Sep 17, 2021
diff --git a/sine_bench.c b/sine_bench.c
 /*
    James William Fletcher (github.com/mrbid)
        August 2021 - March 2024

    Benchmarking sine wave functions.
    https://james-william-fletcher.medium.com/benchmarking-sine-functions-16b067bf63ce

    references:
    http://www.ee.ic.ac.uk/pcheung/teaching/ee3_Study_Project/Sinewave%20Generation(708).pdf
    https://demonstrations.wolfram.com/SineWaveGenerationUsingAnUnstableIIRFilter/
    https://en.wikipedia.org/wiki/Goertzel_algorithm

    compile: gcc sine_bench.c -Ofast -lm -o sine_bench
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <math.h>
 #include <string.h>
 #include <locale.h>
 #include <time.h>
 #include <sys/time.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <x86intrin.h>

 #define interval 1000000
 #define AVGITER 3000000

 int srandfq = 74235;
 void srandf(const int seed)
 {
    srandfq = seed;
 }
 float randf()
 {
    // https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html
    // [email protected]
    srandfq *= 16807;
    return (float)(srandfq & 0x7FFFFFFF) * 4.6566129e-010f;
 }

 // source; James William Fletcher; this should be accurate to three decimal places
 #define PIf 3.141592741f
 #define x2PIf 6.283185482f   // PI*2
 #define rx2PIf 0.1591549367f // 1/(PI*2)
 #define wlerp(a, b, i) ((b - a) * i + a)

 float lerp_sin(const float theta)
 {
    // data for the wavetable
    static int init = 0;
    static float sine_wtable[256] = {0};

    // called once on first execution
    // generates the wave table
    if(init == 0)
    {
        for(int i = 0; i < 256; i++)
            sine_wtable[i] = sinf(i * 0.02454369329f); // 0.02454369329f = x2PIf / 256.f;
        init = 1;
    }

    // normalise theta to a 0-1 range
    float norm = theta * rx2PIf;

    // preserve the fractional part that will be lopped off by the wrapping
    const float a = norm * 256.f; // scales normal range to wave table range
    const float fract = a-floor(a);

    // wrap it
    const unsigned char i = (unsigned char)(a); // this cast does the wrapping for us, it's fast.

    // check for end of array case for wrap-around lerp.
    if(i >= 255)
        return wlerp(sine_wtable[255], sine_wtable[0], fract);

    // do regular table lerp
    return wlerp(sine_wtable[i], sine_wtable[i+1], fract);
 }

 float aliased_sin(const float theta)
 {
    // data for the wavetable
    static int init = 0;
    static float sine_wtable[65536] = {0};

    // called once on first execution
    if(init == 0)
    {
        for(int i = 0; i < 65536; i++)
            sine_wtable[i] = sinf(i * 9.587380191e-05f); // 9.587380191e-05f = x2PIf / 65536.f;
        init = 1;
    }

    // return result
    const unsigned short i = (unsigned short)(10430.37793f * theta); // 10430.37793f = 65536.f / x2PIf
    return sine_wtable[i];
 }

 // source; KVRAF; https://www.kvraudio.com/forum/viewtopic.php?t=321027
 // https://www.kvraudio.com/forum/viewtopic.php?p=4555831&sid=b366604bf9b3cfd7892f40e7b3fee4e4#p4555831
 // omega is passed as 2.0*PI*frequency/sampleRate
 // Initialise as IIRSine(2, omega, 0); then call on tick as IIRSine(0, 0, 0);
 float IIRSine(const int set, const float omega, const float phase)
 {
    static float m_a1, m_y1, m_y2;

    if(set > 0)
    {
        m_a1 = omega;

        if(set > 1)
        {
            m_a1 = 2.0f * cosf(m_a1);

            if(set > 2)
            {
                m_y1 = sinf(phase - omega); 
                m_y2 = sinf(phase - 2.0f * omega);
            }
        }
    }

    const float y3 = m_a1 * m_y1 - m_y2;
    m_y2 = m_y1;
    m_y1 = y3;
    return y3;
 }

 // source; analog.com (SHARC)
 // ORIGINAL CODE FROM SHARC
 // short output;
 // main(){
 // int i;    const short A=0x7e66;       /* A=(1.975/2 * 32768) */
 // short y[3]={0,0x1209,0};    /* (y0,y1,y2), y1=(0.1409*32768) */
 // for (i=0; i<40; i++) {        y[0] = (((A*y[1])>>15) + ((A*y[1])>>15)) – y[2];        y[2] = y[1];        /* y2 <–– y1 */        y[1] = y[0];        /* y1 <–– y0 */
 //     output = y[0];
 // }}
 float IIRSine2()
 {
    static float mv[3] = {0, 0.1409f, 0};
    static const float A = 1.975f/2.f;
    mv[0] = A * mv[1] - mv[2];
    mv[2] = mv[1];
    mv[1] = mv[0];
    return mv[0];
 }

 // source; https://stackoverflow.com/a/62425331
 float fast_sin1(float x)
 {
    x /= 2.f * PIf;
    x -= (int)x;

    if(x <= 0.5f)
    {
        float t = 2.f * x * (2.f * x - 1.f);
        return (PIf * t) / ((PIf - 4.f) * t - 1.f);
    }
    else
    {
        float t = 2.f * (1.f - x) * (1.f - 2.f * x);
        return -(PIf * t) / ((PIf - 4.f) * t - 1.f);
    }
 }

 // source; https://www.gamedev.net/forums/topic/621589-extremely-fast-sin-approximation/
 double fast_sin2(double x)
 {
    int k;
    double y;
    double z;

    z = x;
    z *= 0.3183098861837907;
    z += 6755399441055744.0;
    k = *((int *) &z);
    z = k;
    z *= 3.1415926535897932;
    x -= z;
    y = x;
    y *= x;
    z = 0.0073524681968701;
    z *= y;
    z -= 0.1652891139701474;
    z *= y;
    z += 0.9996919862959676;
    x *= z;
    k &= 1;
    k += k;
    z = k;
    z *= x;
    x -= z;

    return x;
 }

 // source; https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html
 float fast_sin3(float fAngle)
 {
    float fASqr = fAngle*fAngle;
    float fResult = 7.61e-03f;
    fResult *= fASqr;
    fResult -= 1.6605e-01f;
    fResult *= fASqr;
    fResult += 1.0f;
    fResult *= fAngle;
    return fResult;
 }

 float fast_sin4(float fAngle)
 {
    float fASqr = fAngle*fAngle;
    float fResult = -2.39e-08f;
    fResult *= fASqr;
    fResult += 2.7526e-06f;
    fResult *= fASqr;
    fResult -= 1.98409e-04f;
    fResult *= fASqr;
    fResult += 8.3333315e-03f;
    fResult *= fASqr;
    fResult -= 1.666666664e-01f;
    fResult *= fASqr;
    fResult += 1.0f;
    fResult *= fAngle;
    return fResult;
 }

 // source; https://stackoverflow.com/a/52693283
 float fast_sin5(float x)
 {
  return (16.f * x * (PIf - x)) / (5.f * PIf * PIf - (4.f * x * (PIf - x)));
  //return ( 4 * x * (180 - x)) / (40500 - x * (180 - x));
 }

 // source; I don't know, I found these some ten years ago and failed to accredit the original authors.
 float fast_sin6(float x)
 {
    return 1.273239544f * x + -0.636619772f * x * fabsf(x);
 }

 float fast_sin7(float x)
 {
    float res = 0.f, pow = x, fact = 1.f;

    for(int i = 0; i < 5; ++i)
    {
        res += pow / fact;
        pow *= x * x;
        fact *= (2.f*(i+1.f))*(2.f*(i+1.f)+1.f);
    }

    return res;
 }

 // main
 uint64_t microtime()
 {
    struct timeval tv;
    struct timezone tz;
    memset(&tz, 0, sizeof(struct timezone));
    gettimeofday(&tv, &tz);
    return 1000000 * tv.tv_sec + tv.tv_usec;
 }

 int main()
 {
    srandf(time(0));

    // the big accuracy bench
    const float test_max_increment = x2PIf/2;
    const float test_max_max = x2PIf*2; // ssh, our secret.
    for(float test_max = test_max_increment; test_max <= test_max_max; test_max += test_max_increment)
    {
        printf("\n-------------------------------------------------\nTesting accuracy of functions from 0 to %.1f ...\n-------------------------------------------------\n\n", test_max);

        // test the lerp_sin() accuracy
        printf("Testing lerp_sin() accuracy...\n");
        float delta = 0.f;
        float increment = 0.1f;
        float samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(lerp_sin(f) - sinf(f));
                // printf("%.1f: %.7f - %.7f\n", f, lerp_sin(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }
        //exit(0);

        // test the aliased_sin() accuracy
        printf("\nTesting aliased_sin() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(aliased_sin(f) - sinf(f));
                // printf("%.7f - %.7f\n", aliased_sin(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin1() accuracy
        printf("\nTesting fast_sin1() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin1(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin1(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin2() accuracy
        printf("\nTesting fast_sin2() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin2(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin2(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin3() accuracy
        printf("\nTesting fast_sin3() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin3(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin3(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin4() accuracy
        printf("\nTesting fast_sin4() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin4(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin4(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin5() accuracy
        printf("\nTesting fast_sin5() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin5(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin5(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin6() accuracy
        printf("\nTesting fast_sin6() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0.f; d < 6; d++)
        {
            for(float f = 0; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin6(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin6(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }

        // test the fast_sin7() accuracy
        printf("\nTesting fast_sin7() accuracy...\n");
        delta = 0.f;
        increment = 0.1f;
        samples = 0.f;
        for(int d = 0; d < 6; d++)
        {
            for(float f = 0.f; f < test_max; f += increment)
            {
                delta += fabsf(fast_sin7(f) - sinf(f));
                // printf("%.7f - %.7f\n", fast_sin7(f), sinf(f));
                // usleep(333333);
                samples += 1.f;
            }
            printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
            increment /= 10.f;
        }
    }

    //
    printf("\n-------------------------------------------------\nTesting speed of functions using random inputs.\n-------------------------------------------------\n\n");

    // prep benchmarks
    setlocale(LC_NUMERIC, "");
    float ret = 0.f;
    unsigned long e = 0, ui = 0;
    uint64_t st = 0, et = 0, avg = 0;

    //////// __rdtsc()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        avg += __rdtsc()-st;
    }
    printf("__rdtsc() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += __rdtsc();
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// sinf()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += sinf(st);
        avg += __rdtsc()-st;
    }
    printf("sin() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += sinf(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// IIRSine()

    IIRSine(2, 0, 0);

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += IIRSine(1, st, 0);
        avg += __rdtsc()-st;
    }
    printf("IIRSine() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += IIRSine(1, 0, 0);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// IIRSine2()

    IIRSine2();

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += IIRSine2();
        avg += __rdtsc()-st;
    }
    printf("IIRSine2() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += IIRSine2();
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// lerp_sin()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += lerp_sin(randf()*x2PIf);
        avg += __rdtsc()-st;
    }
    printf("lerp_sin() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += lerp_sin(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// aliased_sin()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += aliased_sin(randf()*x2PIf);
        avg += __rdtsc()-st;
    }
    printf("aliased_sin() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += aliased_sin(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin1()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin1(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin1() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin1(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin2()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin2(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin2() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin2(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin3()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin3(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin3() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin3(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin4()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin4(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin4() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin4(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin5()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin5(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin5() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin5(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin6()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin6(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin6() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin6(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    //////// fast_sin7()

    avg = 0;
    for(int i = 0; i < AVGITER; i++)
    {
        st = __rdtsc();
        ret += fast_sin7(st);
        avg += __rdtsc()-st;
    }
    printf("fast_sin7() Cycles: %'lu\n", avg / AVGITER);

    e = 0;
    st = microtime();
    while(microtime() - st <= interval)
    {
        ret += fast_sin7(randf()*x2PIf);
        e++;
    }
    ui = interval / 1000000;
    printf("Executions in %'lu seconds: %'lu\n", ui, e);
    printf("Executions per millisecond: %'lu\n", e/(1000*ui));
    printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
    printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

    ////////

    // done
    printf("%c\n", (char)ret);  // forces the compiler to not disregard the functions we are testing
    return 0;
 }
	/*
	James William Fletcher (github.com/mrbid)
	August 2021 - March 2024

	Benchmarking sine wave functions.
	https://james-william-fletcher.medium.com/benchmarking-sine-functions-16b067bf63ce

	references:
	http://www.ee.ic.ac.uk/pcheung/teaching/ee3_Study_Project/Sinewave%20Generation(708).pdf
	https://demonstrations.wolfram.com/SineWaveGenerationUsingAnUnstableIIRFilter/
	https://en.wikipedia.org/wiki/Goertzel_algorithm

	compile: gcc sine_bench.c -Ofast -lm -o sine_bench
	*/
	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <math.h>
	#include <string.h>
	#include <locale.h>
	#include <time.h>
	#include <sys/time.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <x86intrin.h>

	#define interval 1000000
	#define AVGITER 3000000

	int srandfq = 74235;
	void srandf(const int seed)
	{
	srandfq = seed;
	}
	float randf()
	{
	// https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html
	// [email protected]
	srandfq *= 16807;
	return (float)(srandfq & 0x7FFFFFFF) * 4.6566129e-010f;
	}

	// source; James William Fletcher; this should be accurate to three decimal places
	#define PIf 3.141592741f
	#define x2PIf 6.283185482f // PI*2
	#define rx2PIf 0.1591549367f // 1/(PI*2)
	#define wlerp(a, b, i) ((b - a) * i + a)

	float lerp_sin(const float theta)
	{
	// data for the wavetable
	static int init = 0;
	static float sine_wtable[256] = {0};

	// called once on first execution
	// generates the wave table
	if(init == 0)
	{
	for(int i = 0; i < 256; i++)
	sine_wtable[i] = sinf(i * 0.02454369329f); // 0.02454369329f = x2PIf / 256.f;
	init = 1;
	}

	// normalise theta to a 0-1 range
	float norm = theta * rx2PIf;

	// preserve the fractional part that will be lopped off by the wrapping
	const float a = norm * 256.f; // scales normal range to wave table range
	const float fract = a-floor(a);

	// wrap it
	const unsigned char i = (unsigned char)(a); // this cast does the wrapping for us, it's fast.

	// check for end of array case for wrap-around lerp.
	if(i >= 255)
	return wlerp(sine_wtable[255], sine_wtable[0], fract);

	// do regular table lerp
	return wlerp(sine_wtable[i], sine_wtable[i+1], fract);
	}

	float aliased_sin(const float theta)
	{
	// data for the wavetable
	static int init = 0;
	static float sine_wtable[65536] = {0};

	// called once on first execution
	if(init == 0)
	{
	for(int i = 0; i < 65536; i++)
	sine_wtable[i] = sinf(i * 9.587380191e-05f); // 9.587380191e-05f = x2PIf / 65536.f;
	init = 1;
	}

	// return result
	const unsigned short i = (unsigned short)(10430.37793f * theta); // 10430.37793f = 65536.f / x2PIf
	return sine_wtable[i];
	}

	// source; KVRAF; https://www.kvraudio.com/forum/viewtopic.php?t=321027
	// https://www.kvraudio.com/forum/viewtopic.php?p=4555831&sid=b366604bf9b3cfd7892f40e7b3fee4e4#p4555831
	// omega is passed as 2.0PIfrequency/sampleRate
	// Initialise as IIRSine(2, omega, 0); then call on tick as IIRSine(0, 0, 0);
	float IIRSine(const int set, const float omega, const float phase)
	{
	static float m_a1, m_y1, m_y2;

	if(set > 0)
	{
	m_a1 = omega;

	if(set > 1)
	{
	m_a1 = 2.0f * cosf(m_a1);

	if(set > 2)
	{
	m_y1 = sinf(phase - omega);
	m_y2 = sinf(phase - 2.0f * omega);
	}
	}
	}

	const float y3 = m_a1 * m_y1 - m_y2;
	m_y2 = m_y1;
	m_y1 = y3;
	return y3;
	}

	// source; analog.com (SHARC)
	// ORIGINAL CODE FROM SHARC
	// short output;
	// main(){
	// int i; const short A=0x7e66; /* A=(1.975/2 * 32768) */
	// short y[3]={0,0x1209,0}; /* (y0,y1,y2), y1=(0.140932768) /
	// for (i=0; i<40; i++) { y[0] = (((Ay[1])>>15) + ((Ay[1])>>15)) – y[2]; y[2] = y[1]; /* y2 <–– y1 / y[1] = y[0]; / y1 <–– y0 */
	// output = y[0];
	// }}
	float IIRSine2()
	{
	static float mv[3] = {0, 0.1409f, 0};
	static const float A = 1.975f/2.f;
	mv[0] = A * mv[1] - mv[2];
	mv[2] = mv[1];
	mv[1] = mv[0];
	return mv[0];
	}

	// source; https://stackoverflow.com/a/62425331
	float fast_sin1(float x)
	{
	x /= 2.f * PIf;
	x -= (int)x;

	if(x <= 0.5f)
	{
	float t = 2.f * x * (2.f * x - 1.f);
	return (PIf * t) / ((PIf - 4.f) * t - 1.f);
	}
	else
	{
	float t = 2.f * (1.f - x) * (1.f - 2.f * x);
	return -(PIf * t) / ((PIf - 4.f) * t - 1.f);
	}
	}

	// source; https://www.gamedev.net/forums/topic/621589-extremely-fast-sin-approximation/
	double fast_sin2(double x)
	{
	int k;
	double y;
	double z;

	z = x;
	z *= 0.3183098861837907;
	z += 6755399441055744.0;
	k = ((int ) &z);
	z = k;
	z *= 3.1415926535897932;
	x -= z;
	y = x;
	y *= x;
	z = 0.0073524681968701;
	z *= y;
	z -= 0.1652891139701474;
	z *= y;
	z += 0.9996919862959676;
	x *= z;
	k &= 1;
	k += k;
	z = k;
	z *= x;
	x -= z;

	return x;
	}

	// source; https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html
	float fast_sin3(float fAngle)
	{
	float fASqr = fAngle*fAngle;
	float fResult = 7.61e-03f;
	fResult *= fASqr;
	fResult -= 1.6605e-01f;
	fResult *= fASqr;
	fResult += 1.0f;
	fResult *= fAngle;
	return fResult;
	}

	float fast_sin4(float fAngle)
	{
	float fASqr = fAngle*fAngle;
	float fResult = -2.39e-08f;
	fResult *= fASqr;
	fResult += 2.7526e-06f;
	fResult *= fASqr;
	fResult -= 1.98409e-04f;
	fResult *= fASqr;
	fResult += 8.3333315e-03f;
	fResult *= fASqr;
	fResult -= 1.666666664e-01f;
	fResult *= fASqr;
	fResult += 1.0f;
	fResult *= fAngle;
	return fResult;
	}

	// source; https://stackoverflow.com/a/52693283
	float fast_sin5(float x)
	{
	return (16.f * x * (PIf - x)) / (5.f * PIf * PIf - (4.f * x * (PIf - x)));
	//return ( 4 * x * (180 - x)) / (40500 - x * (180 - x));
	}

	// source; I don't know, I found these some ten years ago and failed to accredit the original authors.
	float fast_sin6(float x)
	{
	return 1.273239544f * x + -0.636619772f * x * fabsf(x);
	}

	float fast_sin7(float x)
	{
	float res = 0.f, pow = x, fact = 1.f;

	for(int i = 0; i < 5; ++i)
	{
	res += pow / fact;
	pow = x x;
	fact = (2.f(i+1.f))(2.f(i+1.f)+1.f);
	}

	return res;
	}

	// main
	uint64_t microtime()
	{
	struct timeval tv;
	struct timezone tz;
	memset(&tz, 0, sizeof(struct timezone));
	gettimeofday(&tv, &tz);
	return 1000000 * tv.tv_sec + tv.tv_usec;
	}

	int main()
	{
	srandf(time(0));

	// the big accuracy bench
	const float test_max_increment = x2PIf/2;
	const float test_max_max = x2PIf*2; // ssh, our secret.
	for(float test_max = test_max_increment; test_max <= test_max_max; test_max += test_max_increment)
	{
	printf("\n-------------------------------------------------\nTesting accuracy of functions from 0 to %.1f ...\n-------------------------------------------------\n\n", test_max);

	// test the lerp_sin() accuracy
	printf("Testing lerp_sin() accuracy...\n");
	float delta = 0.f;
	float increment = 0.1f;
	float samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(lerp_sin(f) - sinf(f));
	// printf("%.1f: %.7f - %.7f\n", f, lerp_sin(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}
	//exit(0);

	// test the aliased_sin() accuracy
	printf("\nTesting aliased_sin() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(aliased_sin(f) - sinf(f));
	// printf("%.7f - %.7f\n", aliased_sin(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin1() accuracy
	printf("\nTesting fast_sin1() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin1(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin1(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin2() accuracy
	printf("\nTesting fast_sin2() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin2(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin2(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin3() accuracy
	printf("\nTesting fast_sin3() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin3(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin3(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin4() accuracy
	printf("\nTesting fast_sin4() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin4(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin4(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin5() accuracy
	printf("\nTesting fast_sin5() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin5(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin5(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin6() accuracy
	printf("\nTesting fast_sin6() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0.f; d < 6; d++)
	{
	for(float f = 0; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin6(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin6(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}

	// test the fast_sin7() accuracy
	printf("\nTesting fast_sin7() accuracy...\n");
	delta = 0.f;
	increment = 0.1f;
	samples = 0.f;
	for(int d = 0; d < 6; d++)
	{
	for(float f = 0.f; f < test_max; f += increment)
	{
	delta += fabsf(fast_sin7(f) - sinf(f));
	// printf("%.7f - %.7f\n", fast_sin7(f), sinf(f));
	// usleep(333333);
	samples += 1.f;
	}
	printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
	increment /= 10.f;
	}
	}

	//
	printf("\n-------------------------------------------------\nTesting speed of functions using random inputs.\n-------------------------------------------------\n\n");

	// prep benchmarks
	setlocale(LC_NUMERIC, "");
	float ret = 0.f;
	unsigned long e = 0, ui = 0;
	uint64_t st = 0, et = 0, avg = 0;

	//////// __rdtsc()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	avg += __rdtsc()-st;
	}
	printf("__rdtsc() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += __rdtsc();
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// sinf()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += sinf(st);
	avg += __rdtsc()-st;
	}
	printf("sin() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += sinf(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// IIRSine()

	IIRSine(2, 0, 0);

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += IIRSine(1, st, 0);
	avg += __rdtsc()-st;
	}
	printf("IIRSine() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += IIRSine(1, 0, 0);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// IIRSine2()

	IIRSine2();

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += IIRSine2();
	avg += __rdtsc()-st;
	}
	printf("IIRSine2() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += IIRSine2();
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// lerp_sin()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += lerp_sin(randf()*x2PIf);
	avg += __rdtsc()-st;
	}
	printf("lerp_sin() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += lerp_sin(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// aliased_sin()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += aliased_sin(randf()*x2PIf);
	avg += __rdtsc()-st;
	}
	printf("aliased_sin() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += aliased_sin(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin1()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin1(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin1() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin1(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin2()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin2(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin2() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin2(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin3()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin3(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin3() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin3(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin4()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin4(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin4() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin4(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin5()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin5(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin5() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin5(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin6()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin6(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin6() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin6(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	//////// fast_sin7()

	avg = 0;
	for(int i = 0; i < AVGITER; i++)
	{
	st = __rdtsc();
	ret += fast_sin7(st);
	avg += __rdtsc()-st;
	}
	printf("fast_sin7() Cycles: %'lu\n", avg / AVGITER);

	e = 0;
	st = microtime();
	while(microtime() - st <= interval)
	{
	ret += fast_sin7(randf()*x2PIf);
	e++;
	}
	ui = interval / 1000000;
	printf("Executions in %'lu seconds: %'lu\n", ui, e);
	printf("Executions per millisecond: %'lu\n", e/(1000*ui));
	printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
	printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));

	////////

	// done
	printf("%c\n", (char)ret); // forces the compiler to not disregard the functions we are testing
	return 0;
	}