Last active
September 23, 2019 15:58
-
-
Save Jacajack/c87d1dcfcd44d6a4ed2c5fc94857753d to your computer and use it in GitHub Desktop.
N-section method benchmarking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Compile with: clang -O3 -o nsect nsect.c -Wall -DCOUNT=5000000 -DNSECT=? -funroll-loops | |
*/ | |
#include <math.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <sys/time.h> | |
#define N 6 | |
#ifndef COUNT | |
#error COUNT not defined! | |
#endif | |
#ifndef NSECT | |
#define NSECT 2 | |
#endif | |
float polynomial[N]; | |
float horner( const float poly[N], float x ) | |
{ | |
float val = poly[N-1]; | |
for ( int i = N - 2; i >= 0; i-- ) | |
val = poly[i] + x * val; | |
return val; | |
} | |
float f( float x ) | |
{ | |
return horner( polynomial, x ); | |
} | |
#if 1 | |
float nsect( float a, float b, const float eps_x ) | |
{ | |
float fa = f( a ); | |
float fb = f( b ); | |
if ( fa == 0 ) return a; | |
else if ( fb == 0 ) return b; | |
else if ( fa * fb > 0 ) return NAN; | |
#define L x[0] | |
#define fL fx[0] | |
#define R x[NSECT] | |
#define fR fx[NSECT] | |
float x[NSECT + 1]; | |
float fx[NSECT + 1]; | |
// Ends of the initial interval | |
L = a; | |
fL = fa; | |
R = b; | |
fR = fb; | |
while ( R - L > eps_x ) | |
{ | |
int found = 0; | |
for ( int i = 0; i < NSECT - 1; i++ ) // i reaches NSECT-2; i+1 reaches NSECT-1 | |
{ | |
// Only compute function values inside the interval (1; NSECT-1) | |
x[i + 1] = L + ( R - L ) * (float)( i + 1 ) / NSECT; | |
fx[i + 1] = f( x[i + 1] ); | |
if ( fx[i] * fx[i + 1] < 0 ) | |
{ | |
L = x[i]; | |
fL = fx[i]; | |
R = x[i + 1]; | |
fR = fx[i + 1]; | |
found = 1; | |
break; | |
} | |
} | |
// Now, look for the roots at the right side of the interval | |
if ( !found ) | |
{ | |
if ( fx[NSECT - 1] * fx[NSECT] < 0 ) | |
{ | |
L = x[NSECT - 1]; | |
fL = fx[NSECT - 1]; | |
} | |
else // No roots, so our last hope is that some fx[j] is 0 | |
{ | |
for ( int j = 0; j < NSECT + 1; j++ ) | |
if ( fx[j] == 0 ) | |
return x[j]; | |
return NAN; | |
} | |
} | |
} | |
return ( L + R ) * 0.5f; | |
#undef R | |
#undef L | |
#undef fR | |
#undef fL | |
} | |
#endif | |
#if 0 | |
float nsect( float a, float b, const float eps_x ) | |
{ | |
float fa = f( a ); | |
float fb = f( b ); | |
if ( fa == 0 ) return a; | |
else if ( fb == 0 ) return b; | |
else if ( fa * fb > 0 ) return 0; | |
float x, fx; | |
while ( b - a > eps_x ) | |
{ | |
x = ( b + a ) * 0.5f; | |
fx = f( x ); | |
if ( fa * fx < 0 ) | |
{ | |
b = x; | |
fb = fx; | |
} | |
else if ( fx * fb < 0 ) | |
{ | |
a = x; | |
fa = fx; | |
} | |
else | |
return x; | |
} | |
return ( a + b ) * 0.5f; | |
} | |
#endif | |
int main( int argc, char **argv ) | |
{ | |
struct timeval t0, t1; | |
float *polys = malloc( COUNT * sizeof( float ) * N ); | |
float *p = polys; | |
for ( int i = 0; i < COUNT * N; i++ ) | |
assert( scanf( "%f", p++ ) == 1 ); | |
float xsum = 0; // So the code isn't optimized when we don't print the roots | |
p = polys; | |
gettimeofday( &t0, NULL ); | |
for ( int i = 0; i < COUNT; i++, p += N ) | |
{ | |
memcpy( polynomial, p, N * sizeof( float ) ); | |
float x = nsect( -5, 5, 1e-4 ); | |
xsum += x; | |
#ifdef PRINT_ROOTS | |
fprintf( stderr, "%f\n", x ); | |
#endif | |
} | |
gettimeofday( &t1, NULL ); | |
fprintf( stderr, "xsum: %f\n", xsum ); | |
printf( "%f ms\n", ( ( t1.tv_sec - t0.tv_sec ) * 1e6 + ( t1.tv_usec - t0.tv_usec ) ) * 1e-3 ); | |
free( polys ); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment