|
// Public Domain under http://unlicense.org, see link for details. |
|
// EXCEPT "CORE-MATH" marked section |
|
|
|
// {gcc,clang} -O3 -march=native -Wall -Wextra -Wconversion -Wpedantic -Wno-unused-function -fno-math-errno -ffp-contract=off atan_spitball.c -o atan_spitball -lm |
|
|
|
#include <stdint.h> |
|
#include <stdlib.h> |
|
#include <stdio.h> |
|
#include <math.h> |
|
#include <string.h> |
|
#include <assert.h> |
|
|
|
//************************************ |
|
// minimal spitball atan(x) on [-1,1] |
|
|
|
// classic polynomial: abs_error <= 4.952073e-03 (radians) |
|
float atan_p(float x) |
|
{ |
|
static const float C1 = 0.97239410877227783203125f; |
|
static const float C3 = -0.19194793701171875f; |
|
|
|
return x*fmaf(C3*x,x,C1); |
|
} |
|
|
|
// use |x| instead of x^2 variant |
|
// abs_error <= 3.347754e-03 (radians) |
|
float atan_x(float x) |
|
{ |
|
static const float C1 = 1.0546815395355224609375; |
|
static const float C3 = -0.2659356594085693359375; |
|
|
|
return x*fmaf(C3,fabsf(x),C1); |
|
} |
|
|
|
// |x| + lower error at x = +/-1 variant |
|
// abs_error <= 3.741682e-03 (radians) |
|
float atan_e(float x) |
|
{ |
|
static const float C1 = 1.0584795985416213f; |
|
static const float C3 = -0.273081435144173f; |
|
|
|
return x*fmaf(C3,fabsf(x),C1); |
|
} |
|
|
|
|
|
// ************************************ |
|
// The CORE-MATH routine fall under: |
|
// |
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
|
// of this software and associated documentation files (the "Software"), to deal |
|
// in the Software without restriction, including without limitation the rights |
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
//copies of the Software, and to permit persons to whom the Software is |
|
// furnished to do so, subject to the following conditions: |
|
// |
|
// The above copyright notice and this permission notice shall be included in all |
|
// copies or substantial portions of the Software. |
|
// |
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
// SOFTWARE. |
|
|
|
|
|
//********************************************************************** |
|
|
|
#define FUNC_TYPE_ODD 1 |
|
#define FUNC_TYPE_EVEN 2 |
|
|
|
// bunch of wip temp hacks |
|
|
|
typedef struct { |
|
float (*f)(float); |
|
char* name; |
|
char* notes; |
|
uint32_t flags; |
|
uint32_t type; |
|
} func_entry_t; |
|
|
|
typedef struct { |
|
uint32_t x0; |
|
uint32_t x1; |
|
char* name; |
|
} func_range_entry_t; |
|
|
|
typedef struct { |
|
char* name; |
|
func_range_entry_t* table; |
|
uint32_t num; |
|
} func_range_table_t; |
|
|
|
#define LENGTHOF(X) (sizeof(X)/sizeof(X[0])) |
|
#define STRINGIFY(S) STRINGIFY_(S) |
|
#define STRINGIFY_(S) #S |
|
#define ENTRY(X) { .f=&X, .name=STRINGIFY(X) } |
|
|
|
float libm(float x) { return atanf(x); } |
|
|
|
|
|
//********************************************************************** |
|
// SEE: https://core-math.gitlabpages.inria.fr |
|
// and license info at top of file. |
|
|
|
// oh my! |
|
#pragma GCC diagnostic push |
|
#pragma GCC diagnostic ignored "-Wpragmas" |
|
#pragma GCC diagnostic ignored "-Wpedantic" |
|
#pragma GCC diagnostic ignored "-Wunknown-warning-option" |
|
#pragma GCC diagnostic ignored "-Wconversion" |
|
#pragma GCC diagnostic ignored "-Wsign-conversion" |
|
#pragma GCC diagnostic ignored "-Wshorten-64-to-32" |
|
#pragma GCC diagnostic ignored "-Wimplicit-float-conversion" |
|
#pragma GCC diagnostic ignored "-Wimplicit-int-conversion" |
|
#pragma GCC diagnostic ignored "-Wfloat-conversion" |
|
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" |
|
|
|
#include <fenv.h> |
|
|
|
typedef union {float f; uint32_t u;} b32u32_u; |
|
typedef union {double f; uint64_t u;} b64u64_u; |
|
typedef uint64_t u64; |
|
|
|
float cr_atanf(float x){ |
|
const double pi2 = 0x1.921fb54442d18p+0; |
|
b32u32_u t = {.f = x}; |
|
int e = (t.u>>23)&0xff, gt = e>=127; |
|
if(__builtin_expect(e==0xff, 0)) { |
|
if(t.u<<9) return x; // nan |
|
return __builtin_copysign(pi2,(double)x); // inf |
|
} |
|
if (__builtin_expect(e<127-13, 0)){ |
|
if (__builtin_expect(e<127-25, 0)) |
|
return __builtin_fmaf(-x, __builtin_fabsf(x), x); |
|
return __builtin_fmaf(-0x1.5555555555555p-2f*x, x*x, x); |
|
} |
|
/* now |x| >= 0x1p-13 */ |
|
double z = x; |
|
if (gt) z = 1/z; /* gt is non-zero for |x| >= 1 */ |
|
double z2 = z*z, z4 = z2*z2, z8 = z4*z4; |
|
/* polynomials generated using rminimax |
|
(https://gitlab.inria.fr/sfilip/rminimax) with the following command: |
|
./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log |
|
(see output atanf.sollya) |
|
The coefficient cd[0] was slightly reduced from the original value |
|
0x1.51eccde075d67p-2 to avoid an exceptional case for |x| = 0x1.1ad646p-4 |
|
and rounding to nearest. |
|
*/ |
|
static const double cn[] = |
|
{0x1.51eccde075d67p-2, 0x1.a76bb5637f2f2p-1, 0x1.81e0eed20de88p-1, |
|
0x1.376c8ca67d11dp-2, 0x1.aec7b69202ac6p-5, 0x1.9561899acc73ep-9, |
|
0x1.bf9fa5b67e6p-16}; |
|
static const double cd[] = |
|
{0x1.51eccde075d66p-2, 0x1.dfbdd7b392d28p-1, 0x1p+0, |
|
0x1.fd22bf0e89b54p-2, 0x1.d91ff8b576282p-4, 0x1.653ea99fc9bbp-7, |
|
0x1.1e7fcc202340ap-12}; |
|
double cn0 = cn[0] + z2*cn[1]; |
|
double cn2 = cn[2] + z2*cn[3]; |
|
double cn4 = cn[4] + z2*cn[5]; |
|
double cn6 = cn[6]; |
|
cn0 += z4*cn2; |
|
cn4 += z4*cn6; |
|
cn0 += z8*cn4; |
|
cn0 *= z; |
|
double cd0 = cd[0] + z2*cd[1]; |
|
double cd2 = cd[2] + z2*cd[3]; |
|
double cd4 = cd[4] + z2*cd[5]; |
|
double cd6 = cd[6]; |
|
cd0 += z4*cd2; |
|
cd4 += z4*cd6; |
|
cd0 += z8*cd4; |
|
double r = cn0/cd0; |
|
if (!gt) return r; /* for |x| < 1, (float) r is correctly rounded */ |
|
|
|
/* now |x| >= 1 */ |
|
r = __builtin_copysign(0x1.0fdaa22168c23p-7, z) - r + __builtin_copysign(0x1.9p0, z); |
|
return r; |
|
} |
|
|
|
#pragma GCC diagnostic pop |
|
|
|
|
|
//******************************************************** |
|
|
|
|
|
|
|
func_entry_t func_table[] = |
|
{ |
|
//ENTRY(libm), |
|
ENTRY(atan_p), |
|
ENTRY(atan_x), |
|
ENTRY(atan_e), |
|
}; |
|
|
|
const char* func_name = "atan"; |
|
|
|
float cr_func(float x) { return cr_atanf(x); } |
|
|
|
|
|
//******************************************************** |
|
|
|
static const uint32_t f32_sign_bit_k = UINT32_C(0x80000000); |
|
|
|
static inline uint32_t f32_to_bits(float x) |
|
{ |
|
uint32_t u; memcpy(&u, &x, 4); return u; |
|
} |
|
|
|
static inline float f32_from_bits(uint32_t x) |
|
{ |
|
float f; memcpy(&f, &x, 4); return f; |
|
} |
|
|
|
static inline uint32_t u32_abs(uint32_t x) |
|
{ |
|
return (int32_t)x >= 0 ? x : -x; |
|
} |
|
|
|
|
|
// ulp distance provided a & b are finite |
|
// and have the same sign |
|
static inline uint32_t f32_ulp_dist_ss(float a, float b) |
|
{ |
|
uint32_t ua = f32_to_bits(a); |
|
uint32_t ub = f32_to_bits(b); |
|
return u32_abs(ua-ub); |
|
} |
|
|
|
// ulp distance for finite a & b |
|
static inline uint32_t f32_ulp_dist(float a, float b) |
|
{ |
|
uint32_t ua = f32_to_bits(a); |
|
uint32_t ub = f32_to_bits(b); |
|
|
|
if ((int32_t)(ub^ua) >= 0) |
|
return u32_abs(ua-ub); |
|
|
|
return ua+ub+f32_sign_bit_k; |
|
} |
|
|
|
typedef struct { |
|
float abs; |
|
uint32_t max; |
|
uint32_t ulp[4]; |
|
} func_error_t; |
|
|
|
// tracks total error data across multiple intervals |
|
func_error_t func_error[LENGTHOF(func_table)] = {{0}}; |
|
|
|
|
|
// add error data from current interval to the totals |
|
void error_to_totals(func_error_t* e) |
|
{ |
|
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++, e++) { |
|
func_error[fi].ulp[0] += e->ulp[0]; |
|
func_error[fi].ulp[1] += e->ulp[1]; |
|
func_error[fi].ulp[2] += e->ulp[2]; |
|
func_error[fi].ulp[3] += e->ulp[3]; |
|
|
|
if (e->max > func_error[fi].max) func_error[fi].max = e->max; |
|
if (e->abs > func_error[fi].abs) func_error[fi].abs = e->abs; |
|
} |
|
} |
|
|
|
void error_dump_i(func_error_t* e) |
|
{ |
|
printf("|%15s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%12s|\n", |
|
"func", "max ULP", "CR", "FR", "2 ULP", "> 2 ULP", |
|
"CR%", "FR%", "2 ULP%","> 2 ULP%", "abs"); |
|
|
|
printf("|%15s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%12s|\n", |
|
"---:", "---:", "---:", "---:", "---:", "---:", |
|
"---:", "---:", "---:","---:", "---:"); |
|
|
|
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++, e++) { |
|
uint32_t u0 = e->ulp[0]; |
|
uint32_t u1 = e->ulp[1]; |
|
uint32_t u2 = e->ulp[2]; |
|
uint32_t u3 = e->ulp[3]; |
|
uint32_t t = (u0+u1+u2+u3); |
|
double s = 100.0/(double)t; |
|
|
|
printf("|%15s|%10u|%10u|%10u|%10u|%10u|%10f|%10f|%10f|%10f|%e|\n", |
|
func_table[fi].name, e->max, |
|
u0, u1, u2, u3, |
|
s*u0, s*u1, s*u2, s*u3, e->abs |
|
); |
|
} |
|
} |
|
|
|
void error_dump(void) |
|
{ |
|
printf("\nTOTAL: %s\n", func_name); |
|
error_dump_i(func_error); |
|
} |
|
|
|
static inline void test_error_add(func_error_t* error, float e, float a) |
|
{ |
|
float d = fabsf(e-a); |
|
|
|
if (d == 0.f) { error->ulp[0]++; return; }; |
|
|
|
uint32_t ulp = f32_ulp_dist(e,a); |
|
|
|
if (ulp > error->max) { error->max = ulp; } |
|
if (d > error->abs) { error->abs = d; } |
|
|
|
if (ulp > 3) ulp = 3; |
|
|
|
error->ulp[ulp]++; |
|
} |
|
|
|
void test_force(uint32_t x0, uint32_t x1) |
|
{ |
|
if (x1 < x0) { uint32_t t = x0; x0=x1; x1=t; } |
|
|
|
float f0 = f32_from_bits(x0); |
|
float f1 = f32_from_bits(x1); |
|
|
|
printf("\nchecking: %s on [%08x,%08x] [%e,%e]\n", func_name, x0,x1,f0,f1); |
|
|
|
func_error_t error[LENGTHOF(func_table)] = {{0}}; |
|
|
|
for(uint32_t ix=x0; ix<=x1; ix++) { |
|
float x = f32_from_bits(ix); |
|
float cr = cr_func(x); |
|
|
|
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) { |
|
float r = func_table[fi].f(x); |
|
test_error_add(error+fi, cr,r); |
|
} |
|
} |
|
|
|
error_to_totals(error); |
|
error_dump_i(error); |
|
} |
|
|
|
void test_linear_range(uint32_t x0, uint32_t x1, float k) |
|
{ |
|
func_error_t error[LENGTHOF(func_table)] = {{0}}; |
|
|
|
float f0 = f32_from_bits(x0); |
|
float f1 = f32_from_bits(x1); |
|
|
|
printf("\nchecking: %s on [%08x,%08x] [%e,%e] : {linear result range %a*x}\n", func_name, x0,x1,f0,f1,k); |
|
|
|
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) { |
|
for(uint32_t xi=x0; xi<=x1; xi++) { |
|
float x = f32_from_bits(xi); |
|
float r = func_table[fi].f(x); |
|
float cr = k*x; |
|
test_error_add(error+fi,cr,r); |
|
} |
|
} |
|
error_to_totals(error); |
|
error_dump_i(error); |
|
} |
|
|
|
|
|
// test a full power-of-two interval starting from x. (need not start on a boundary) |
|
void test_1pot(float x) |
|
{ |
|
uint32_t x0 = f32_to_bits(x); |
|
uint32_t x1 = f32_to_bits(2.f*x)-1; |
|
test_force(x0,x1); |
|
} |
|
|
|
// correctly rounded f(x) = k on [x0,x1] |
|
void test_const_range(uint32_t x0, uint32_t x1, float k) |
|
{ |
|
func_error_t error[LENGTHOF(func_table)] = {{0}}; |
|
|
|
float f0 = f32_from_bits(x0); |
|
float f1 = f32_from_bits(x1); |
|
|
|
printf("\nchecking: %s on [%08x,%08x] [%e,%e] : {constant result range %a}\n", func_name, x0,x1,f0,f1,k); |
|
|
|
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) { |
|
for(uint32_t xi=x0; xi<=x1; xi++) { |
|
float x = f32_from_bits(xi); |
|
float r = func_table[fi].f(x); |
|
test_error_add(error+fi,k,r); |
|
} |
|
} |
|
error_to_totals(error); |
|
error_dump_i(error); |
|
} |
|
|
|
void test_all(void) |
|
{ |
|
uint32_t x0 = 0; |
|
uint32_t x1 = 0x39b89ba2; |
|
|
|
test_linear_range(x0, x1, 1.f); |
|
|
|
x0 = x1+1; x1=f32_to_bits(1.f/64.f); |
|
test_force(x0,x1); |
|
|
|
// break-down the interior a bit. probably overkill WRT breakdown |
|
test_1pot(1.f/64.f); |
|
test_1pot(1.f/32.f); |
|
test_1pot(1.f/16.f); |
|
test_1pot(1.f/ 8.f); |
|
test_1pot(1.f/ 4.f); |
|
test_1pot(1.f/ 2.f); |
|
} |
|
|
|
int main(void) |
|
{ |
|
printf("<details markdown=\"1\"><summary>click for range breakdown</summary>\n\n"); |
|
|
|
test_all(); |
|
|
|
printf("\n</details>\n\n"); |
|
|
|
// dump summary |
|
error_dump(); |
|
|
|
return 0; |
|
} |