Skip to content

Instantly share code, notes, and snippets.

@Marc-B-Reynolds
Created January 20, 2025 21:11
Show Gist options
  • Save Marc-B-Reynolds/329de5171ae5d4ad4745a7439cbbdf27 to your computer and use it in GitHub Desktop.
Save Marc-B-Reynolds/329de5171ae5d4ad4745a7439cbbdf27 to your computer and use it in GitHub Desktop.
3 minimal single parameter atan implementations on [-1,1]. Only accurate to about 1/5 a degree.
// Public Domain under http://unlicense.org, see link for details.
// EXCEPT "CORE-MATH" marked section
// {gcc,clang} -O3 -march=native -Wall -Wextra -Wconversion -Wpedantic -Wno-unused-function -fno-math-errno -ffp-contract=off atan_spitball.c -o atan_spitball -lm
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <assert.h>
//************************************
// minimal spitball atan(x) on [-1,1]
// classic polynomial: abs_error <= 4.952073e-03 (radians)
float atan_p(float x)
{
static const float C1 = 0.97239410877227783203125f;
static const float C3 = -0.19194793701171875f;
return x*fmaf(C3*x,x,C1);
}
// use |x| instead of x^2 variant
// abs_error <= 3.347754e-03 (radians)
float atan_x(float x)
{
static const float C1 = 1.0546815395355224609375;
static const float C3 = -0.2659356594085693359375;
return x*fmaf(C3,fabsf(x),C1);
}
// |x| + lower error at x = +/-1 variant
// abs_error <= 3.741682e-03 (radians)
float atan_e(float x)
{
static const float C1 = 1.0584795985416213f;
static const float C3 = -0.273081435144173f;
return x*fmaf(C3,fabsf(x),C1);
}
// ************************************
// The CORE-MATH routine fall under:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//**********************************************************************
#define FUNC_TYPE_ODD 1
#define FUNC_TYPE_EVEN 2
// bunch of wip temp hacks
typedef struct {
float (*f)(float);
char* name;
char* notes;
uint32_t flags;
uint32_t type;
} func_entry_t;
typedef struct {
uint32_t x0;
uint32_t x1;
char* name;
} func_range_entry_t;
typedef struct {
char* name;
func_range_entry_t* table;
uint32_t num;
} func_range_table_t;
#define LENGTHOF(X) (sizeof(X)/sizeof(X[0]))
#define STRINGIFY(S) STRINGIFY_(S)
#define STRINGIFY_(S) #S
#define ENTRY(X) { .f=&X, .name=STRINGIFY(X) }
float libm(float x) { return atanf(x); }
//**********************************************************************
// SEE: https://core-math.gitlabpages.inria.fr
// and license info at top of file.
// oh my!
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wpedantic"
#pragma GCC diagnostic ignored "-Wunknown-warning-option"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#pragma GCC diagnostic ignored "-Wimplicit-float-conversion"
#pragma GCC diagnostic ignored "-Wimplicit-int-conversion"
#pragma GCC diagnostic ignored "-Wfloat-conversion"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#include <fenv.h>
typedef union {float f; uint32_t u;} b32u32_u;
typedef union {double f; uint64_t u;} b64u64_u;
typedef uint64_t u64;
float cr_atanf(float x){
const double pi2 = 0x1.921fb54442d18p+0;
b32u32_u t = {.f = x};
int e = (t.u>>23)&0xff, gt = e>=127;
if(__builtin_expect(e==0xff, 0)) {
if(t.u<<9) return x; // nan
return __builtin_copysign(pi2,(double)x); // inf
}
if (__builtin_expect(e<127-13, 0)){
if (__builtin_expect(e<127-25, 0))
return __builtin_fmaf(-x, __builtin_fabsf(x), x);
return __builtin_fmaf(-0x1.5555555555555p-2f*x, x*x, x);
}
/* now |x| >= 0x1p-13 */
double z = x;
if (gt) z = 1/z; /* gt is non-zero for |x| >= 1 */
double z2 = z*z, z4 = z2*z2, z8 = z4*z4;
/* polynomials generated using rminimax
(https://gitlab.inria.fr/sfilip/rminimax) with the following command:
./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log
(see output atanf.sollya)
The coefficient cd[0] was slightly reduced from the original value
0x1.51eccde075d67p-2 to avoid an exceptional case for |x| = 0x1.1ad646p-4
and rounding to nearest.
*/
static const double cn[] =
{0x1.51eccde075d67p-2, 0x1.a76bb5637f2f2p-1, 0x1.81e0eed20de88p-1,
0x1.376c8ca67d11dp-2, 0x1.aec7b69202ac6p-5, 0x1.9561899acc73ep-9,
0x1.bf9fa5b67e6p-16};
static const double cd[] =
{0x1.51eccde075d66p-2, 0x1.dfbdd7b392d28p-1, 0x1p+0,
0x1.fd22bf0e89b54p-2, 0x1.d91ff8b576282p-4, 0x1.653ea99fc9bbp-7,
0x1.1e7fcc202340ap-12};
double cn0 = cn[0] + z2*cn[1];
double cn2 = cn[2] + z2*cn[3];
double cn4 = cn[4] + z2*cn[5];
double cn6 = cn[6];
cn0 += z4*cn2;
cn4 += z4*cn6;
cn0 += z8*cn4;
cn0 *= z;
double cd0 = cd[0] + z2*cd[1];
double cd2 = cd[2] + z2*cd[3];
double cd4 = cd[4] + z2*cd[5];
double cd6 = cd[6];
cd0 += z4*cd2;
cd4 += z4*cd6;
cd0 += z8*cd4;
double r = cn0/cd0;
if (!gt) return r; /* for |x| < 1, (float) r is correctly rounded */
/* now |x| >= 1 */
r = __builtin_copysign(0x1.0fdaa22168c23p-7, z) - r + __builtin_copysign(0x1.9p0, z);
return r;
}
#pragma GCC diagnostic pop
//********************************************************
func_entry_t func_table[] =
{
//ENTRY(libm),
ENTRY(atan_p),
ENTRY(atan_x),
ENTRY(atan_e),
};
const char* func_name = "atan";
float cr_func(float x) { return cr_atanf(x); }
//********************************************************
static const uint32_t f32_sign_bit_k = UINT32_C(0x80000000);
static inline uint32_t f32_to_bits(float x)
{
uint32_t u; memcpy(&u, &x, 4); return u;
}
static inline float f32_from_bits(uint32_t x)
{
float f; memcpy(&f, &x, 4); return f;
}
static inline uint32_t u32_abs(uint32_t x)
{
return (int32_t)x >= 0 ? x : -x;
}
// ulp distance provided a & b are finite
// and have the same sign
static inline uint32_t f32_ulp_dist_ss(float a, float b)
{
uint32_t ua = f32_to_bits(a);
uint32_t ub = f32_to_bits(b);
return u32_abs(ua-ub);
}
// ulp distance for finite a & b
static inline uint32_t f32_ulp_dist(float a, float b)
{
uint32_t ua = f32_to_bits(a);
uint32_t ub = f32_to_bits(b);
if ((int32_t)(ub^ua) >= 0)
return u32_abs(ua-ub);
return ua+ub+f32_sign_bit_k;
}
typedef struct {
float abs;
uint32_t max;
uint32_t ulp[4];
} func_error_t;
// tracks total error data across multiple intervals
func_error_t func_error[LENGTHOF(func_table)] = {{0}};
// add error data from current interval to the totals
void error_to_totals(func_error_t* e)
{
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++, e++) {
func_error[fi].ulp[0] += e->ulp[0];
func_error[fi].ulp[1] += e->ulp[1];
func_error[fi].ulp[2] += e->ulp[2];
func_error[fi].ulp[3] += e->ulp[3];
if (e->max > func_error[fi].max) func_error[fi].max = e->max;
if (e->abs > func_error[fi].abs) func_error[fi].abs = e->abs;
}
}
void error_dump_i(func_error_t* e)
{
printf("|%15s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%12s|\n",
"func", "max ULP", "CR", "FR", "2 ULP", "> 2 ULP",
"CR%", "FR%", "2 ULP%","> 2 ULP%", "abs");
printf("|%15s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%12s|\n",
"---:", "---:", "---:", "---:", "---:", "---:",
"---:", "---:", "---:","---:", "---:");
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++, e++) {
uint32_t u0 = e->ulp[0];
uint32_t u1 = e->ulp[1];
uint32_t u2 = e->ulp[2];
uint32_t u3 = e->ulp[3];
uint32_t t = (u0+u1+u2+u3);
double s = 100.0/(double)t;
printf("|%15s|%10u|%10u|%10u|%10u|%10u|%10f|%10f|%10f|%10f|%e|\n",
func_table[fi].name, e->max,
u0, u1, u2, u3,
s*u0, s*u1, s*u2, s*u3, e->abs
);
}
}
void error_dump(void)
{
printf("\nTOTAL: %s\n", func_name);
error_dump_i(func_error);
}
static inline void test_error_add(func_error_t* error, float e, float a)
{
float d = fabsf(e-a);
if (d == 0.f) { error->ulp[0]++; return; };
uint32_t ulp = f32_ulp_dist(e,a);
if (ulp > error->max) { error->max = ulp; }
if (d > error->abs) { error->abs = d; }
if (ulp > 3) ulp = 3;
error->ulp[ulp]++;
}
void test_force(uint32_t x0, uint32_t x1)
{
if (x1 < x0) { uint32_t t = x0; x0=x1; x1=t; }
float f0 = f32_from_bits(x0);
float f1 = f32_from_bits(x1);
printf("\nchecking: %s on [%08x,%08x] [%e,%e]\n", func_name, x0,x1,f0,f1);
func_error_t error[LENGTHOF(func_table)] = {{0}};
for(uint32_t ix=x0; ix<=x1; ix++) {
float x = f32_from_bits(ix);
float cr = cr_func(x);
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) {
float r = func_table[fi].f(x);
test_error_add(error+fi, cr,r);
}
}
error_to_totals(error);
error_dump_i(error);
}
void test_linear_range(uint32_t x0, uint32_t x1, float k)
{
func_error_t error[LENGTHOF(func_table)] = {{0}};
float f0 = f32_from_bits(x0);
float f1 = f32_from_bits(x1);
printf("\nchecking: %s on [%08x,%08x] [%e,%e] : {linear result range %a*x}\n", func_name, x0,x1,f0,f1,k);
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) {
for(uint32_t xi=x0; xi<=x1; xi++) {
float x = f32_from_bits(xi);
float r = func_table[fi].f(x);
float cr = k*x;
test_error_add(error+fi,cr,r);
}
}
error_to_totals(error);
error_dump_i(error);
}
// test a full power-of-two interval starting from x. (need not start on a boundary)
void test_1pot(float x)
{
uint32_t x0 = f32_to_bits(x);
uint32_t x1 = f32_to_bits(2.f*x)-1;
test_force(x0,x1);
}
// correctly rounded f(x) = k on [x0,x1]
void test_const_range(uint32_t x0, uint32_t x1, float k)
{
func_error_t error[LENGTHOF(func_table)] = {{0}};
float f0 = f32_from_bits(x0);
float f1 = f32_from_bits(x1);
printf("\nchecking: %s on [%08x,%08x] [%e,%e] : {constant result range %a}\n", func_name, x0,x1,f0,f1,k);
for(uint32_t fi=0; fi < LENGTHOF(func_table); fi++) {
for(uint32_t xi=x0; xi<=x1; xi++) {
float x = f32_from_bits(xi);
float r = func_table[fi].f(x);
test_error_add(error+fi,k,r);
}
}
error_to_totals(error);
error_dump_i(error);
}
void test_all(void)
{
uint32_t x0 = 0;
uint32_t x1 = 0x39b89ba2;
test_linear_range(x0, x1, 1.f);
x0 = x1+1; x1=f32_to_bits(1.f/64.f);
test_force(x0,x1);
// break-down the interior a bit. probably overkill WRT breakdown
test_1pot(1.f/64.f);
test_1pot(1.f/32.f);
test_1pot(1.f/16.f);
test_1pot(1.f/ 8.f);
test_1pot(1.f/ 4.f);
test_1pot(1.f/ 2.f);
}
int main(void)
{
printf("<details markdown=\"1\"><summary>click for range breakdown</summary>\n\n");
test_all();
printf("\n</details>\n\n");
// dump summary
error_dump();
return 0;
}
click for range breakdown

checking: atan on [00000000,39b89ba2] [0.000000e+00,3.521117e-04] : {linear result range 0x1p+0*x}

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 463150 19 36 36 968399688 0.000002 0.000004 0.000004 99.999991 9.720359e-06
atan_x 869840 10 18 18 968399733 0.000001 0.000002 0.000002 99.999995 1.922101e-05
atan_e 926918 9 17 17 968399736 0.000001 0.000002 0.000002 99.999996 2.055746e-05

checking: atan on [39b89ba3,3c800000] [3.521117e-04,1.562500e-02]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 463150 0 0 0 46621790 0.000000 0.000000 0.000000 100.000000 4.308028e-04
atan_x 867983 0 0 0 46621790 0.000000 0.000000 0.000000 100.000000 7.907441e-04
atan_e 925032 0 0 0 46621790 0.000000 0.000000 0.000000 100.000000 8.483445e-04

checking: atan on [3c800000,3cffffff] [1.562500e-02,3.125000e-02]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 462609 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 8.583758e-04
atan_x 753944 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 1.459263e-03
atan_e 809023 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 1.570972e-03

checking: atan on [3d000000,3d7fffff] [3.125000e-02,6.250000e-02]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 460986 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 1.691040e-03
atan_x 644221 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 2.459977e-03
atan_e 697071 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 2.669439e-03

checking: atan on [3d800000,3dffffff] [6.250000e-02,1.250000e-01]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 454503 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 3.180631e-03
atan_x 444822 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 3.324963e-03
atan_e 492504 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 3.688060e-03

checking: atan on [3e000000,3e7fffff] [1.250000e-01,2.500000e-01]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 428760 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 4.879326e-03
atan_x 266419 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 3.347740e-03
atan_e 290786 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 3.741622e-03

checking: atan on [3e800000,3effffff] [2.500000e-01,5.000000e-01]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 329009 0 0 0 8388608 0.000000 0.000000 0.000000 100.000000 4.952013e-03
atan_x 138966 65 85 77 8388381 0.000775 0.001013 0.000918 99.997294 2.790779e-03
atan_e 172714 50 99 76 8388383 0.000596 0.001180 0.000906 99.997318 2.678216e-03

checking: atan on [3f000000,3f7fffff] [5.000000e-01,9.999999e-01]

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 83082 53 93 84 8388378 0.000632 0.001109 0.001001 99.997258 4.952073e-03
atan_x 107120 45 91 90 8388382 0.000536 0.001085 0.001073 99.997306 3.347754e-03
atan_e 109058 6 54 82 8388466 0.000072 0.000644 0.000978 99.998307 3.741682e-03

TOTAL: atan

func max ULP CR FR 2 ULP > 2 ULP CR% FR% 2 ULP% > 2 ULP% abs
atan_p 463150 72 129 120 1065352896 0.000007 0.000012 0.000011 99.999970 4.952073e-03
atan_x 869840 120 194 185 1065352718 0.000011 0.000018 0.000017 99.999953 3.347754e-03
atan_e 926918 65 170 175 1065352807 0.000006 0.000016 0.000016 99.999962 3.741682e-03
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment