Skip to content

Instantly share code, notes, and snippets.

@velipso
Last active March 25, 2024 12:50
Show Gist options
  • Save velipso/fc5a58b7d9fc020ecf7f2f5fc907dfa5 to your computer and use it in GitHub Desktop.
Save velipso/fc5a58b7d9fc020ecf7f2f5fc907dfa5 to your computer and use it in GitHub Desktop.
Faster atan2 algorithm (worse accuracy)
// public domain
//
// algorithm adopted from:
// https://dspguru.com/dsp/tricks/fixed-point-atan2-with-self-normalization/
//
// when ran without arguments, it simply calculates the maximum error between
// the functions below and atan2f
//
// it tests all (x,y) values from -100.00 to +100.00
//
// output:
// [fast] max err: 0.071115
// [slow] max err: 0.010163
//
// according to my (very simple) timing tests, these two functions are faster
// than atan2f
//
// when compiled without any optimizations, the timing on my machine is:
// atan2f takes about 10.20 seconds
// fast_atan2 takes about 6.99 seconds
// slow_atan2 takes about 9.40 seconds
//
// compiled with -O2, the same tests take:
// atan2f takes about 3.44 seconds
// fast_atan2 takes about 1.17 seconds
// slow_atan2 takes about 1.45 seconds
#include <stdio.h>
#include <stdint.h>
#include <math.h>
static inline float fast_atan2(float y, float x){
static const float c1 = M_PI / 4.0;
static const float c2 = M_PI * 3.0 / 4.0;
if (y == 0 && x == 0)
return 0;
float abs_y = fabsf(y);
float angle;
if (x >= 0)
angle = c1 - c1 * ((x - abs_y) / (x + abs_y));
else
angle = c2 - c1 * ((x + abs_y) / (abs_y - x));
if (y < 0)
return -angle;
return angle;
}
static inline float slow_atan2(float y, float x){
static const float c1 = M_PI / 4.0;
static const float c2 = M_PI * 3.0 / 4.0;
static const float c3 = M_PI / 16.0;
static const float c4 = M_PI * 5.0 / 16.0;
if (y == 0 && x == 0)
return 0;
float abs_y = fabsf(y);
float angle;
if (x >= 0){
float r = ((x - abs_y) / (x + abs_y));
angle = c3 * r * r * r - c4 * r + c1;
}
else{
float r = ((x + abs_y) / (abs_y - x));
angle = c3 * r * r * r - c4 * r + c2;
}
if (y < 0)
return -angle;
return angle;
}
int main(int argc, char **argv){
const int step = 97;
if (argc == 2 && argv[1][0] == '1'){
printf("testing atan2f\n");
float total = 0;
for (int y = -100000; y <= 100000; y++){
for (int x = -100000; x <= 100000; x += step){
total += atan2f(y * 0.01f, x * 0.01f);
}
}
// output something to prevent out of control optimizations
printf("%f\n", total);
return 0;
}
else if (argc == 2 && argv[1][0] == '2'){
printf("testing fast_atan2\n");
float total = 0;
for (int y = -100000; y <= 100000; y++){
for (int x = -100000; x <= 100000; x += step){
total += fast_atan2(y * 0.01f, x * 0.01f);
}
}
printf("%f\n", total);
return 0;
}
else if (argc == 2 && argv[1][0] == '3'){
printf("testing slow_atan2\n");
float total = 0;
for (int y = -100000; y <= 100000; y++){
for (int x = -100000; x <= 100000; x += step){
total += slow_atan2(y * 0.01f, x * 0.01f);
}
}
printf("%f\n", total);
return 0;
}
float max_fast_err = 0;
float max_slow_err = 0;
for (int y = -10000; y <= 10000; y++){
for (int x = -10000; x <= 10000; x++){
float fx = x * 0.01f;
float fy = y * 0.01f;
float ans = atan2f(fy, fx);
float fast = fast_atan2(fy, x);
float slow = slow_atan2(fy, x);
float fe = fabsf(ans - fast);
float se = fabsf(ans - slow);
if (fe > max_fast_err)
max_fast_err = fe;
if (se > max_slow_err)
max_slow_err = se;
}
}
printf(
"[fast] max err: %f\n"
"[slow] max err: %f\n",
max_fast_err, max_slow_err);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment