Skip to content

Instantly share code, notes, and snippets.

@native-m
Last active September 10, 2018 06:58
Show Gist options
  • Select an option

  • Save native-m/154a85a6b3fac1d23d6623241a9fdb1e to your computer and use it in GitHub Desktop.

Select an option

Save native-m/154a85a6b3fac1d23d6623241a9fdb1e to your computer and use it in GitHub Desktop.
Fast math functions approximation
// License under: WTFPL 1.0
// WARNING: I haven't checked the approximation error yet.
#ifndef __FASTMATH_H__
#define __FASTMATH_H__
#define _USE_MATH_DEFINES
#include <math.h>
inline double fast_fmod(double x, double y)
{
__m128d xA = _mm_load_sd(&x); // Load x
__m128d yA = _mm_load_sd(&y); // Load y
__m128d ret = _mm_div_sd(xA, yA); // x / y
// (ret - floor(ret)) * y
ret = _mm_mul_sd(_mm_sub_sd(ret, _mm_cvtsi32_sd(ret, _mm_cvttsd_si32(ret))), *(__m128d*)&y);
return *(double*)&ret;
}
inline float fast_fmod(float x, float y)
{
__m128 xA = _mm_load_ss(&x); // Load x
__m128 yA = _mm_load_ss(&y); // Load y
__m128 ret = _mm_div_ss(xA, yA); // x / y
// (ret - floor(ret)) * y
ret = _mm_mul_ss(_mm_sub_ss(ret, _mm_cvtsi32_ss(ret, _mm_cvttss_si32(ret))), *(__m128*)&y);
return *(float*)&ret;
}
inline double fast_sin(double x)
{
double xSq;
double result, tmp;
const double twopi = 2.0 * M_PI;
tmp = fast_fmod(x, twopi) - M_PI;
xSq = tmp * tmp;
result = -1.605904383682161334e-10;
result *= xSq;
result += 2.505210838544172022e-08;
result *= xSq;
result -= 2.755731922398589251e-06;
result *= xSq;
result += 0.0001984126984126984125;
result *= xSq;
result -= 0.008333333333333333218;
result *= xSq;
result += 0.1666666666666666574;
result *= xSq;
result -= 1.0;
return result * tmp;
}
inline float fast_sin(float x)
{
float xSq;
float result, tmp;
const float twopi = 2.0 * (float)M_PI;
tmp = fast_fmod(x, twopi) - (float)M_PI;
xSq = tmp * tmp;
result = -1.605904437e-10f;
result *= xSq;
result += 2.505210794e-08f;
result *= xSq;
result -= 2.755731884e-06f;
result *= xSq;
result += 0.0001984127011f;
result *= xSq;
result -= 0.00833333333786f;
result *= xSq;
result += 0.166666666716f;
result *= xSq;
result -= 1.0f;
return result * tmp;
}
inline double fast_cos(double x)
{
double xSq;
double result;
const double twopi = 2.0 * M_PI;
result = fast_fmod(x, twopi) - M_PI;
xSq = result * result;
result = -2.087675698786810019e-09;
result *= xSq;
result += 2.755731922398588828e-07;
result *= xSq;
result -= 2.480158730158730157e-05;
result *= xSq;
result += 0.001388888888888888942;
result *= xSq;
result -= 0.04166666666666666435;
result *= xSq;
result += 0.5;
result *= xSq;
result -= 1.0;
return result;
}
inline float fast_cos(float x)
{
float xSq;
float result;
const float twopi = 2.0 * (float)M_PI;
result = fast_fmod(x, twopi) - (float)M_PI;
xSq = result * result;
result = -2.087675588e-09f;
result *= xSq;
result += 2.755731998e-07f;
result *= xSq;
result -= 2.480158764e-05f;
result *= xSq;
result += 0.001388888923f;
result *= xSq;
result -= 0.04166666791f;
result *= xSq;
result += 0.5f;
result *= xSq;
result -= 1.0f;
return result;
}
inline double fast_sqrt(double x)
{
__m128d d = _mm_load_sd(&x);
d = _mm_sqrt_sd(d, d);
return *(double*)&d;
}
inline float fast_sqrt(float x)
{
__m128 d = _mm_load_ss(&x);
d = _mm_sqrt_ss(d);
return *(float*)&d;
}
inline double fast_rsqrt(double x)
{
// Since we dont have a native SSE instruction for inverse sqrt.
// We have to replace it with 1 / sqrt(x) instead. Maybe i will replace it with NR method later.
return 1.0 / fast_sqrt(x);
}
inline float fast_rsqrt(float x)
{
long i;
float x2, y;
const float threehalfs = 1.5f;
__m128 r;
x2 = x * 0.5; // x / 2
r = _mm_load_ss(&x); // Load x
r = _mm_rsqrt_ss(r); // Compute early inverse sqrt with SSE instruction. Btw, this boi is less accurate than the original 1 / sqrt(x).
y = *(float*)&r; // Store early inverse sqrt to y
y = y * (threehalfs - (x2 * y * y)); // Do NR step (1st iteration)
//y = y * (threehalfs - (x2 * y * y)); // Do NR step (2nd iteration, this can be removed. More steps, more accurate)
return y;
}
#endif // __FASTMATH_H__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment