Skip to content

Instantly share code, notes, and snippets.

@syoyo
Created April 4, 2015 16:39
Show Gist options
  • Save syoyo/2f89e50edd74d4179d03 to your computer and use it in GitHub Desktop.
Save syoyo/2f89e50edd74d4179d03 to your computer and use it in GitHub Desktop.
SIMD log2() approximate function in HPC-ACE(not yet optimised)
// Based on glsl-sse2
inline __m128d mylog2(const __m128d v) {
int ibuf[4];
__m128d o = _mm_set_pd(1.0, 1.0);
__m128i infVal = _mm_set_epi32(0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000);
__m128d c = *(reinterpret_cast<__m128d*>(&infVal));
__m128d f = _mm_sub_pd(_mm_or_pd(_mm_andnot_pd(c, v),
_mm_and_pd(c, o)), o);
//const __m128i iVal = *(reinterpret_cast<const __m128i*>(&v));
//__m128i a = _mm_sub_epi32(_mm_srli_epi32(iVal, 20),
// _mm_set_epi32(1023, 1023, 1023, 1023));
_mm_store_pd(reinterpret_cast<double*>(ibuf), v);
int a0 = (ibuf[0] >> 20) - 1023;
int a1 = (ibuf[2] >> 20) - 1023;
const __m128d a = _mm_set_pd(a0, a1);
__m128d hi = _mm_add_pd(_mm_mul_pd(_mm_set1_pd( 3.61276447184348752E-05), f),
_mm_set1_pd(-4.16662127033480827E-04));
__m128d lo = _mm_add_pd(_mm_mul_pd(_mm_set1_pd(-1.43988260692073185E-01), f),
_mm_set1_pd( 1.60245637034704267E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd( 2.28193656337578229E-03));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd(-1.80329036970820794E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd(-7.93793829370930689E-03));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd( 2.06098446037376922E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd( 1.98461565426430164E-02));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd(-2.40449108727688962E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd(-3.84093543662501949E-02));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd( 2.88539004851839364E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd( 6.08335872067172597E-02));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd(-3.60673760117245982E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd(-8.27937055456904317E-02));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd( 4.80898346961226595E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd( 1.01392360727236079E-01));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd(-7.21347520444469934E-01));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd(-1.16530490533844182E-01));
lo = _mm_add_pd(_mm_mul_pd(f, lo), _mm_set1_pd( 0.44269504088896339E+00));
hi = _mm_add_pd(_mm_mul_pd(f, hi), _mm_set1_pd( 1.30009193360025350E-01));
__m128d x2 = _mm_mul_pd(f, f);
__m128d x10 = _mm_mul_pd(x2, x2);
x10 = _mm_mul_pd(x10, x10);
x10 = _mm_mul_pd(x2, x10);
return _mm_add_pd(_mm_add_pd(_mm_mul_pd(
_mm_add_pd(_mm_mul_pd(x10, hi), lo), f), f), a);
}
int
main(int argc, char** argv)
{
__m128d a = _mm_set_pd((double)argc, (double)argc);
__m128d bb = mylog2(a);
double ref = log((double)argc) / log(2.0);
double ret[2];
_mm_store_pd(ret, bb);
printf("[SIMD] double = %f, %f\n", ret[0], ret[1]);
printf("[REF] double = %f\n", ref);
printf("[DIFF] double = %f, %f\n", ref - ret[0], ref - ret[1]);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment