Last active
March 16, 2018 15:28
-
-
Save andersx/c477b2b9dfb831768576 to your computer and use it in GitHub Desktop.
EXP(x) for AVX2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fast approximation of exp(x) for eight packed single-precision floats.
//
// The function evaluates 2^p with p = x * log2(e). The integer part of p
// lands in the exponent field and a small rational fit of the fractional
// part lands in the mantissa field; both are produced at once by scaling a
// float by 2^23, truncating it to an integer and reinterpreting those bits
// as a float.
//
// Input range:
//   * The approximation is meaningful while -126 <= p < 128, i.e. for x
//     between roughly -87.3 and +88.7.
//   * Lanes with p < -126 are clamped to p = -126 and therefore return a
//     tiny non-negative value (about 2^-126) instead of a garbage bit
//     pattern.
//   * Lanes with p >= 128 overflow the exponent field; their result is not
//     meaningful. NaN inputs do not produce NaN.
//
// Accuracy: the reciprocal comes from _mm256_rcp_ps (approximate, not an
// exact division), so expect a relative error on the order of 1e-3.
//
// Requires AVX2 and FMA support (intrinsics from <immintrin.h>).
//
// @param q  Eight packed floats x.
// @return   Eight packed floats approximating exp(x), lane by lane.
static inline __m256 _mm256_expfast_ps(const __m256 &q) {
    const __m256 INVLOG_2  = _mm256_set1_ps(1.442695040f);  // log2(e) = 1 / ln(2)
    const __m256 BIT_SHIFT = _mm256_set1_ps(8388608.0f);    // 2^23: moves the value into the exponent field
    const __m256 ONE       = _mm256_set1_ps(1.0f);
    const __m256 MIN_EXP   = _mm256_set1_ps(-126.0f);       // smallest exponent of a normal float
    const __m256 C1 = _mm256_set1_ps(121.2740838f);
    const __m256 C2 = _mm256_set1_ps(27.7280233f);
    const __m256 C3 = _mm256_set1_ps(4.84252568f);
    const __m256 C4 = _mm256_set1_ps(1.49012907f);

    // p = x * log2(e), clamped from below so the scaled value built at the
    // end can never become negative.
    const __m256 p = _mm256_max_ps(_mm256_mul_ps(INVLOG_2, q), MIN_EXP);

    // Truncation rounds toward zero, so for p <= 0 the remainder p - w lies
    // in (-1, 0] and needs +1 to become a fraction in (0, 1]; for p > 0 the
    // remainder is already in [0, 1). The compare yields an all-ones mask on
    // lanes with p <= 0, which selects 1.0f there and 0.0f elsewhere.
    const __m256i w = _mm256_cvttps_epi32(p);
    const __m256 offset =
        _mm256_and_ps(_mm256_cmp_ps(p, _mm256_setzero_ps(), _CMP_LE_OQ), ONE);
    const __m256 z = _mm256_sub_ps(_mm256_add_ps(p, offset), _mm256_cvtepi32_ps(w));

    // Rational fit: p + C1 + C2 / (C3 - z) - C4 * z
    __m256 rcp = _mm256_rcp_ps(_mm256_sub_ps(C3, z));
    rcp = _mm256_fmadd_ps(rcp, C2, _mm256_add_ps(C1, p));
    rcp = _mm256_fnmadd_ps(C4, z, rcp);

    // Scale by 2^23, truncate to an integer and reinterpret those bits as
    // the resulting float.
    return _mm256_castsi256_ps(_mm256_cvttps_epi32(_mm256_mul_ps(BIT_SHIFT, rcp)));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment