Created
April 1, 2017 17:26
-
-
Save Auburn/c32510483e839f61ed98eb11a9f32519 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <intrin.h> | |
// Expands to a brace-enclosed list of 16 byte values: each of the four 32-bit
// arguments is split into four bytes, least-significant byte first, in the
// order a, b, c, d. This file uses it to initialise __m128i constants.
// NOTE(review): presumably this relies on the target type being initialisable
// from 16 byte-sized elements (as with MSVC's __m128i) - confirm before using
// it with another compiler.
// Every argument use is parenthesised so that expression arguments such as
// `1 | 0x200` are split as a whole instead of being re-associated by the
// higher precedence of `>>` and `&`.
#define INT128(a,b,c,d) { \
    (a) & 255, ((a) >> 8) & 255, ((a) >> 16) & 255, ((a) >> 24) & 255, \
    (b) & 255, ((b) >> 8) & 255, ((b) >> 16) & 255, ((b) >> 24) & 255, \
    (c) & 255, ((c) >> 8) & 255, ((c) >> 16) & 255, ((c) >> 24) & 255, \
    (d) & 255, ((d) >> 8) & 255, ((d) >> 16) & 255, ((d) >> 24) & 255 }
const __m128i ONEi = INT128( 1,1,1,1 ); | |
const __m128 ONE = { 1,1,1,1 }; | |
const __m128 SIX = { 6,6,6,6 }; | |
const __m128 TEN = { 10,10,10,10 }; | |
const __m128 FIFTEEN = { 15,15,15,15 }; | |
const __m128i Xi = INT128( 1,0,1,0 ); | |
const __m128i Yi = INT128( 1,1,0,0 ); | |
const __m128 X = { 1,0,1,0 }; | |
const __m128 Y = { 1,1,0,0 }; | |
const __m128i XPRIME = INT128( X_PRIME, X_PRIME, X_PRIME, X_PRIME ); | |
const __m128i YPRIME = INT128( Y_PRIME, Y_PRIME, Y_PRIME, Y_PRIME ); | |
const __m128i ZPRIME = INT128( Z_PRIME, Z_PRIME, Z_PRIME, Z_PRIME ); | |
const __m128i i60493 = INT128( 60493, 60493, 60493, 60493 ); | |
const __m128i i13 = INT128( 13, 13, 13, 13 ); | |
const __m128i i12 = INT128( 12, 12, 12, 12 ); | |
const __m128i i8 = INT128( 8, 8, 8, 8 ); | |
const __m128i i2 = INT128( 2, 2, 2, 2 ); | |
__m128 Grad3D(__m128i seed, __m128i xi, __m128i yi, __m128i zi, __m128 x, __m128 y, __m128 z) | |
{ | |
__m128i hash = seed; | |
hash = _mm_xor_si128(_mm_mullo_epi32(xi, XPRIME), hash); | |
hash = _mm_xor_si128(_mm_mullo_epi32(yi, YPRIME), hash); | |
hash = _mm_xor_si128(_mm_mullo_epi32(zi, ZPRIME), hash); | |
hash = _mm_mullo_epi32(_mm_mullo_epi32(_mm_mullo_epi32(hash, hash), i60493), hash); | |
hash = _mm_xor_si128(_mm_srli_epi32(hash, 13), hash); | |
__m128i hasha13 = _mm_and_si128(hash, i13); | |
//if h < 8 then x, else y | |
__m128 l8 = _mm_castsi128_ps(_mm_cmplt_epi32(hasha13, i8)); | |
__m128 u = _mm_blendv_ps(y, x, l8); | |
//if h < 4 then y else if h is 12 or 14 then x else z | |
__m128 l4 = _mm_castsi128_ps(_mm_cmplt_epi32(hasha13, i2)); | |
__m128 h12o14 = _mm_castsi128_ps(_mm_cmpeq_epi32(i12, hasha13)); | |
__m128 v = _mm_blendv_ps(_mm_blendv_ps(z, x, h12o14), y, l4); | |
//if h1 then -u else u | |
//if h2 then -v else v | |
__m128 h1 = _mm_castsi128_ps(_mm_slli_epi32(hash, 31)); | |
__m128 h2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_and_si128(hash, i2), 30)); | |
//then add them | |
return _mm_add_ps(_mm_xor_ps(u, h1), _mm_xor_ps(v, h2)); | |
} | |
// Evaluates one sample of 3D gradient noise at (x, y, z).
// The four (x, y) corners of the surrounding lattice cell are processed in
// parallel, one per SIMD lane, first for the lower z plane and then for the
// upper one. The two planes are blended in SIMD; the remaining y and x blends
// are done in scalar code with Lerp.
// `offset` is accepted for interface compatibility but is not used here; the
// hash is seeded from m_seed.
FN_DECIMAL FastNoise::SinglePerlin(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z)
{
    (void)offset;

    // Lane layout: [0] = x, [1] = y, [2] = z, [3] = 0 (unused).
    const __m128 pos = _mm_set_ps(0, z, y, x);
    const __m128 cellF = _mm_floor_ps(pos);
    // cellF already holds whole numbers, so the conversion's rounding mode is irrelevant.
    const __m128i cell = _mm_cvtps_epi32(cellF);
    const __m128 frac = _mm_sub_ps(pos, cellF);

    // Quintic fade per axis: ((t * 6 - 15) * t + 10) * t * t * t.
    __m128 fade = _mm_sub_ps(_mm_mul_ps(frac, SIX), FIFTEEN);
    fade = _mm_add_ps(_mm_mul_ps(fade, frac), TEN);
    fade = _mm_mul_ps(fade, frac);
    fade = _mm_mul_ps(fade, frac);
    fade = _mm_mul_ps(fade, frac);

    // Broadcast each axis to all lanes, then apply the per-lane corner offsets
    // (X / Y for the fractional part, Xi / Yi for the integer cell).
    const __m128 xf = _mm_sub_ps(_mm_shuffle_ps(frac, frac, _MM_SHUFFLE(0, 0, 0, 0)), X);
    const __m128 yf = _mm_sub_ps(_mm_shuffle_ps(frac, frac, _MM_SHUFFLE(1, 1, 1, 1)), Y);
    const __m128 zf0 = _mm_shuffle_ps(frac, frac, _MM_SHUFFLE(2, 2, 2, 2));

    const __m128i xi = _mm_add_epi32(Xi, _mm_shuffle_epi32(cell, _MM_SHUFFLE(0, 0, 0, 0)));
    const __m128i yi = _mm_add_epi32(Yi, _mm_shuffle_epi32(cell, _MM_SHUFFLE(1, 1, 1, 1)));
    const __m128i zi0 = _mm_shuffle_epi32(cell, _MM_SHUFFLE(2, 2, 2, 2));

    const __m128i seed = _mm_set1_epi32(m_seed);

    // Gradient contributions of the four corners on the lower and upper z planes.
    const __m128 zr0 = Grad3D(seed, xi, yi, zi0, xf, yf, zf0);
    const __m128 zf1 = _mm_sub_ps(zf0, ONE);
    const __m128i zi1 = _mm_add_epi32(zi0, ONEi);
    const __m128 zr1 = Grad3D(seed, xi, yi, zi1, xf, yf, zf1);

    // Blend the two planes: zr = zr0 + (zr1 - zr0) * fade.z
    const __m128 fadeZ = _mm_shuffle_ps(fade, fade, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 zr = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(zr1, zr0), fadeZ), zr0);

    // Copy the lanes out through memory instead of reading compiler-specific
    // members of __m128, so this part does not depend on one compiler's layout.
    float corner[4];
    float t[4];
    _mm_storeu_ps(corner, zr);
    _mm_storeu_ps(t, fade);

    // Lane order follows the X / Y offset constants: lane 3 is the (x0, y0)
    // corner, lane 2 is (x1, y0), lane 1 is (x0, y1) and lane 0 is (x1, y1).
    const float xy00 = corner[3];
    const float xy10 = corner[2];
    const float xy01 = corner[1];
    const float xy11 = corner[0];

    // Blend along y at each x edge, then along x.
    const float x0 = Lerp(xy00, xy01, t[1]);
    const float x1 = Lerp(xy10, xy11, t[1]);
    return Lerp(x0, x1, t[0]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment