Last active
October 17, 2015 12:48
-
-
Save reinsteam/d12f65c6b02613ae7218 to your computer and use it in GitHub Desktop.
Alternative 4x4 transpose methods that, unlike some implementations of _MM_TRANSPOSE4_PS defined in xmmintrin.h, do not use shufps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Transposes, in place, the 4x4 float matrix whose rows are v0..v3.
 *
 * On entry each vN holds { xN, yN, zN, wN }. On return:
 *   v0 = { x0, x1, x2, x3 }
 *   v1 = { y0, y1, y2, y3 }
 *   v2 = { z0, z1, z2, z3 }
 *   v3 = { w0, w1, w2, w3 }
 *
 * All four rows are taken by reference so that every transposed row reaches
 * the caller. This variant interleaves neighbouring rows (v0 with v1, v2 with
 * v3) and then joins the resulting 64-bit halves with movelh/movehl, so no
 * shufps is required.
 */
static void transpose_4x4_ver0(__m128 & v0, __m128 & v1, __m128 & v2, __m128 & v3)
{
    /* Stage 1: interleave adjacent rows lane by lane. */
    const __m128 a0 = _mm_unpacklo_ps(v0, v1);  /* a0 = { x0, x1, y0, y1 } */
    const __m128 a1 = _mm_unpackhi_ps(v0, v1);  /* a1 = { z0, z1, w0, w1 } */
    const __m128 a2 = _mm_unpacklo_ps(v2, v3);  /* a2 = { x2, x3, y2, y3 } */
    const __m128 a3 = _mm_unpackhi_ps(v2, v3);  /* a3 = { z2, z3, w2, w3 } */

    /*
     * Stage 2: combine 64-bit halves. A second unpack would interleave single
     * lanes again and yield { x0, x2, x1, x3 }, so whole pairs are moved.
     * Note _mm_movehl_ps(a, b) returns { b[2], b[3], a[2], a[3] }, hence the
     * swapped argument order.
     */
    v0 = _mm_movelh_ps(a0, a2);                 /* v0 = { x0, x1, x2, x3 } */
    v1 = _mm_movehl_ps(a2, a0);                 /* v1 = { y0, y1, y2, y3 } */
    v2 = _mm_movelh_ps(a1, a3);                 /* v2 = { z0, z1, z2, z3 } */
    v3 = _mm_movehl_ps(a3, a1);                 /* v3 = { w0, w1, w2, w3 } */
}
/*
 * Transposes, in place, the 4x4 float matrix whose rows are v0..v3, using
 * only unpcklps/unpckhps.
 *
 * On entry each vN holds { xN, yN, zN, wN }. On return:
 *   v0 = { x0, x1, x2, x3 }
 *   v1 = { y0, y1, y2, y3 }
 *   v2 = { z0, z1, z2, z3 }
 *   v3 = { w0, w1, w2, w3 }
 *
 * All four rows are taken by reference so that every transposed row reaches
 * the caller. Rows are paired with a stride of two (v0 with v2, v1 with v3)
 * so that a second round of lane interleaving lands each element in its
 * final position.
 */
static void transpose_4x4_ver1(__m128 & v0, __m128 & v1, __m128 & v2, __m128 & v3)
{
    /* Stage 1: interleave rows that are two apart. */
    const __m128 a0 = _mm_unpacklo_ps(v0, v2);  /* a0 = { x0, x2, y0, y2 } */
    const __m128 a1 = _mm_unpacklo_ps(v1, v3);  /* a1 = { x1, x3, y1, y3 } */
    const __m128 a2 = _mm_unpackhi_ps(v0, v2);  /* a2 = { z0, z2, w0, w2 } */
    const __m128 a3 = _mm_unpackhi_ps(v1, v3);  /* a3 = { z1, z3, w1, w3 } */

    /* Stage 2: interleave the even-row and odd-row results. */
    v0 = _mm_unpacklo_ps(a0, a1);               /* v0 = { x0, x1, x2, x3 } */
    v1 = _mm_unpackhi_ps(a0, a1);               /* v1 = { y0, y1, y2, y3 } */
    v2 = _mm_unpacklo_ps(a2, a3);               /* v2 = { z0, z1, z2, z3 } */
    v3 = _mm_unpackhi_ps(a2, a3);               /* v3 = { w0, w1, w2, w3 } */
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment