Skip to content

Instantly share code, notes, and snippets.

@reinsteam
Last active October 17, 2015 12:48
Show Gist options
  • Save reinsteam/d12f65c6b02613ae7218 to your computer and use it in GitHub Desktop.
Save reinsteam/d12f65c6b02613ae7218 to your computer and use it in GitHub Desktop.
Alternative 4x4 transpose methods, compared to _MM_TRANSPOSE4_PS defined in xmmintrin.h, that do not use shufps
/*
 * Transposes a 4x4 float matrix held in four SSE registers, in place.
 *
 * On entry vN holds row N as { xN, yN, zN, wN }. On return v0..v3 hold the
 * columns: v0 = all x, v1 = all y, v2 = all z, v3 = all w.
 *
 * All four arguments are taken by reference so that every transposed row is
 * visible to the caller.
 *
 * Stage 1 interleaves adjacent rows (0 with 1, 2 with 3); stage 2 glues the
 * matching 64-bit halves together with movelh/movehl. A second round of
 * unpack would interleave again and leave the two middle lanes swapped
 * ({ x0, x2, x1, x3 }), so it cannot be used here. No shufps is required.
 */
static void transpose_4x4_ver0(__m128 & v0, __m128 & v1, __m128 & v2, __m128 & v3)
{
    __m128 a0 = _mm_unpacklo_ps(v0, v1); /* a0 = { x0, x1, y0, y1 } */
    __m128 a1 = _mm_unpackhi_ps(v0, v1); /* a1 = { z0, z1, w0, w1 } */
    __m128 a2 = _mm_unpacklo_ps(v2, v3); /* a2 = { x2, x3, y2, y3 } */
    __m128 a3 = _mm_unpackhi_ps(v2, v3); /* a3 = { z2, z3, w2, w3 } */

    /* movelh(a, b) = { a[0], a[1], b[0], b[1] }
       movehl(a, b) = { b[2], b[3], a[2], a[3] }  (note the swapped operand order) */
    v0 = _mm_movelh_ps(a0, a2);          /* v0 = { x0, x1, x2, x3 } */
    v1 = _mm_movehl_ps(a2, a0);          /* v1 = { y0, y1, y2, y3 } */
    v2 = _mm_movelh_ps(a1, a3);          /* v2 = { z0, z1, z2, z3 } */
    v3 = _mm_movehl_ps(a3, a1);          /* v3 = { w0, w1, w2, w3 } */
}
/*
 * Transposes a 4x4 float matrix held in four SSE registers, in place,
 * using only unpacklo/unpackhi (no shufps).
 *
 * On entry vN holds row N as { xN, yN, zN, wN }. On return v0..v3 hold the
 * columns: v0 = all x, v1 = all y, v2 = all z, v3 = all w.
 *
 * All four arguments are taken by reference so that every transposed row is
 * visible to the caller.
 *
 * Stage 1 interleaves rows two apart (0 with 2, 1 with 3); interleaving those
 * results once more in stage 2 puts the elements in 0,1,2,3 order.
 */
static void transpose_4x4_ver1(__m128 & v0, __m128 & v1, __m128 & v2, __m128 & v3)
{
    __m128 a0 = _mm_unpacklo_ps(v0, v2); /* a0 = { x0, x2, y0, y2 } */
    __m128 a1 = _mm_unpacklo_ps(v1, v3); /* a1 = { x1, x3, y1, y3 } */
    __m128 a2 = _mm_unpackhi_ps(v0, v2); /* a2 = { z0, z2, w0, w2 } */
    __m128 a3 = _mm_unpackhi_ps(v1, v3); /* a3 = { z1, z3, w1, w3 } */

    v0 = _mm_unpacklo_ps(a0, a1);        /* v0 = { x0, x1, x2, x3 } */
    v1 = _mm_unpackhi_ps(a0, a1);        /* v1 = { y0, y1, y2, y3 } */
    v2 = _mm_unpacklo_ps(a2, a3);        /* v2 = { z0, z1, z2, z3 } */
    v3 = _mm_unpackhi_ps(a2, a3);        /* v3 = { w0, w1, w2, w3 } */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment