Skip to content

Instantly share code, notes, and snippets.

@lydonchandra
Last active January 1, 2016 00:29
Show Gist options
  • Save lydonchandra/8066459 to your computer and use it in GitHub Desktop.
Save lydonchandra/8066459 to your computer and use it in GitHub Desktop.
// Demo: scale an unsigned 32-bit value by 1.5 in all eight lanes of a 256-bit
// AVX register using MSVC inline assembly, then print the eight lanes.
//
// Requirements: AVX2-capable CPU (vpbroadcastd with a ymm destination) and a
// 32-bit MSVC build (the x64 MSVC compiler does not accept __asm blocks).
//
// Limits: vcvtdq2ps interprets each lane as a SIGNED 32-bit integer and a
// float carries 24 bits of precision, so results are exact only while
// X <= 2^24; the scaled value must also fit in a signed 32-bit integer.

// Packed scale factor: eight floats, each 1.5f.
// (The name is historical -- the value is 1.5, not 0.5.)
__m256 half256ps = _mm256_set1_ps(1.5f);
// Receives the eight scaled lanes as 32-bit integers.
__m256i result256i;
// Input value. It must be initialised before the asm block reads it;
// 5 is an arbitrary sample (5 * 1.5 = 7.5, truncated to 7 in every lane).
unsigned X = 5;
__asm {
    // ymm0 = X replicated into all eight 32-bit lanes (integer bit pattern)
    vpbroadcastd ymm0, X
    // Convert the integer lanes to float so vmulps operates on real values
    // rather than on integer bits reinterpreted as IEEE-754 floats.
    vcvtdq2ps ymm0, ymm0
    // ymm1 = packed 1.5f (float-domain unaligned load)
    vmovups ymm1, half256ps
    // ymm2 = ymm0 * ymm1, lane by lane
    vmulps ymm2, ymm0, ymm1
    // Convert back to 32-bit integers, truncating toward zero, so the
    // lanes can be printed with %u.
    vcvttps2dq ymm2, ymm2
    // Store the eight integer lanes into result256i
    vmovdqu result256i, ymm2
    // Clear the upper ymm halves before returning to compiler-generated code
    // (avoids AVX/SSE transition penalties in printf and friends).
    vzeroupper
}
// With MSVC, __m256i is a union; m256i_u32 views it as eight unsigned
// 32-bit lanes.
printf("%u %u %u %u %u %u %u %u\n",
    result256i.m256i_u32[0], result256i.m256i_u32[1],
    result256i.m256i_u32[2], result256i.m256i_u32[3],
    result256i.m256i_u32[4], result256i.m256i_u32[5],
    result256i.m256i_u32[6], result256i.m256i_u32[7] );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment