Last active
January 1, 2016 00:29
-
-
Save lydonchandra/8066459 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//const __m128 half = _mm_set1_ps(0.5f); | |
//set our packed floats to 1.5f | |
__m256 half256ps = _mm256_set1_ps(1.5f); | |
//make room for our 256bits result variable | |
__m256i result256i; | |
unsigned X, X0, X1, X2, X3, X4, X5, X6, X7; | |
__asm { | |
//movd xmm0, X | |
//broadcast X (unsigned 32bit) into 256bit ymm0 register | |
vpbroadcastd ymm0, X | |
//mov ecx, 5 | |
//vbroadcastss ymm1, half | |
//set ymm1 register to packed 1.5f | |
vmovdqu ymm1, half256ps | |
//vaddpd ymm2, ymm0, ymm0 | |
//simd multiply ymm2 = ymm0 * ymm1 | |
vmulps ymm2, ymm0, ymm1 | |
//movd X, xmm2 | |
//store multiplication value into result256i | |
vmovdqu result256i, ymm2 | |
} | |
//result256i is an union, this is how we print it. | |
printf("%u %u %u %u %u %u %u %u\n", | |
result256i.m256i_u32[0], result256i.m256i_u32[1], | |
result256i.m256i_u32[2], result256i.m256i_u32[3], | |
result256i.m256i_u32[4], result256i.m256i_u32[5], | |
result256i.m256i_u32[6], result256i.m256i_u32[7] ); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment