Last active
August 29, 2015 14:26
-
-
Save OlivierLi/142f7f7b4298d15a3d39 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scale eight 32-bit integers by 255/1024 and add 5 to each.
 *
 * Each input value is converted to float, multiplied by 255/1024,
 * converted back to a 32-bit integer and finally incremented by 5.
 * The float-to-int conversion rounds according to the current MXCSR
 * rounding mode (round-to-nearest-even unless the caller changed it).
 *
 * Despite its name, this routine uses 256-bit AVX/AVX2 intrinsics
 * (_mm256_add_epi32 is an AVX2 instruction), not SSE.
 *
 * No explicit _mm256_zeroupper()/_mm256_zeroall() is issued here.
 *
 * @param output Destination for exactly 8 ints. No particular alignment
 *               is required.
 * @param input  Source of exactly 8 ints. No particular alignment is
 *               required.
 */
void test_sse( int *output, const int *input )
{
    // Load the eight integers into a 256-bit (ymm) register; unaligned load.
    __m256i loaded_ints = _mm256_loadu_si256( ( const __m256i * )input );

    // Convert the integers to float values.
    __m256 values_reg = _mm256_cvtepi32_ps( loaded_ints );

    // Broadcast the scale factor to every lane.
    __m256 factors_reg = _mm256_set1_ps( 255.f / 1024.f );

    // Apply the multiplication.
    __m256 result_reg = _mm256_mul_ps( values_reg, factors_reg );

    // Convert the results back to int (rounds per the MXCSR rounding mode).
    __m256i final_results = _mm256_cvtps_epi32( result_reg );

    // Add 5 to every lane.
    __m256i fives = _mm256_set1_epi32( 5 );
    final_results = _mm256_add_epi32( final_results, fives );

    // Store with the unaligned variant: `output` is a plain int pointer and
    // carries no 32-byte alignment guarantee, so an aligned store could fault.
    _mm256_storeu_si256( ( __m256i * )output, final_results );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment