Last active
August 29, 2015 14:25
-
-
Save OlivierLi/27a6d918558e5b6f5d40 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdafx.h"

#include <chrono>
#include <cstring>
#include <iostream>

#include <immintrin.h>
#include <xmmintrin.h>
// Scales eight 32-bit integers by 255/1024 with AVX and writes each product,
// truncated toward zero, to output[0..7] as an unsigned char.
//
// Despite the name, this routine uses 256-bit AVX intrinsics (declared in
// <immintrin.h>), not SSE.
//
// output: destination buffer; exactly eight bytes are written.
// input:  source buffer; exactly eight ints are read. No alignment is required.
//
// Every scaled value must lie in [0, 255] (e.g. inputs in 0..1024): converting
// an out-of-range float to unsigned char is undefined behaviour.
#if defined( __GNUC__ ) && !defined( __AVX__ )
// Allows GCC/Clang to compile the AVX intrinsics below even when the
// translation unit is built without -mavx. The CPU must still support AVX.
__attribute__( ( target( "avx" ) ) )
#endif
void test_sse( unsigned char *output, int *input )
{
    const float factor = 255.f / 1024.f;

    // _mm256_store_ps requires a 32-byte aligned destination; 16 is not enough.
    alignas( 32 ) float results[8] = { 0.0f };

    // Unaligned load: the caller's buffer is not guaranteed to be 32-byte aligned.
    const __m256i loaded_ints = _mm256_loadu_si256( reinterpret_cast<const __m256i *>( input ) );

    // Convert the eight integers to floats.
    const __m256 values_reg = _mm256_cvtepi32_ps( loaded_ints );

    // Broadcast the scale factor to all eight lanes and multiply.
    const __m256 factors_reg = _mm256_set1_ps( factor );
    const __m256 result_reg = _mm256_mul_ps( values_reg, factors_reg );

    _mm256_store_ps( results, result_reg );

    // Narrow each float to a byte (truncation toward zero).
    for( int i = 0; i < 8; ++i )
    {
        output[i] = static_cast<unsigned char>( results[i] );
    }
}
// Scalar reference implementation: scales eight 32-bit integers by 255/1024
// and writes each product, truncated toward zero, to output[0..7].
//
// output: destination buffer; exactly eight bytes are written.
// input:  source buffer; exactly eight ints are read.
//
// Every scaled value must lie in [0, 255] (e.g. inputs in 0..1024): converting
// an out-of-range float to unsigned char is undefined behaviour.
void test( unsigned char *output, int *input )
{
    const float factor = 255 / 1024.f;

    for( int i = 0; i < 8; ++i )
    {
        // Keep the product in a float variable so it is rounded to single
        // precision before being narrowed to a byte.
        const float scaled = input[i] * factor;
        output[i] = static_cast<unsigned char>( scaled );
    }
}
| int main( int argc, const char * argv[] ) | |
| { | |
| unsigned char *output = new unsigned char[8]; | |
| int *values = new int[8]; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for( int i = 0; i < 10000000; ++i ) | |
| { | |
| values[0] = 1; | |
| values[1] = 254; | |
| values[2] = 777; | |
| values[3] = 1024; | |
| values[4] = 1; | |
| values[5] = 254; | |
| values[6] = 777; | |
| values[7] = 1024; | |
| //test( output, values ); | |
| test_sse( output, values ); | |
| } | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| auto duration = end - start; | |
| std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(duration ).count() << std::endl; | |
| //for( int i = 0; i<8; ++i ) | |
| //{ | |
| // std::cout << (int) output[i] << std::endl; | |
| //} | |
| delete[] values; | |
| delete[] output; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment