Skip to content

Instantly share code, notes, and snippets.

@OlivierLi
Last active August 29, 2015 14:25
Show Gist options
  • Save OlivierLi/27a6d918558e5b6f5d40 to your computer and use it in GitHub Desktop.
Save OlivierLi/27a6d918558e5b6f5d40 to your computer and use it in GitHub Desktop.
#include "stdafx.h"

#include <immintrin.h>
#include <xmmintrin.h>

#include <chrono>
#include <cstring>
#include <iostream>
// Scales eight 32-bit integers by 255/1024 and writes each result, truncated
// toward zero, as one byte.
//
// output: destination for 8 bytes.
// input:  source of 8 ints; no particular alignment is required.
//
// Each scaled value must fit in an unsigned char (inputs in [0, 1024]);
// converting an out-of-range float to unsigned char is undefined behaviour.
//
// The 256-bit AVX path is used when the compiler has AVX enabled (MSVC on x86
// accepts the intrinsics unconditionally). Otherwise an equivalent scalar
// computation is used so the function still builds and gives the same output.
void test_sse( unsigned char *output, int *input )
{
    const float factor = 255.f / 1024.f;

    // _mm256_store_ps requires a 32-byte aligned destination.
    alignas( 32 ) float results[8] = { 0.0f };

#if defined( __AVX__ ) || ( defined( _MSC_VER ) && !defined( __clang__ ) && ( defined( _M_IX86 ) || defined( _M_X64 ) ) )
    // Unaligned load: the caller's buffer is not guaranteed to be 32-byte
    // aligned, and the aligned load faults on a misaligned address.
    const __m256i loaded_ints = _mm256_loadu_si256( reinterpret_cast<const __m256i*>( input ) );
    // Convert the eight integers to floats.
    const __m256 values_reg = _mm256_cvtepi32_ps( loaded_ints );
    // Broadcast the scale factor to all eight lanes.
    const __m256 factors_reg = _mm256_set1_ps( factor );
    // Scale all lanes at once and spill them to the staging buffer.
    const __m256 result_reg = _mm256_mul_ps( values_reg, factors_reg );
    _mm256_store_ps( results, result_reg );
#else
    // AVX is not enabled for this build: same arithmetic, one lane at a time.
    for( int i = 0; i < 8; ++i )
    {
        results[i] = static_cast<float>( input[i] ) * factor;
    }
#endif

    // Narrow each float to a byte (truncation toward zero).
    for( int i = 0; i < 8; ++i )
    {
        output[i] = static_cast<unsigned char>( results[i] );
    }
}
// Scalar reference implementation: scales eight 32-bit integers by 255/1024
// and writes each result, truncated toward zero, as one byte.
//
// output: destination for 8 bytes.
// input:  source of 8 ints.
//
// Each scaled value must fit in an unsigned char (inputs in [0, 1024]);
// converting an out-of-range float to unsigned char is undefined behaviour.
void test( unsigned char *output, int *input )
{
    const float factor = 255 / 1024.f;
    for( int i = 0; i < 8; ++i )
    {
        // Scale in single precision first, then narrow to a byte.
        const float scaled = input[i] * factor;
        output[i] = static_cast<unsigned char>( scaled );
    }
}
// Benchmark driver: runs the conversion routine 10,000,000 times on a fixed
// set of eight sample values and prints the elapsed wall-clock time in
// milliseconds.
int main( int argc, const char * argv[] )
{
    // Fixed-size automatic buffers instead of new[]/delete[]: nothing to leak,
    // and the 32-byte alignment satisfies aligned 256-bit loads in the callee.
    alignas( 32 ) unsigned char output[8] = { 0 };
    alignas( 32 ) int values[8] = { 0 };

    const auto start = std::chrono::high_resolution_clock::now();
    for( int i = 0; i < 10000000; ++i )
    {
        values[0] = 1;
        values[1] = 254;
        values[2] = 777;
        values[3] = 1024;
        values[4] = 1;
        values[5] = 254;
        values[6] = 777;
        values[7] = 1024;
        // Swap the two calls below to time the scalar version instead.
        //test( output, values );
        test_sse( output, values );
    }
    const auto end = std::chrono::high_resolution_clock::now();
    const auto duration = end - start;

    // '\n' rather than std::endl: the stream is flushed at exit anyway.
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>( duration ).count() << '\n';

    // Uncomment to inspect the converted bytes.
    //for( int i = 0; i<8; ++i )
    //{
    // std::cout << (int) output[i] << std::endl;
    //}
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment