OlivierLi · August 29, 2015 14:26
diff --git a/example_func.cpp b/example_func.cpp
 /**
  * Rescale eight 32-bit integers by 255/1024 and add 5.
  *
  * Despite the name, this uses 256-bit AVX/AVX2 intrinsics. For each of the
  * eight lanes:
  *
  *     output[i] = (int)round( (float)input[i] * ( 255.f / 1024.f ) ) + 5
  *
  * The float -> int conversion follows the current MXCSR rounding mode
  * (round-to-nearest-even unless the caller changed it).
  *
  * @param output destination for 8 ints; no alignment requirement.
  * @param input  source of 8 ints; no alignment requirement.
  */
 void test_sse( int *output, const int *input )
 {
 	//Load eight integers into a 256-bit vector (unaligned load)
 	__m256i loaded_ints = _mm256_loadu_si256( ( const __m256i* )input );

 	//Convert the integers to float values
 	__m256 values_reg = _mm256_cvtepi32_ps( loaded_ints );

 	//Broadcast the scale factor to every lane
 	__m256 factors_reg = _mm256_set1_ps( 255.f / 1024.f );

 	//Apply the multiplication
 	__m256 result_reg = _mm256_mul_ps( values_reg, factors_reg );

 	//Convert results back to int (rounds according to MXCSR)
 	__m256i final_results = _mm256_cvtps_epi32( result_reg );

 	//Add 5 to the result (_mm256_add_epi32 is an AVX2 instruction)
 	__m256i fives = _mm256_set1_epi32( 5 );
 	final_results = _mm256_add_epi32( final_results, fives );

 	//Store results. The unaligned store mirrors the unaligned load above:
 	//output is a plain int* and is not guaranteed to be 32-byte aligned,
 	//which the aligned _mm256_store_si256 would require.
 	_mm256_storeu_si256( ( __m256i* )output, final_results );
 }
	/**
	 * Rescale eight 32-bit integers by 255/1024 and add 5.
	 *
	 * Despite the name, this uses 256-bit AVX/AVX2 intrinsics. For each of
	 * the eight lanes:
	 *
	 *     output[i] = (int)round( (float)input[i] * ( 255.f / 1024.f ) ) + 5
	 *
	 * The float -> int conversion follows the current MXCSR rounding mode
	 * (round-to-nearest-even unless the caller changed it).
	 *
	 * Both parameters are pointers: the body reinterprets them as vector
	 * pointers, so they must carry real addresses rather than int values.
	 *
	 * @param output destination for 8 ints; no alignment requirement.
	 * @param input  source of 8 ints; no alignment requirement.
	 */
	void test_sse( int *output, const int *input )
	{
		//Load eight integers into a 256-bit vector (unaligned load)
		__m256i loaded_ints = _mm256_loadu_si256( ( const __m256i* )input );

		//Convert the integers to float values
		__m256 values_reg = _mm256_cvtepi32_ps( loaded_ints );

		//Broadcast the scale factor to every lane
		__m256 factors_reg = _mm256_set1_ps( 255.f / 1024.f );

		//Apply the multiplication
		__m256 result_reg = _mm256_mul_ps( values_reg, factors_reg );

		//Convert results back to int (rounds according to MXCSR)
		__m256i final_results = _mm256_cvtps_epi32( result_reg );

		//Add 5 to the result (_mm256_add_epi32 is an AVX2 instruction)
		__m256i fives = _mm256_set1_epi32( 5 );
		final_results = _mm256_add_epi32( final_results, fives );

		//Store results. The unaligned store mirrors the unaligned load:
		//output is a plain int* and is not guaranteed to be 32-byte
		//aligned, which the aligned _mm256_store_si256 would require.
		_mm256_storeu_si256( ( __m256i* )output, final_results );
	}