garrettsickles · October 25, 2023 10:36
diff --git a/avx2_optimized_cross_product.h b/avx2_optimized_cross_product.h
 // --------------------------------------------------------------- //
 // Need To Know
 //    _MSC_VER: Microsoft C/C++ Compiler
 //    __AVX2__: AVX2 Instruction Set Flag
 //    __FMA__: Fused Multiply Add Flag
 // --------------------------------------------------------------- //

 // MSVC defines __AVX2__ but not __FMA__, so supply __FMA__ here to let the
 // combined AVX2/FMA feature test pass on that compiler. It is defined
 // to 1 (not empty) so that value-style tests such as `#if __FMA__` also work.
 // NOTE(review): __FMA__ is a reserved identifier; this shim deliberately
 // mimics a compiler-provided macro.
 #if defined(_MSC_VER) && defined(__AVX2__) && !defined(__FMA__)
 	#define __FMA__ 1
 #endif

 // AVX2 and FMA instruction set
 // NOTE(review): the macro below only uses permute/mul/sub intrinsics; no
 // fused multiply-add intrinsic appears in it, so the __FMA__ requirement
 // looks stricter than necessary -- confirm before relaxing the guard.
 #if defined(__AVX2__) && defined(__FMA__)

 	// Definition of AVX2/FMA intrinsics
 	// (no trailing ';' -- extra tokens after #include are diagnosed)
 	#include <immintrin.h>

 	// --------------------------------------------------------------- //
 	// CROSS_PRODUCT(a, b): cross product a x b
 	// Arguments:
 	//    - a and b must be expressions of type __m256d. Lanes 0, 1, 2
 	//      hold the three vector components; lane 3 does not take part
 	//      in the cross product.
 	//    - Each argument is expanded twice, so do not pass expressions
 	//      with side effects.
 	// Result:
 	//    - A __m256d whose lanes 0, 1, 2 hold the cross product. Lane 3
 	//      is a[3]*b[3] - b[3]*a[3] (zero for finite inputs).
 	// How it works:
 	//      _MM_SHUFFLE(3, 0, 2, 1) rotates lanes (0,1,2) -> (1,2,0) and
 	//    leaves lane 3 in place. Each vector is multiplied by the
 	//    rotated copy of the other, the two products are subtracted,
 	//    and the difference is rotated once more so that every
 	//    component lands in its own lane.
 	// NOTE: CROSS_PRODUCT is defined more than once in this file; the
 	//    replacement lists must stay token-identical, otherwise the
 	//    later #define is an incompatible redefinition.
 	// --------------------------------------------------------------- //
 	#define CROSS_PRODUCT(a,b) _mm256_permute4x64_pd(\
 		_mm256_sub_pd(\
 			_mm256_mul_pd(a, _mm256_permute4x64_pd(b, _MM_SHUFFLE(3, 0, 2, 1))),\
 			_mm256_mul_pd(b, _mm256_permute4x64_pd(a, _MM_SHUFFLE(3, 0, 2, 1)))\
 		), _MM_SHUFFLE(3, 0, 2, 1)\
 	)
 #endif
	// --------------------------------------------------------------- //
	// Need To Know
	// _MSC_VER: Microsoft C/C++ Compiler
	// __AVX2__: AVX2 Instruction Set Flag
	// __FMA__: Fused Multiply Add Flag
	// --------------------------------------------------------------- //

	// MSVC defines __AVX2__ but not __FMA__, so supply __FMA__ here to let the
	// combined AVX2/FMA feature test pass on that compiler. It is defined
	// to 1 (not empty) so that value-style tests such as `#if __FMA__` also work.
	// NOTE(review): __FMA__ is a reserved identifier; this shim deliberately
	// mimics a compiler-provided macro.
	#if defined(_MSC_VER) && defined(__AVX2__) && !defined(__FMA__)
	#define __FMA__ 1
	#endif

	// AVX2 and FMA instruction set
	// NOTE(review): the macro below only uses permute/mul/sub intrinsics; no
	// fused multiply-add intrinsic appears in it, so the __FMA__ requirement
	// looks stricter than necessary -- confirm before relaxing the guard.
	#if defined(__AVX2__) && defined(__FMA__)

	// Definition of AVX2/FMA intrinsics
	// (no trailing ';' -- extra tokens after #include are diagnosed)
	#include <immintrin.h>

	// --------------------------------------------------------------- //
	// CROSS_PRODUCT(a, b): cross product a x b
	// Arguments:
	//    - a and b must be expressions of type __m256d. Lanes 0, 1, 2
	//      hold the three vector components; lane 3 does not take part
	//      in the cross product.
	//    - Each argument is expanded twice, so do not pass expressions
	//      with side effects.
	// Result:
	//    - A __m256d whose lanes 0, 1, 2 hold the cross product. Lane 3
	//      is a[3]*b[3] - b[3]*a[3] (zero for finite inputs).
	// How it works:
	//      _MM_SHUFFLE(3, 0, 2, 1) rotates lanes (0,1,2) -> (1,2,0) and
	//    leaves lane 3 in place. Each vector is multiplied by the
	//    rotated copy of the other, the two products are subtracted,
	//    and the difference is rotated once more so that every
	//    component lands in its own lane.
	// NOTE: CROSS_PRODUCT is defined more than once in this file; the
	//    replacement lists must stay token-identical, otherwise the
	//    later #define is an incompatible redefinition.
	// --------------------------------------------------------------- //
	#define CROSS_PRODUCT(a,b) _mm256_permute4x64_pd(\
		_mm256_sub_pd(\
			_mm256_mul_pd(a, _mm256_permute4x64_pd(b, _MM_SHUFFLE(3, 0, 2, 1))),\
			_mm256_mul_pd(b, _mm256_permute4x64_pd(a, _MM_SHUFFLE(3, 0, 2, 1)))\
		), _MM_SHUFFLE(3, 0, 2, 1)\
	)
	#endif