Last active
October 25, 2023 10:36
-
-
Save garrettsickles/85a9ab8385172bd0e762f38e4cfb045f to your computer and use it in GitHub Desktop.
AVX2 Optimized Cross Product (C, C++)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// --------------------------------------------------------------- // | |
// Need To Know | |
// _MSC_VER: Microsoft C/C++ Compiler | |
// __AVX2__: AVX2 Instruction Set Flag | |
// __FMA__: Fused Multiply Add Flag | |
// --------------------------------------------------------------- // | |
// With the Microsoft compiler, __AVX2__ is defined but __FMA__ is not,
// so define it here. It is given the value 1 (as GCC and Clang do) so
// that both `#ifdef __FMA__` and `#if __FMA__` behave the same everywhere.
#if defined(_MSC_VER) && defined(__AVX2__) && !defined(__FMA__)
#define __FMA__ 1
#endif
// AVX2 and FMA instruction set
#if defined(__AVX2__) && defined(__FMA__)
// Definition of AVX2/FMA intrinsics
#include <immintrin.h>
// --------------------------------------------------------------- //
// a x b: Cross product of vector a with vector b                  //
// Notes:                                                          //
// - Vectors a and b should be of type __m256d                     //
// - Lanes 0, 1, 2 are treated as x, y, z. Lane 3 of the result    //
//   is a[3]*b[3] - b[3]*a[3], i.e. zero for ordinary finite input //
// - Each argument is expanded twice, so do not pass expressions   //
//   with side effects (e.g. function calls that mutate state)     //
// Summary:                                                        //
// This macro consists of a one-liner to compute the cross         //
// product of vectors a and b. Each vector is multiplied by the    //
// other one rotated to (1, 2, 0, 3) lane order, the two products  //
// are subtracted, and the difference is rotated with the same     //
// permutation so every component lands in its final lane.         //
// --------------------------------------------------------------- //
#define CROSS_PRODUCT(a, b) \
    _mm256_permute4x64_pd( \
        _mm256_sub_pd( \
            _mm256_mul_pd((a), _mm256_permute4x64_pd((b), _MM_SHUFFLE(3, 0, 2, 1))), \
            _mm256_mul_pd((b), _mm256_permute4x64_pd((a), _MM_SHUFFLE(3, 0, 2, 1)))), \
        _MM_SHUFFLE(3, 0, 2, 1))
#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment