Skip to content

Instantly share code, notes, and snippets.

@Const-me
Last active November 21, 2024 13:22
Show Gist options
  • Save Const-me/eb8b77669760486ead570431299bb015 to your computer and use it in GitHub Desktop.
Save Const-me/eb8b77669760486ead570431299bb015 to your computer and use it in GitHub Desktop.
// Compiled with VS 2022: Release AMD64, AVX2 ISA, LTCG
// RDTSC time on Ryzen 7 8700G for 1024 matrices: 15834 Eigen, 7224 DirectXMath
// Compile-time backend switch read by multiplyMatrices():
// true selects multiplyWithEigen(), false selects multiplyWithDxMath().
constexpr bool useEigen = true;
// Eigen 3.4.0
#include <Eigen/Eigen>
// Multiply one pair of row-major 4x4 FP32 matrices with Eigen.
//   rsi - 32 input floats: the left operand followed immediately by the right operand
//   rdi - 16 output floats receiving the product
// NOTE(review): the raw float pointers are reinterpreted as fixed-size Eigen matrices, which
// presumably relies on the buffers satisfying Eigen's alignment requirement for this type - confirm.
__forceinline void multiplyWithEigen( float* rdi, const float* rsi )
{
	using Matrix4 = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
	// The pointer reinterpretation below is only meaningful when the matrix is exactly 16 packed floats
	static_assert( sizeof( Matrix4 ) == 16 * sizeof( float ) );

	// View the input as an array of two matrices: [ 0 ] is the left operand, [ 1 ] the right one
	const Matrix4* const operands = reinterpret_cast<const Matrix4*>( rsi );
	Matrix4* const product = reinterpret_cast<Matrix4*>( rdi );

	// noalias() tells Eigen the destination does not overlap the operands
	product->noalias() = operands[ 0 ] * operands[ 1 ];
}
// DirectXMath current master
#include "DirectXMath/DirectXMath.h"
// Multiply one pair of 4x4 FP32 matrices with DirectXMath.
//   rsi - 32 input floats: the left operand followed immediately by the right operand
//   rdi - 16 output floats receiving the product
__forceinline void multiplyWithDxMath( float* rdi, const float* rsi )
{
	// Load both operands from memory into XMMATRIX values
	const DirectX::XMMATRIX left = DirectX::XMLoadFloat4x4( reinterpret_cast<const DirectX::XMFLOAT4X4*>( rsi ) );
	const DirectX::XMMATRIX right = DirectX::XMLoadFloat4x4( reinterpret_cast<const DirectX::XMFLOAT4X4*>( rsi + 16 ) );

	// Multiply and write the result straight to the destination
	DirectX::XMStoreFloat4x4( reinterpret_cast<DirectX::XMFLOAT4X4*>( rdi ), DirectX::XMMatrixMultiply( left, right ) );
}
// Run the backend selected by useEigen over `count` consecutive matrix pairs.
//   rdi   - output buffer, 16 floats per product
//   count - number of products to compute
//   rsi   - input buffer, 32 floats per product (two 16-float matrices back to back)
// Declared noinline so the measured loop stays a separate function.
static void __declspec( noinline ) multiplyMatrices( float* rdi, size_t count, const float* rsi )
{
	constexpr size_t outputStride = 16;
	constexpr size_t inputStride = outputStride * 2;

	const float* const inputEnd = rsi + count * inputStride;
	while( rsi < inputEnd )
	{
		// Resolved at compile time; only one of the two calls is instantiated in the loop body
		if constexpr( useEigen )
			multiplyWithEigen( rdi, rsi );
		else
			multiplyWithDxMath( rdi, rsi );

		rsi += inputStride;
		rdi += outputStride;
	}
}
#include <vector>
#include <random>
// Build a vector of `length` FP32 values drawn from std::uniform_real_distribution( -1, +1 ),
// i.e. uniformly from the half-open range [ -1 .. +1 ).
// The sequence differs between runs: the engine is seeded from std::random_device.
std::vector<float> makeRandomVector( size_t length )
{
	// Mersenne Twister engine, seeded once per call
	std::mt19937 engine( std::random_device{}() );
	std::uniform_real_distribution<float> unitRange( -1.0f, 1.0f );

	std::vector<float> values( length );
	for( size_t i = 0; i < length; i++ )
		values[ i ] = unitRange( engine );
	return values;
}
#include <stdio.h>
// Benchmark entry point: fills an input buffer with random matrix pairs, times a single
// multiplyMatrices() pass with the RDTSC counter, and prints the elapsed count.
int main()
{
	constexpr size_t countMatrices = 1024;
	constexpr size_t floatsPerMatrix = 4 * 4;

	// Two input matrices per product, one output matrix per product
	const std::vector<float> source = makeRandomVector( floatsPerMatrix * 2 * countMatrices );
	std::vector<float> result( floatsPerMatrix * countMatrices );

	// The barriers surround the measured call, between the two timestamp reads
	const uint64_t start = __rdtsc();
	_ReadBarrier();
	multiplyMatrices( result.data(), countMatrices, source.data() );
	_ReadBarrier();
	const uint64_t end = __rdtsc();

	// The elapsed count is unsigned, so print it with %llu and a matching argument type.
	// The last output element is printed as well, so the computed results are actually used.
	printf( "%llu cycles; %g\n", static_cast<unsigned long long>( end - start ), result.back() );
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment