Last active
November 21, 2024 13:22
-
-
Save Const-me/eb8b77669760486ead570431299bb015 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compiled with VS 2022: Release AMD64, AVX2 ISA, LTCG
// RDTSC time on Ryzen 7 8700G for 1024 matrices: 15834 Eigen, 7224 DirectXMath
// Compile-time switch read by multiplyMatrices: true runs the Eigen implementation,
// false runs the DirectXMath implementation.
constexpr bool useEigen = true;
// Eigen 3.4.0
#include <Eigen/Eigen>
// Multiplies two row-major 4x4 FP32 matrices with Eigen.
//   rsi - 32 contiguous floats: the left-hand matrix followed by the right-hand matrix.
//   rdi - receives the 16 floats of the product. It must not overlap the inputs,
//         because the product is written with noalias() (no temporary is created).
__forceinline void multiplyWithEigen( float* rdi, const float* rsi )
{
	using Mat = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
	// Eigen::Map views the raw buffers in place. Unlike casting float* to Mat*, it does not
	// promise Eigen that the pointers meet the alignment of a fixed-size vectorizable matrix,
	// so the function stays valid for any float buffer.
	const Eigen::Map<const Mat> lhs( rsi );
	const Eigen::Map<const Mat> rhs( rsi + 16 );
	Eigen::Map<Mat> prod( rdi );
	prod.noalias() = lhs * rhs;
}
// DirectXMath current master
#include "DirectXMath/DirectXMath.h"
// Multiplies two 4x4 FP32 matrices with DirectXMath.
//   rsi - 32 contiguous floats: the left-hand matrix followed by the right-hand matrix.
//   rdi - receives the 16 floats of the product.
__forceinline void multiplyWithDxMath( float* rdi, const float* rsi )
{
	using namespace DirectX;
	// The float buffers are reinterpreted as XMFLOAT4X4 so the DirectXMath load/store helpers can be used.
	const XMMATRIX lhs = XMLoadFloat4x4( reinterpret_cast<const XMFLOAT4X4*>( rsi ) );
	const XMMATRIX rhs = XMLoadFloat4x4( reinterpret_cast<const XMFLOAT4X4*>( rsi + 16 ) );
	const XMMATRIX prod = XMMatrixMultiply( lhs, rhs );
	XMStoreFloat4x4( reinterpret_cast<XMFLOAT4X4*>( rdi ), prod );
}
// Multiplies `count` pairs of 4x4 FP32 matrices.
//   rdi   - output buffer, count * 16 floats; one product per input pair.
//   count - number of matrix pairs to multiply.
//   rsi   - input buffer, count * 32 floats; each group of 32 is a (lhs, rhs) pair.
// The implementation is selected at compile time by useEigen.
// __declspec( noinline ) keeps this function from being inlined into its caller.
static void __declspec( noinline ) multiplyMatrices( float* rdi, size_t count, const float* rsi )
{
	constexpr size_t floatsPerMatrix = 4 * 4;
	constexpr size_t floatsPerPair = floatsPerMatrix * 2;

	const float* const rsiEnd = rsi + count * floatsPerPair;
	for( ; rsi < rsiEnd; rsi += floatsPerPair, rdi += floatsPerMatrix )
	{
		if constexpr( useEigen )
			multiplyWithEigen( rdi, rsi );
		else
			multiplyWithDxMath( rdi, rsi );
	}
}
#include <vector>
#include <random>
// Generate an FP32 vector of `length` elements filled with uniformly distributed random values.
// The range is half-open, [ -1 .. +1 ): std::uniform_real_distribution excludes its upper bound.
// The generator is seeded from std::random_device, so every call yields different values.
std::vector<float> makeRandomVector( size_t length )
{
	std::vector<float> vec( length );
	std::random_device rd; // Seed source
	std::mt19937 gen( rd() ); // Mersenne Twister RNG
	std::uniform_real_distribution<float> dis( -1.0f, 1.0f ); // Range [-1, +1)
	for( float& value : vec )
		value = dis( gen );
	return vec;
}
#include <stdio.h> | |
int main() | |
{ | |
constexpr size_t countMatrices = 1024; | |
const std::vector<float> source = makeRandomVector( 4 * 4 * 2 * countMatrices ); | |
std::vector<float> result; | |
result.resize( 4 * 4 * countMatrices ); | |
const uint64_t start = __rdtsc(); | |
_ReadBarrier(); | |
multiplyMatrices( result.data(), countMatrices, source.data() ); | |
_ReadBarrier(); | |
const uint64_t end = __rdtsc(); | |
printf( "%lli cycles; %g\n", end - start, *result.rbegin() ); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment