Created
July 12, 2013 12:52
-
-
Save hi2p-perim/5984231 to your computer and use it in GitHub Desktop.
Simple benchmark of 3D math libraries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// 3D math library benchmark
// Entries
// - glm 0.9.4.4
// -- http://glm.g-truc.net/
// - Eigen 3.1.3
// -- http://eigen.tuxfamily.org
// - DirectXMath
// -- http://msdn.microsoft.com/en-us/library/ee415571(v=vs.85).aspx
// -- http://code.msdn.microsoft.com/windowsdesktop/Direct3D-Tutorial-Win32-829979ef
// - vectormath (part of the Bullet library) 2.81
// -- http://www.bulletphysics.com/
//
#include <iostream>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include <immintrin.h>
#include <glm.hpp>
#include <gtx/simd_vec4.hpp>
#include <gtx/simd_mat4.hpp>
#include <gtc/type_ptr.hpp>
#include <DirectXMath.h>
#include <vmInclude.h>
#include <Eigen/Core>
// Pull the DirectXMath names (XMVectorReplicate, XMMatrixIdentity, ...) into scope.
using namespace DirectX;

// Short alias for the timing facilities used in main().
namespace ch = std::chrono;

// Number of accumulate iterations executed by every benchmark (2^28).
const int Iter = 1 << 28;
void RunBench_GLM() | |
{ | |
glm::vec4 v(1.0f); | |
glm::vec4 v2; | |
glm::mat4 m(1.0f); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += m * v; | |
} | |
auto t = v2; | |
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; | |
} | |
void RunBench_Eigen() | |
{ | |
Eigen::Vector4f v(1.0f, 1.0f, 1.0f, 1.0f); | |
Eigen::Vector4f v2; | |
auto m = Eigen::Matrix4f::Identity(); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += m * v; | |
} | |
auto t = v2; | |
std::cout << t.x() << " " << t.y() << " " << t.z() << " " << t.w() << std::endl; | |
} | |
void RunBench_GLM_SIMD() | |
{ | |
glm::detail::fvec4SIMD v(1.0f); | |
glm::detail::fvec4SIMD v2(0.0f); | |
glm::detail::fmat4x4SIMD m(1.0f); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += v * m; | |
} | |
auto t = glm::vec4_cast(v2); | |
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; | |
} | |
void RunBench_DirectXMath() | |
{ | |
auto v = XMVectorReplicate(1.0f); | |
auto v2 = XMVectorReplicate(0.0f); | |
auto m = XMMatrixIdentity(); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += XMVector4Transform(v, m); | |
} | |
XMFLOAT4A t; | |
XMStoreFloat4A(&t, v2); | |
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; | |
} | |
void RunBench_Bullet_VectorMath() | |
{ | |
namespace vm = Vectormath::Aos; | |
vm::Vector4 v(1.0f); | |
vm::Vector4 v2(0.0f); | |
auto m = vm::Matrix4::identity(); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += m * v; | |
} | |
std::cout << v2[0] << " " << v2[1] << " " << v2[2] << " " << v2[3] << std::endl; | |
} | |
void RunBench_Double_GLM() | |
{ | |
glm::dvec4 v(1.0); | |
glm::dvec4 v2; | |
glm::dmat4 m(1.0); | |
for (int i = 0; i < Iter; i++) | |
{ | |
v2 += v * m; | |
} | |
auto t = v2; | |
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; | |
} | |
void RunBench_Double_AVX() | |
{ | |
__m256d v = _mm256_set_pd(1, 1, 1, 1); | |
__m256d s = _mm256_setzero_pd(); | |
__m256d m[4] = | |
{ | |
_mm256_set_pd(1, 0, 0, 0), | |
_mm256_set_pd(0, 1, 0, 0), | |
_mm256_set_pd(0, 0, 1, 0), | |
_mm256_set_pd(0, 0, 0, 1) | |
}; | |
for (int i = 0; i < Iter; i++) | |
{ | |
__m256d v0 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(0, 0, 0, 0)); | |
__m256d v1 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(1, 1, 1, 1)); | |
__m256d v2 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(2, 2, 2, 2)); | |
__m256d v3 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(3, 3, 3, 3)); | |
__m256d m0 = _mm256_mul_pd(m[0], v0); | |
__m256d m1 = _mm256_mul_pd(m[1], v1); | |
__m256d m2 = _mm256_mul_pd(m[2], v2); | |
__m256d m3 = _mm256_mul_pd(m[3], v3); | |
__m256d a0 = _mm256_add_pd(m0, m1); | |
__m256d a1 = _mm256_add_pd(m2, m3); | |
__m256d a2 = _mm256_add_pd(a0, a1); | |
s = _mm256_add_pd(s, a2); | |
} | |
_declspec(align(16)) double t[4]; | |
_mm256_store_pd(t, s); | |
std::cout << t[0] << " " << t[1] << " " << t[2] << " " << t[3] << std::endl; | |
} | |
int main() | |
{ | |
std::vector<std::pair<std::string, std::function<void ()>>> benches; | |
benches.push_back(std::make_pair("GLM", RunBench_GLM)); | |
benches.push_back(std::make_pair("Eigen", RunBench_Eigen)); | |
benches.push_back(std::make_pair("GLM_SIMD", RunBench_GLM_SIMD)); | |
benches.push_back(std::make_pair("DirectXMath", RunBench_DirectXMath)); | |
benches.push_back(std::make_pair("Bullet_VectorMath", RunBench_Bullet_VectorMath)); | |
benches.push_back(std::make_pair("Double_GLM", RunBench_Double_GLM)); | |
benches.push_back(std::make_pair("Double_AVX", RunBench_Double_AVX)); | |
for (auto& bench : benches) | |
{ | |
std::cout << "Begin [ " << bench.first << " ]" << std::endl; | |
auto start = ch::high_resolution_clock::now(); | |
bench.second(); | |
auto end = ch::high_resolution_clock::now(); | |
double elapsed = (double)ch::duration_cast<ch::milliseconds>(end - start).count() / 1000.0; | |
std::cout << "End [ " << bench.first << " ] : " << elapsed << " seconds" << std::endl; | |
} | |
std::cin.get(); | |
return 0; | |
} |
Your AVX2 code is incorrect, the compiler warns (MSVC) or gives an error (clang) about this:
__m256d v0 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(0, 0, 0, 0));
__m256d v1 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(1, 1, 1, 1));
__m256d v2 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(2, 2, 2, 2));
__m256d v3 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(3, 3, 3, 3));
_mm256_shuffle_pd
can only shuffle within each 128-bit half (the top 128 bits and the bottom 128 bits), not across them. Hence the immediate that you pass should also be only 4 bits ( _MM_SHUFFLE
creates an 8-bit immediate).
In this specific case, because all values of v
are the same, it will produce the same answer, but this code will produce incorrect results if not every value in v
is the same.
The correct code is:
__m256d v0 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(0, 0, 0, 0));
__m256d v1 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(1, 1, 1, 1));
__m256d v2 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(2, 2, 2, 2));
__m256d v3 = _mm256_permute4x64_pd(v, _MM_SHUFFLE(3, 3, 3, 3));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Would be nice if you pasted the results as a comment or something!