Created
December 19, 2020 06:00
-
-
Save jwpeterson/e3f7973e74484e319b508953aed75eb7 to your computer and use it in GitHub Desktop.
Comparison of A*x + y for RealTensorValue, DenseMatrix, and Eigen::Matrix3d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Basic include file needed for the mesh functionality. | |
#include "libmesh/libmesh.h" | |
#include "libmesh/mesh.h" | |
#include "libmesh/dof_map.h" | |
#include "libmesh/elem.h" | |
#include "libmesh/fe_interface.h" | |
#include "libmesh/fe_map.h" | |
#include "libmesh/fe_base.h" | |
#include "libmesh/quadrature_gauss.h" | |
#include "libmesh/enum_to_string.h" | |
#include "libmesh/tensor_value.h" | |
#include "libmesh/vector_value.h" | |
#include "libmesh/dense_matrix.h" | |
#include "libmesh/dense_vector.h" | |
// C++ include files that we need | |
#include <iostream> | |
#include <array> | |
#include <memory> | |
// Bring in everything from the libMesh namespace | |
using namespace libMesh; | |
// The main program. | |
int main (int argc, char ** argv) | |
{ | |
// Initialize libMesh. | |
LibMeshInit init (argc, argv); | |
auto r = [](){ return static_cast<Real>(std::rand()) / static_cast<Real>(RAND_MAX); }; | |
// Big vectors of random matrices and vectors | |
const unsigned int N=10000000; | |
std::vector<RealTensorValue> As; | |
std::vector<RealVectorValue> xs; | |
std::vector<RealVectorValue> ys; | |
As.reserve(N); | |
xs.reserve(N); | |
ys.reserve(N); | |
for (unsigned int i=0; i<N; ++i) | |
{ | |
As.emplace_back(r(), r(), r(), r(), r(), r(), r(), r(), r()); | |
xs.emplace_back(r(), r(), r()); | |
ys.emplace_back(r(), r(), r()); | |
} | |
// Same data structures but for DenseMatrix | |
std::vector<DenseMatrix<Real>> Dense_As; | |
std::vector<DenseVector<Real>> Dense_xs; | |
std::vector<DenseVector<Real>> Dense_ys; | |
Dense_As.reserve(N); | |
Dense_xs.reserve(N); | |
Dense_ys.reserve(N); | |
for (unsigned int i=0; i<N; ++i) | |
{ | |
DenseMatrix<Real> A(3,3); | |
DenseVector<Real> x(3); | |
DenseVector<Real> y(3); | |
A.get_values() = {r(), r(), r(), r(), r(), r(), r(), r(), r()}; | |
x.get_values() = {r(), r(), r()}; | |
y.get_values() = {r(), r(), r()}; | |
Dense_As.emplace_back(A); | |
Dense_xs.emplace_back(x); | |
Dense_ys.emplace_back(y); | |
} | |
// Same data structures but for Eigen::Matrix3d | |
std::vector<Eigen::Matrix3d> Eigen_As(N); | |
std::vector<Eigen::Vector3d> Eigen_xs(N); | |
std::vector<Eigen::Vector3d> Eigen_ys(N); | |
for (unsigned int i=0; i<N; ++i) | |
{ | |
Eigen_As[i] << r(), r(), r(), r(), r(), r(), r(), r(), r(); | |
Eigen_xs[i] << r(), r(), r(); | |
Eigen_ys[i] << r(), r(), r(); | |
} | |
PerfLog perf_log ("TypeTensor, DenseMatrix, Eigen::Matrix3d axpy()"); | |
const unsigned int n_loops = 10; | |
for (unsigned int loop=0; loop<n_loops; ++loop) | |
{ | |
// Output something for each loop so it doesn't seem like the | |
// code is just hanging. | |
libMesh::out << "Starting loop " << loop << std::endl; | |
std::vector<RealVectorValue> zs1(N); | |
std::vector<RealVectorValue> zs2(N); | |
// Note: I have tried timing the operator approach first and | |
// vice-versa, but this did not seem to make any difference in | |
// the results. | |
// Time how long the "operator" approach to computing A*x + y takes | |
perf_log.push("TypeTensor A*x + y"); | |
for (unsigned int i=0; i<N; ++i) | |
zs1[i] = As[i] * xs[i] + ys[i]; | |
perf_log.pop("TypeTensor A*x + y"); | |
// Time how long the axpy() approach takes | |
perf_log.push("TypeTensor::axpy()"); | |
for (unsigned int i=0; i<N; ++i) | |
zs2[i] = RealTensorValue::axpy(As[i], xs[i], ys[i]); | |
perf_log.pop("TypeTensor::axpy()"); | |
// Keep the compiler honest by accessing entries of zs1 and zs2 | |
Real norm_sum = 0.; | |
for (unsigned int i=0; i<N; ++i) | |
norm_sum += (zs1[i] - zs2[i]).norm(); | |
libMesh::out << "norm_sum = " << norm_sum << std::endl; | |
// Time how long the DenseMatrix approach takes. Allocate space | |
// for all the 3x1 DenseVectors first (untimed) using the std::vector | |
// constructor that makes N copies of its arg. | |
std::vector<DenseVector<Real>> zs3(N, DenseVector<Real>(3)); | |
perf_log.push("DenseMatrix A*x + y"); | |
for (unsigned int i=0; i<N; ++i) | |
{ | |
// We compute the triple product as | |
// D = A; D *= B; D *= C; | |
// although there are other things we could try, such as | |
// left_multiply(), etc. there are no operators defined on | |
// DenseMatrix. | |
zs3[i] = Dense_ys[i]; | |
Dense_As[i].vector_mult(/*dest*/zs3[i], /*arg*/Dense_xs[i]); | |
} | |
perf_log.pop("DenseMatrix A*x + y"); | |
// Time how long the Eigen::Matrix3d approach takes | |
std::vector<Eigen::Vector3d> Ds4(N); | |
perf_log.push("Eigen::Matrix3d A*x + y"); | |
for (unsigned int i=0; i<N; ++i) | |
Ds4[i] = Eigen_As[i] * Eigen_xs[i] + Eigen_ys[i]; | |
perf_log.pop("Eigen::Matrix3d A*x + y"); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment