Skip to content

Instantly share code, notes, and snippets.

@jwpeterson
Created December 19, 2020 06:00
Show Gist options
  • Save jwpeterson/e3f7973e74484e319b508953aed75eb7 to your computer and use it in GitHub Desktop.
Save jwpeterson/e3f7973e74484e319b508953aed75eb7 to your computer and use it in GitHub Desktop.
Comparison of A*x + y for RealTensorValue, DenseMatrix, and Eigen::Matrix3d
// Basic include file needed for the mesh functionality.
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/dof_map.h"
#include "libmesh/elem.h"
#include "libmesh/fe_interface.h"
#include "libmesh/fe_map.h"
#include "libmesh/fe_base.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/enum_to_string.h"
#include "libmesh/tensor_value.h"
#include "libmesh/vector_value.h"
#include "libmesh/dense_matrix.h"
#include "libmesh/dense_vector.h"
// C++ include files that we need
#include <iostream>
#include <array>
#include <memory>
// Bring in everything from the libMesh namespace
using namespace libMesh;
// The main program.
int main (int argc, char ** argv)
{
// Initialize libMesh.
LibMeshInit init (argc, argv);
auto r = [](){ return static_cast<Real>(std::rand()) / static_cast<Real>(RAND_MAX); };
// Big vectors of random matrices and vectors
const unsigned int N=10000000;
std::vector<RealTensorValue> As;
std::vector<RealVectorValue> xs;
std::vector<RealVectorValue> ys;
As.reserve(N);
xs.reserve(N);
ys.reserve(N);
for (unsigned int i=0; i<N; ++i)
{
As.emplace_back(r(), r(), r(), r(), r(), r(), r(), r(), r());
xs.emplace_back(r(), r(), r());
ys.emplace_back(r(), r(), r());
}
// Same data structures but for DenseMatrix
std::vector<DenseMatrix<Real>> Dense_As;
std::vector<DenseVector<Real>> Dense_xs;
std::vector<DenseVector<Real>> Dense_ys;
Dense_As.reserve(N);
Dense_xs.reserve(N);
Dense_ys.reserve(N);
for (unsigned int i=0; i<N; ++i)
{
DenseMatrix<Real> A(3,3);
DenseVector<Real> x(3);
DenseVector<Real> y(3);
A.get_values() = {r(), r(), r(), r(), r(), r(), r(), r(), r()};
x.get_values() = {r(), r(), r()};
y.get_values() = {r(), r(), r()};
Dense_As.emplace_back(A);
Dense_xs.emplace_back(x);
Dense_ys.emplace_back(y);
}
// Same data structures but for Eigen::Matrix3d
std::vector<Eigen::Matrix3d> Eigen_As(N);
std::vector<Eigen::Vector3d> Eigen_xs(N);
std::vector<Eigen::Vector3d> Eigen_ys(N);
for (unsigned int i=0; i<N; ++i)
{
Eigen_As[i] << r(), r(), r(), r(), r(), r(), r(), r(), r();
Eigen_xs[i] << r(), r(), r();
Eigen_ys[i] << r(), r(), r();
}
PerfLog perf_log ("TypeTensor, DenseMatrix, Eigen::Matrix3d axpy()");
const unsigned int n_loops = 10;
for (unsigned int loop=0; loop<n_loops; ++loop)
{
// Output something for each loop so it doesn't seem like the
// code is just hanging.
libMesh::out << "Starting loop " << loop << std::endl;
std::vector<RealVectorValue> zs1(N);
std::vector<RealVectorValue> zs2(N);
// Note: I have tried timing the operator approach first and
// vice-versa, but this did not seem to make any difference in
// the results.
// Time how long the "operator" approach to computing A*x + y takes
perf_log.push("TypeTensor A*x + y");
for (unsigned int i=0; i<N; ++i)
zs1[i] = As[i] * xs[i] + ys[i];
perf_log.pop("TypeTensor A*x + y");
// Time how long the axpy() approach takes
perf_log.push("TypeTensor::axpy()");
for (unsigned int i=0; i<N; ++i)
zs2[i] = RealTensorValue::axpy(As[i], xs[i], ys[i]);
perf_log.pop("TypeTensor::axpy()");
// Keep the compiler honest by accessing entries of zs1 and zs2
Real norm_sum = 0.;
for (unsigned int i=0; i<N; ++i)
norm_sum += (zs1[i] - zs2[i]).norm();
libMesh::out << "norm_sum = " << norm_sum << std::endl;
// Time how long the DenseMatrix approach takes. Allocate space
// for all the 3x1 DenseVectors first (untimed) using the std::vector
// constructor that makes N copies of its arg.
std::vector<DenseVector<Real>> zs3(N, DenseVector<Real>(3));
perf_log.push("DenseMatrix A*x + y");
for (unsigned int i=0; i<N; ++i)
{
// We compute the triple product as
// D = A; D *= B; D *= C;
// although there are other things we could try, such as
// left_multiply(), etc. there are no operators defined on
// DenseMatrix.
zs3[i] = Dense_ys[i];
Dense_As[i].vector_mult(/*dest*/zs3[i], /*arg*/Dense_xs[i]);
}
perf_log.pop("DenseMatrix A*x + y");
// Time how long the Eigen::Matrix3d approach takes
std::vector<Eigen::Vector3d> Ds4(N);
perf_log.push("Eigen::Matrix3d A*x + y");
for (unsigned int i=0; i<N; ++i)
Ds4[i] = Eigen_As[i] * Eigen_xs[i] + Eigen_ys[i];
perf_log.pop("Eigen::Matrix3d A*x + y");
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment