Skip to content

Instantly share code, notes, and snippets.

@SteveBronder
Last active July 30, 2020 06:30
Show Gist options
  • Save SteveBronder/37a94838f8ce7620797d44ceb4c7e2a4 to your computer and use it in GitHub Desktop.
Save SteveBronder/37a94838f8ce7620797d44ceb4c7e2a4 to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <stan/math/mix.hpp>
#include <utility>
#include <vector>
#include <type_traits>
namespace stan {
namespace math {
/*
 * One-argument benchmark functor for the `needs_adj`-array style of
 * adj_jac_apply used in this file.
 *
 * operator() optionally copies the input into memory obtained from
 * ChainableStack::instance_->memalloc_ and returns a vector of ones with the
 * same number of elements; multiply_adjoint_jacobian hands the incoming
 * adjoint back unchanged.
 */
struct Test1 {
  // Copy of the input; only assigned when needs_adj[0] is true, so it is
  // initialised to nullptr instead of being left indeterminate.
  double* a_{nullptr};
  // Number of elements of the input seen by the last operator() call.
  size_t N_{0};

  /*
   * @param needs_adj needs_adj[0] selects whether `a` is copied into a_
   * @param a input container (must provide size() and be assignable to an
   *   Eigen::Map of its plain type)
   * @return Eigen::VectorXd of ones with a.size() elements
   */
  template <std::size_t size, typename S>
  inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, const S& a) {
    N_ = a.size();
    if (needs_adj[0]) {
      a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size());
      Eigen::Map<plain_type_t<S>>(a_, a.size()) = a;
    }
    return Eigen::VectorXd::Ones(N_).eval();
  }

  /*
   * @param needs_adj unused by this functor
   * @param adj adjoint passed in by the caller
   * @return a one-element tuple holding a copy of `adj`
   */
  template <std::size_t size, typename S>
  inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, const S& adj) {
    return std::make_tuple(adj);
  }
};
/*
 * Two-argument benchmark functor for the `needs_adj`-array style of
 * adj_jac_apply used in this file.
 *
 * operator() optionally copies each input into memory obtained from
 * ChainableStack::instance_->memalloc_ and returns a vector of ones sized by
 * the first input; multiply_adjoint_jacobian returns the incoming adjoint
 * once per argument.
 */
struct Test2 {
  // Copies of the inputs; a_ / b_ are only assigned when the matching
  // needs_adj entry is true, so they start as nullptr rather than
  // indeterminate.
  double* a_{nullptr};
  double* b_{nullptr};
  // Number of elements of the first input seen by the last operator() call.
  size_t N_{0};

  /*
   * @param needs_adj needs_adj[i] selects whether the i-th input is copied
   * @param a first input; its size() determines the size of the result
   * @param b second input
   * @return Eigen::VectorXd of ones with a.size() elements
   */
  template <std::size_t size, typename S1, typename S2>
  inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, S1&& a, S2&& b) {
    N_ = a.size();
    if (needs_adj[0]) {
      a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size());
      Eigen::Map<plain_type_t<S1>>(a_, a.rows(), a.cols()) = a;
    }
    if (needs_adj[1]) {
      b_ = ChainableStack::instance_->memalloc_.alloc_array<double>(b.size());
      Eigen::Map<plain_type_t<S2>>(b_, b.rows(), b.cols()) = b;
    }
    return Eigen::VectorXd::Ones(N_).eval();
  }

  /*
   * @param needs_adj unused by this functor
   * @param adj adjoint passed in by the caller
   * @return tuple of two references to `adj` (no copy is made)
   */
  template <std::size_t size, typename S>
  inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, const S& adj) {
    return std::forward_as_tuple(adj, adj);
  }
};
/*
 * Four-argument benchmark functor for the `needs_adj`-array style of
 * adj_jac_apply used in this file.
 *
 * operator() optionally copies each input into memory obtained from
 * ChainableStack::instance_->memalloc_ and returns a vector of ones sized by
 * the first input; multiply_adjoint_jacobian returns the incoming adjoint
 * once per argument.
 */
struct Test4 {
  // Copies of the inputs; each pointer is only assigned when the matching
  // needs_adj entry is true, so all start as nullptr rather than
  // indeterminate.
  double* a_{nullptr};
  double* b_{nullptr};
  double* c_{nullptr};
  double* d_{nullptr};
  // Number of elements of the first input seen by the last operator() call.
  size_t N_{0};

  /*
   * @param needs_adj needs_adj[i] selects whether the i-th input is copied
   * @param a first input; its size() determines the size of the result
   * @param b second input
   * @param c third input
   * @param d fourth input
   * @return Eigen::VectorXd of ones with a.size() elements
   */
  template <std::size_t size, typename S1, typename S2, typename S3, typename S4>
  inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, S1&& a, S2&& b, S3&& c, S4&& d) {
    N_ = a.size();
    if (needs_adj[0]) {
      a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size());
      Eigen::Map<plain_type_t<S1>>(a_, a.rows(), a.cols()) = a;
    }
    if (needs_adj[1]) {
      b_ = ChainableStack::instance_->memalloc_.alloc_array<double>(b.size());
      Eigen::Map<plain_type_t<S2>>(b_, b.rows(), b.cols()) = b;
    }
    if (needs_adj[2]) {
      c_ = ChainableStack::instance_->memalloc_.alloc_array<double>(c.size());
      Eigen::Map<plain_type_t<S3>>(c_, c.rows(), c.cols()) = c;
    }
    if (needs_adj[3]) {
      d_ = ChainableStack::instance_->memalloc_.alloc_array<double>(d.size());
      Eigen::Map<plain_type_t<S4>>(d_, d.rows(), d.cols()) = d;
    }
    return Eigen::VectorXd::Ones(N_).eval();
  }

  /*
   * @param needs_adj unused by this functor
   * @param adj adjoint passed in by the caller
   * @return tuple of four references to `adj` (no copy is made)
   */
  template <std::size_t size, typename S>
  inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, S&& adj) {
    return std::forward_as_tuple(adj, adj, adj, adj);
  }
};
}
}
/*
 * Benchmark: adj_jac_apply with the one-argument Test1 functor.
 *
 * Each iteration builds a var vector of ones with state.range(0) elements,
 * then times only the adj_jac_apply + sum + grad() section by hand. Input
 * construction and recover_memory() fall outside the recorded interval, which
 * is reported through SetIterationTime.
 */
static void one_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor = stan::math::Test1;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total = stan::math::sum(stan::math::adj_jac_apply<functor>(arg1));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
/*
 * Benchmark: adj_jac_apply with the two-argument Test2 functor.
 *
 * Each iteration builds two var vectors of ones with state.range(0) elements,
 * then times only the adj_jac_apply + sum + grad() section by hand. Input
 * construction and recover_memory() fall outside the recorded interval, which
 * is reported through SetIterationTime.
 */
static void two_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor = stan::math::Test2;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    var_vector arg2(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());
    benchmark::DoNotOptimize(arg2.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total
        = stan::math::sum(stan::math::adj_jac_apply<functor>(arg1, arg2));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
/*
 * Benchmark: adj_jac_apply with the four-argument Test4 functor.
 *
 * Each iteration builds four var vectors of ones with state.range(0)
 * elements, then times only the adj_jac_apply + sum + grad() section by hand.
 * Input construction and recover_memory() fall outside the recorded interval,
 * which is reported through SetIterationTime.
 */
static void four_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor = stan::math::Test4;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    var_vector arg2(dbl_vector::Ones(state.range(0)));
    var_vector arg3(dbl_vector::Ones(state.range(0)));
    var_vector arg4(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());
    benchmark::DoNotOptimize(arg2.data());
    benchmark::DoNotOptimize(arg3.data());
    benchmark::DoNotOptimize(arg4.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total = stan::math::sum(
        stan::math::adj_jac_apply<functor>(arg1, arg2, arg3, arg4));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
// Register the three benchmarks. Each is run over DenseRange(2, 1024, 128);
// the results recorded alongside this file list the sizes 2, 130, 258, ...,
// 898. UseManualTime makes the reported time the value each benchmark passes
// to state.SetIterationTime().
BENCHMARK(one_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
BENCHMARK(two_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
BENCHMARK(four_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
// Expands to the program's main() for the benchmark runner.
BENCHMARK_MAIN();
#include <benchmark/benchmark.h>
#include <stan/math/mix.hpp>
#include <utility>
#include <vector>
#include <type_traits>
namespace stan {
namespace math {
template <typename T1>
struct Test1 {
adj_op<T1> a_;
template <typename S1>
Test1(S1&& a) : a_(a.size()) {}
template <typename S1>
inline auto operator()(S1&& a) {
if (is_var<T1>::value) {
a_.map() = a;
}
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval();
}
template <typename S>
inline auto multiply_adjoint_jacobian(S&& adj) {
return std::forward_as_tuple(std::forward<S>(adj));
}
};
/*
* This is the simplest adj_jac functor in town
*/
template <typename T1, typename T2>
struct Test2 {
adj_op<T1> a_;
adj_op<T2> b_;
template <typename S1, typename S2>
Test2(S1&& a, S2&& b) : a_(a.size()), b_(b.size()) {}
template <typename S1, typename S2>
inline auto operator()(S1&& a, S2&& b) {
if (is_var<T1>::value) {
a_.map() = a;
}
if (is_var<T2>::value) {
b_.map() = b;
}
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval();
}
template <typename S>
inline auto multiply_adjoint_jacobian(S&& adj) {
return std::forward_as_tuple(adj, adj);
}
};
/*
* This is the simplest adj_jac functor in town
*/
template <typename T1, typename T2, typename T3, typename T4>
struct Test4 {
adj_op<T1> a_;
adj_op<T2> b_;
adj_op<T3> c_;
adj_op<T4> d_;
template <typename S1, typename S2, typename S3, typename S4>
Test4(S1&& a, S2&& b, S3&& c, S4&& d) : a_(a.size()), b_(b.size()), c_(c.size()), d_(d.size()) {}
template <typename S1, typename S2, typename S3, typename S4>
inline auto operator()(S1&& a, S2&& b, S3&& c, S4&& d) {
if (is_var<T1>::value) {
a_.map() = a;
}
if (is_var<T2>::value) {
b_.map() = b;
}
if (is_var<T3>::value) {
c_.map() = c;
}
if (is_var<T4>::value) {
d_.map() = d;
}
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval();
}
template <typename S>
inline auto multiply_adjoint_jacobian(S&& adj) {
return std::forward_as_tuple(adj, adj, adj, adj);
}
};
}
}
/*
 * Benchmark: adj_jac_apply with the one-argument Test1<T1> functor.
 *
 * Each iteration builds a var vector of ones with state.range(0) elements,
 * then times only the adj_jac_apply + sum + grad() section by hand. Input
 * construction and recover_memory() fall outside the recorded interval, which
 * is reported through SetIterationTime.
 */
static void one_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor = stan::math::Test1<var_vector>;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total = stan::math::sum(stan::math::adj_jac_apply<functor>(arg1));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
/*
 * Benchmark: adj_jac_apply with the two-argument Test2<T1, T2> functor.
 *
 * Each iteration builds two var vectors of ones with state.range(0) elements,
 * then times only the adj_jac_apply + sum + grad() section by hand. Input
 * construction and recover_memory() fall outside the recorded interval, which
 * is reported through SetIterationTime.
 */
static void two_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor = stan::math::Test2<var_vector, var_vector>;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    var_vector arg2(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());
    benchmark::DoNotOptimize(arg2.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total
        = stan::math::sum(stan::math::adj_jac_apply<functor>(arg1, arg2));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
/*
 * Benchmark: adj_jac_apply with the four-argument Test4<T1..T4> functor.
 *
 * Each iteration builds four var vectors of ones with state.range(0)
 * elements, then times only the adj_jac_apply + sum + grad() section by hand.
 * Input construction and recover_memory() fall outside the recorded interval,
 * which is reported through SetIterationTime.
 */
static void four_args_adj_jac(benchmark::State& state) {
  using var_vector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;
  using dbl_vector = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using functor
      = stan::math::Test4<var_vector, var_vector, var_vector, var_vector>;
  using timer = std::chrono::high_resolution_clock;

  for (auto _ : state) {
    var_vector arg1(dbl_vector::Ones(state.range(0)));
    var_vector arg2(dbl_vector::Ones(state.range(0)));
    var_vector arg3(dbl_vector::Ones(state.range(0)));
    var_vector arg4(dbl_vector::Ones(state.range(0)));
    benchmark::DoNotOptimize(arg1.data());
    benchmark::DoNotOptimize(arg2.data());
    benchmark::DoNotOptimize(arg3.data());
    benchmark::DoNotOptimize(arg4.data());

    // ---- timed region ----
    const auto tic = timer::now();
    auto total = stan::math::sum(
        stan::math::adj_jac_apply<functor>(arg1, arg2, arg3, arg4));
    benchmark::DoNotOptimize(total.vi_);
    total.grad();
    const auto toc = timer::now();
    // ---- end timed region ----

    benchmark::ClobberMemory();
    stan::math::recover_memory();

    // Converting to duration<double> yields the elapsed time in seconds.
    const std::chrono::duration<double> elapsed = toc - tic;
    state.SetIterationTime(elapsed.count());
  }
}
// Register the three benchmarks. Each is run over DenseRange(2, 1024, 128);
// the results recorded alongside this file list the sizes 2, 130, 258, ...,
// 898. UseManualTime makes the reported time the value each benchmark passes
// to state.SetIterationTime().
BENCHMARK(one_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
BENCHMARK(two_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
BENCHMARK(four_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime();
// Expands to the program's main() for the benchmark runner.
BENCHMARK_MAIN();
Version Benchmark Time CPU Iterations
new one_args_adj_jac/2/manual_time_mean 88.3 ns 123 ns 20
new one_args_adj_jac/2/manual_time_median 88.4 ns 123 ns 20
new one_args_adj_jac/2/manual_time_stddev 0.643 ns 0.642 ns 20
new one_args_adj_jac/130/manual_time_mean 1015 ns 1501 ns 20
new one_args_adj_jac/130/manual_time_median 1012 ns 1500 ns 20
new one_args_adj_jac/130/manual_time_stddev 18.6 ns 19.4 ns 20
new one_args_adj_jac/258/manual_time_mean 1860 ns 2799 ns 20
new one_args_adj_jac/258/manual_time_median 1858 ns 2798 ns 20
new one_args_adj_jac/258/manual_time_stddev 24.5 ns 24.1 ns 20
new one_args_adj_jac/386/manual_time_mean 2815 ns 4187 ns 20
new one_args_adj_jac/386/manual_time_median 2770 ns 4148 ns 20
new one_args_adj_jac/386/manual_time_stddev 83.0 ns 83.3 ns 20
new one_args_adj_jac/514/manual_time_mean 3701 ns 5508 ns 20
new one_args_adj_jac/514/manual_time_median 3621 ns 5430 ns 20
new one_args_adj_jac/514/manual_time_stddev 204 ns 204 ns 20
new one_args_adj_jac/642/manual_time_mean 5584 ns 7822 ns 20
new one_args_adj_jac/642/manual_time_median 5573 ns 7809 ns 20
new one_args_adj_jac/642/manual_time_stddev 40.4 ns 45.5 ns 20
new one_args_adj_jac/770/manual_time_mean 5778 ns 8399 ns 20
new one_args_adj_jac/770/manual_time_median 5803 ns 8419 ns 20
new one_args_adj_jac/770/manual_time_stddev 510 ns 496 ns 20
new one_args_adj_jac/898/manual_time_mean 6356 ns 9407 ns 20
new one_args_adj_jac/898/manual_time_median 5954 ns 9017 ns 20
new one_args_adj_jac/898/manual_time_stddev 546 ns 534 ns 20
new two_args_adj_jac/2/manual_time_mean 86.5 ns 135 ns 20
new two_args_adj_jac/2/manual_time_median 86.4 ns 135 ns 20
new two_args_adj_jac/2/manual_time_stddev 0.388 ns 0.409 ns 20
new two_args_adj_jac/130/manual_time_mean 1143 ns 2076 ns 20
new two_args_adj_jac/130/manual_time_median 1146 ns 2075 ns 20
new two_args_adj_jac/130/manual_time_stddev 16.7 ns 23.5 ns 20
new two_args_adj_jac/258/manual_time_mean 2106 ns 3941 ns 20
new two_args_adj_jac/258/manual_time_median 2106 ns 3939 ns 20
new two_args_adj_jac/258/manual_time_stddev 5.02 ns 9.40 ns 20
new two_args_adj_jac/386/manual_time_mean 3076 ns 5784 ns 20
new two_args_adj_jac/386/manual_time_median 3033 ns 5728 ns 20
new two_args_adj_jac/386/manual_time_stddev 72.6 ns 85.3 ns 20
new two_args_adj_jac/514/manual_time_mean 4028 ns 7582 ns 20
new two_args_adj_jac/514/manual_time_median 3976 ns 7537 ns 20
new two_args_adj_jac/514/manual_time_stddev 106 ns 103 ns 20
new two_args_adj_jac/642/manual_time_mean 5083 ns 9596 ns 20
new two_args_adj_jac/642/manual_time_median 5034 ns 9562 ns 20
new two_args_adj_jac/642/manual_time_stddev 90.2 ns 96.3 ns 20
new two_args_adj_jac/770/manual_time_mean 5969 ns 11324 ns 20
new two_args_adj_jac/770/manual_time_median 5977 ns 11334 ns 20
new two_args_adj_jac/770/manual_time_stddev 26.9 ns 47.0 ns 20
new two_args_adj_jac/898/manual_time_mean 6988 ns 13217 ns 20
new two_args_adj_jac/898/manual_time_median 6932 ns 13148 ns 20
new two_args_adj_jac/898/manual_time_stddev 176 ns 204 ns 20
new four_args_adj_jac/2/manual_time_mean 102 ns 188 ns 20
new four_args_adj_jac/2/manual_time_median 102 ns 188 ns 20
new four_args_adj_jac/2/manual_time_stddev 1.05 ns 1.13 ns 20
new four_args_adj_jac/130/manual_time_mean 1355 ns 2898 ns 20
new four_args_adj_jac/130/manual_time_median 1348 ns 2879 ns 20
new four_args_adj_jac/130/manual_time_stddev 17.9 ns 52.5 ns 20
new four_args_adj_jac/258/manual_time_mean 2476 ns 5247 ns 20
new four_args_adj_jac/258/manual_time_median 2478 ns 5249 ns 20
new four_args_adj_jac/258/manual_time_stddev 18.9 ns 33.3 ns 20
new four_args_adj_jac/386/manual_time_mean 3571 ns 7685 ns 20
new four_args_adj_jac/386/manual_time_median 3570 ns 7686 ns 20
new four_args_adj_jac/386/manual_time_stddev 13.1 ns 30.6 ns 20
new four_args_adj_jac/514/manual_time_mean 4556 ns 10135 ns 20
new four_args_adj_jac/514/manual_time_median 4559 ns 10137 ns 20
new four_args_adj_jac/514/manual_time_stddev 16.2 ns 35.8 ns 20
new four_args_adj_jac/642/manual_time_mean 5579 ns 12754 ns 20
new four_args_adj_jac/642/manual_time_median 5580 ns 12744 ns 20
new four_args_adj_jac/642/manual_time_stddev 37.0 ns 46.7 ns 20
new four_args_adj_jac/770/manual_time_mean 6766 ns 15680 ns 20
new four_args_adj_jac/770/manual_time_median 6773 ns 15661 ns 20
new four_args_adj_jac/770/manual_time_stddev 35.3 ns 76.0 ns 20
new four_args_adj_jac/898/manual_time_mean 7948 ns 18587 ns 20
new four_args_adj_jac/898/manual_time_median 7939 ns 18600 ns 20
new four_args_adj_jac/898/manual_time_stddev 21.7 ns 64.3 ns 20
old one_args_adj_jac/2/manual_time_mean 87.9 ns 123 ns 20
old one_args_adj_jac/2/manual_time_median 87.8 ns 123 ns 20
old one_args_adj_jac/2/manual_time_stddev 0.410 ns 0.411 ns 20
old one_args_adj_jac/130/manual_time_mean 1103 ns 1605 ns 20
old one_args_adj_jac/130/manual_time_median 1092 ns 1595 ns 20
old one_args_adj_jac/130/manual_time_stddev 34.1 ns 31.0 ns 20
old one_args_adj_jac/258/manual_time_mean 1996 ns 2936 ns 20
old one_args_adj_jac/258/manual_time_median 1980 ns 2918 ns 20
old one_args_adj_jac/258/manual_time_stddev 79.0 ns 76.3 ns 20
old one_args_adj_jac/386/manual_time_mean 3264 ns 4638 ns 20
old one_args_adj_jac/386/manual_time_median 3330 ns 4700 ns 20
old one_args_adj_jac/386/manual_time_stddev 113 ns 115 ns 20
old one_args_adj_jac/514/manual_time_mean 3923 ns 5746 ns 20
old one_args_adj_jac/514/manual_time_median 3893 ns 5710 ns 20
old one_args_adj_jac/514/manual_time_stddev 176 ns 180 ns 20
old one_args_adj_jac/642/manual_time_mean 4912 ns 7154 ns 20
old one_args_adj_jac/642/manual_time_median 4590 ns 6833 ns 20
old one_args_adj_jac/642/manual_time_stddev 419 ns 430 ns 20
old one_args_adj_jac/770/manual_time_mean 5702 ns 8387 ns 20
old one_args_adj_jac/770/manual_time_median 5498 ns 8219 ns 20
old one_args_adj_jac/770/manual_time_stddev 288 ns 293 ns 20
old one_args_adj_jac/898/manual_time_mean 6852 ns 9968 ns 20
old one_args_adj_jac/898/manual_time_median 6358 ns 9474 ns 20
old one_args_adj_jac/898/manual_time_stddev 662 ns 678 ns 20
old two_args_adj_jac/2/manual_time_mean 100 ns 149 ns 20
old two_args_adj_jac/2/manual_time_median 100 ns 148 ns 20
old two_args_adj_jac/2/manual_time_stddev 0.567 ns 0.718 ns 20
old two_args_adj_jac/130/manual_time_mean 1227 ns 2212 ns 20
old two_args_adj_jac/130/manual_time_median 1227 ns 2207 ns 20
old two_args_adj_jac/130/manual_time_stddev 2.57 ns 33.5 ns 20
old two_args_adj_jac/258/manual_time_mean 2237 ns 3979 ns 20
old two_args_adj_jac/258/manual_time_median 2234 ns 3997 ns 20
old two_args_adj_jac/258/manual_time_stddev 8.38 ns 58.7 ns 20
old two_args_adj_jac/386/manual_time_mean 3212 ns 5693 ns 20
old two_args_adj_jac/386/manual_time_median 3210 ns 5811 ns 20
old two_args_adj_jac/386/manual_time_stddev 6.77 ns 178 ns 20
old two_args_adj_jac/514/manual_time_mean 4198 ns 7613 ns 20
old two_args_adj_jac/514/manual_time_median 4198 ns 7653 ns 20
old two_args_adj_jac/514/manual_time_stddev 4.13 ns 115 ns 20
old two_args_adj_jac/642/manual_time_mean 5288 ns 9589 ns 20
old two_args_adj_jac/642/manual_time_median 5278 ns 9551 ns 20
old two_args_adj_jac/642/manual_time_stddev 37.0 ns 338 ns 20
old two_args_adj_jac/770/manual_time_mean 6355 ns 11131 ns 20
old two_args_adj_jac/770/manual_time_median 6352 ns 11119 ns 20
old two_args_adj_jac/770/manual_time_stddev 16.1 ns 45.7 ns 20
old two_args_adj_jac/898/manual_time_mean 7421 ns 13129 ns 20
old two_args_adj_jac/898/manual_time_median 7421 ns 13030 ns 20
old two_args_adj_jac/898/manual_time_stddev 25.2 ns 198 ns 20
old four_args_adj_jac/2/manual_time_mean 111 ns 204 ns 20
old four_args_adj_jac/2/manual_time_median 111 ns 203 ns 20
old four_args_adj_jac/2/manual_time_stddev 0.818 ns 3.08 ns 20
old four_args_adj_jac/130/manual_time_mean 1618 ns 3198 ns 20
old four_args_adj_jac/130/manual_time_median 1626 ns 3184 ns 20
old four_args_adj_jac/130/manual_time_stddev 18.6 ns 46.2 ns 20
old four_args_adj_jac/258/manual_time_mean 2935 ns 5784 ns 20
old four_args_adj_jac/258/manual_time_median 2933 ns 5778 ns 20
old four_args_adj_jac/258/manual_time_stddev 20.3 ns 37.0 ns 20
old four_args_adj_jac/386/manual_time_mean 4195 ns 8433 ns 20
old four_args_adj_jac/386/manual_time_median 4199 ns 8432 ns 20
old four_args_adj_jac/386/manual_time_stddev 14.8 ns 24.8 ns 20
old four_args_adj_jac/514/manual_time_mean 5518 ns 11344 ns 20
old four_args_adj_jac/514/manual_time_median 5513 ns 11331 ns 20
old four_args_adj_jac/514/manual_time_stddev 22.5 ns 60.3 ns 20
old four_args_adj_jac/642/manual_time_mean 6822 ns 14349 ns 20
old four_args_adj_jac/642/manual_time_median 6824 ns 14348 ns 20
old four_args_adj_jac/642/manual_time_stddev 39.7 ns 57.4 ns 20
old four_args_adj_jac/770/manual_time_mean 8176 ns 17516 ns 20
old four_args_adj_jac/770/manual_time_median 8118 ns 17440 ns 20
old four_args_adj_jac/770/manual_time_stddev 249 ns 359 ns 20
old four_args_adj_jac/898/manual_time_mean 9465 ns 20602 ns 20
old four_args_adj_jac/898/manual_time_median 9462 ns 20588 ns 20
old four_args_adj_jac/898/manual_time_stddev 38.3 ns 79.9 ns 20
# Compare "new" vs "old" (relabelled "current") adj_jac_apply benchmark results:
# load the combined results, tidy them, then draw (1) raw CPU time per vector
# size and (2) the relative difference between the two versions.
library(data.table)
library(ggplot2)
# Expects columns Version, Benchmark, Time, CPU (lower-cased just below).
perf_dt = fread("./adj_jac_new_vs_old.csv")
colnames(perf_dt) = tolower(colnames(perf_dt))
# Keep only the mean and stddev aggregate rows; median rows are dropped.
perf_dt = perf_dt[!grepl("median", benchmark)]
perf_dt[grepl("mean", benchmark), stat := "mean"]
perf_dt[grepl("stddev", benchmark), stat := "stddev"]
# Benchmark names look like "<bench>/<size>/manual_time_<stat>": the second
# "/"-separated field is the vector size, the first is the benchmark name.
perf_dt[, size := as.numeric(sapply(strsplit(benchmark, "/"), "[[", 2))]
#perf_dt[, size := as.numeric(sub(".*/", "", benchmark))]
# Strip the "ns" unit suffix so time and cpu become numeric.
perf_dt[, time := as.numeric(sub("ns", "", time))]
perf_dt[, cpu := as.numeric(sub("ns", "", cpu))]
perf_dt[, bench := sapply(strsplit(benchmark, "/"), "[[", 1)]
# Drop any rows whose benchmark name contains "toss_me".
perf_dt = perf_dt[!grepl("toss_me", benchmark),]
# Rename the "old" version to "current"; the wide column names used further
# down (current_mean, current_stddev) depend on this label.
perf_dt[grepl("old", version), version := "current"]
# Plot 1: mean CPU time against size on log-log axes, one panel per benchmark.
# NOTE(review): this uses the cpu column, while the benchmarks were registered
# with UseManualTime; confirm cpu (not time) is the intended measure.
ggplot(perf_dt[stat == "mean"], aes(x = size, y = cpu, color = version)) +
geom_line() +
facet_wrap(~bench, nrow = 3, ncol = 1) +
scale_y_log10() +
scale_x_log10() +
ylab("") +
xlab("Size of Vectors") +
ggtitle("Construction and Gradient Calc for New Vs. Current Adjoint Jac", "In Nanoseconds")
# The raw benchmark name is no longer needed once bench/size/stat exist.
perf_dt[, benchmark := NULL]
# Reshape to one row per (bench, size) with columns <version>_<stat>,
# e.g. current_mean and new_stddev.
perf_melt_dt = melt(perf_dt[, .(bench, size, cpu, stat, version)], id.vars = c("bench", "size", "stat", "version"), measure.vars = c("cpu"))
perf_cast_dt = dcast(perf_melt_dt, bench+size~version+stat, value.var = "value")
# Keep only rows where every version/stat combination is present.
perf_cast_dt = perf_cast_dt[complete.cases(perf_cast_dt)]
# Relative difference current/new - 1 (positive means new has the smaller cpu
# value), evaluated at the means and at mean +/- 2 * stddev for both versions.
perf_vs = perf_cast_dt[, .(
max_val = (current_mean + current_stddev * 2) / (new_mean + new_stddev * 2) - 1,
mean_val = (current_mean) / (new_mean) - 1,
min_val = (current_mean - current_stddev * 2) / (new_mean - new_stddev * 2) - 1
), .(bench, size)]
# Plot 2: relative difference with error bars and a zero reference line, one
# panel per benchmark with independent y scales.
ggplot(perf_vs, aes(x = size, y = mean_val, ymin = min_val, ymax = max_val, color = bench)) +
geom_line() +
geom_errorbar() +
geom_hline(yintercept = 0) +
facet_wrap(~bench, nrow = 3, ncol = 1, scales = "free_y") +
ylab("") +
xlab("Size of Vector") +
ggtitle("Compare New Vs. Current adjoint_jac_apply", "Larger # Means New is Better")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment