Last active
July 30, 2020 06:30
-
-
Save SteveBronder/37a94838f8ce7620797d44ceb4c7e2a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <benchmark/benchmark.h> | |
#include <stan/math/mix.hpp> | |
#include <utility> | |
#include <vector> | |
#include <type_traits> | |
namespace stan { | |
namespace math { | |
struct Test1 { | |
double* a_; | |
size_t N_; | |
template <std::size_t size, typename S> | |
inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, const S& a) { | |
N_ = a.size(); | |
if (needs_adj[0]) { | |
a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size()); | |
Eigen::Map<plain_type_t<S>>(a_, a.size()) = a; | |
} | |
return Eigen::VectorXd::Ones(N_).eval(); | |
} | |
template <std::size_t size, typename S> | |
inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, const S& adj) { | |
return std::make_tuple(adj); | |
} | |
}; | |
/* | |
* This is the simplest adj_jac functor in town | |
*/ | |
struct Test2 { | |
double* a_; | |
double* b_; | |
size_t N_; | |
template <std::size_t size, typename S1, typename S2> | |
inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, S1&& a, S2&& b) { | |
N_ = a.size(); | |
if (needs_adj[0]) { | |
a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size()); | |
Eigen::Map<plain_type_t<S1>>(a_, a.rows(), a.cols()) = a; | |
} | |
if (needs_adj[1]) { | |
b_ = ChainableStack::instance_->memalloc_.alloc_array<double>(b.size()); | |
Eigen::Map<plain_type_t<S2>>(b_, b.rows(), b.cols()) = b; | |
} | |
return Eigen::VectorXd::Ones(N_).eval(); | |
} | |
template <std::size_t size, typename S> | |
inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, const S& adj) { | |
return std::forward_as_tuple(adj, adj); | |
} | |
}; | |
/* | |
* This is the simplest adj_jac functor in town | |
*/ | |
struct Test4 { | |
double* a_; | |
double* b_; | |
double* c_; | |
double* d_; | |
size_t N_; | |
template <std::size_t size, typename S1, typename S2, typename S3, typename S4> | |
inline Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, S1&& a, S2&& b, S3&& c, S4&& d) { | |
N_ = a.size(); | |
if (needs_adj[0]) { | |
a_ = ChainableStack::instance_->memalloc_.alloc_array<double>(a.size()); | |
Eigen::Map<plain_type_t<S1>>(a_, a.rows(), a.cols()) = a; | |
} | |
if (needs_adj[1]) { | |
b_ = ChainableStack::instance_->memalloc_.alloc_array<double>(b.size()); | |
Eigen::Map<plain_type_t<S2>>(b_, b.rows(), b.cols()) = b; | |
} | |
if (needs_adj[2]) { | |
c_ = ChainableStack::instance_->memalloc_.alloc_array<double>(c.size()); | |
Eigen::Map<plain_type_t<S3>>(c_, c.rows(), c.cols()) = c; | |
} | |
if (needs_adj[3]) { | |
d_ = ChainableStack::instance_->memalloc_.alloc_array<double>(d.size()); | |
Eigen::Map<plain_type_t<S4>>(d_, d.rows(), d.cols()) = d; | |
} | |
return Eigen::VectorXd::Ones(N_).eval(); | |
} | |
template <std::size_t size, typename S> | |
inline auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, S&& adj) { | |
return std::forward_as_tuple(adj, adj, adj, adj); | |
} | |
}; | |
} | |
} | |
/**
 * Benchmark `adj_jac_apply` with the one-argument `Test1` functor.
 *
 * Each iteration builds a `var` vector of ones with `state.range(0)` elements,
 * then manually times `sum(adj_jac_apply<Test1>(x1))` followed by `grad()`.
 * Building the input and calling `recover_memory()` happen outside the timed
 * region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void one_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using tester = stan::math::Test1;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    const auto start = timer::now();
    auto foo = stan::math::sum(stan::math::adj_jac_apply<tester>(x1));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
/**
 * Benchmark `adj_jac_apply` with the two-argument `Test2` functor.
 *
 * Each iteration builds two `var` vectors of ones with `state.range(0)`
 * elements, then manually times `sum(adj_jac_apply<Test2>(x1, x2))` followed
 * by `grad()`. Building the inputs and calling `recover_memory()` happen
 * outside the timed region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void two_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using tester = stan::math::Test2;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x2(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    benchmark::DoNotOptimize(x2.data());
    const auto start = timer::now();
    auto foo = stan::math::sum(stan::math::adj_jac_apply<tester>(x1, x2));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
/**
 * Benchmark `adj_jac_apply` with the four-argument `Test4` functor.
 *
 * Each iteration builds four `var` vectors of ones with `state.range(0)`
 * elements, then manually times `sum(adj_jac_apply<Test4>(x1, x2, x3, x4))`
 * followed by `grad()`. Building the inputs and calling `recover_memory()`
 * happen outside the timed region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void four_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using tester = stan::math::Test4;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x2(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x3(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x4(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    benchmark::DoNotOptimize(x2.data());
    benchmark::DoNotOptimize(x3.data());
    benchmark::DoNotOptimize(x4.data());
    const auto start = timer::now();
    auto foo
        = stan::math::sum(stan::math::adj_jac_apply<tester>(x1, x2, x3, x4));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
BENCHMARK(one_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK(two_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK(four_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK_MAIN(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <benchmark/benchmark.h> | |
#include <stan/math/mix.hpp> | |
#include <utility> | |
#include <vector> | |
#include <type_traits> | |
namespace stan { | |
namespace math { | |
template <typename T1> | |
struct Test1 { | |
adj_op<T1> a_; | |
template <typename S1> | |
Test1(S1&& a) : a_(a.size()) {} | |
template <typename S1> | |
inline auto operator()(S1&& a) { | |
if (is_var<T1>::value) { | |
a_.map() = a; | |
} | |
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval(); | |
} | |
template <typename S> | |
inline auto multiply_adjoint_jacobian(S&& adj) { | |
return std::forward_as_tuple(std::forward<S>(adj)); | |
} | |
}; | |
/* | |
* This is the simplest adj_jac functor in town | |
*/ | |
template <typename T1, typename T2> | |
struct Test2 { | |
adj_op<T1> a_; | |
adj_op<T2> b_; | |
template <typename S1, typename S2> | |
Test2(S1&& a, S2&& b) : a_(a.size()), b_(b.size()) {} | |
template <typename S1, typename S2> | |
inline auto operator()(S1&& a, S2&& b) { | |
if (is_var<T1>::value) { | |
a_.map() = a; | |
} | |
if (is_var<T2>::value) { | |
b_.map() = b; | |
} | |
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval(); | |
} | |
template <typename S> | |
inline auto multiply_adjoint_jacobian(S&& adj) { | |
return std::forward_as_tuple(adj, adj); | |
} | |
}; | |
/* | |
* This is the simplest adj_jac functor in town | |
*/ | |
template <typename T1, typename T2, typename T3, typename T4> | |
struct Test4 { | |
adj_op<T1> a_; | |
adj_op<T2> b_; | |
adj_op<T3> c_; | |
adj_op<T4> d_; | |
template <typename S1, typename S2, typename S3, typename S4> | |
Test4(S1&& a, S2&& b, S3&& c, S4&& d) : a_(a.size()), b_(b.size()), c_(c.size()), d_(d.size()) {} | |
template <typename S1, typename S2, typename S3, typename S4> | |
inline auto operator()(S1&& a, S2&& b, S3&& c, S4&& d) { | |
if (is_var<T1>::value) { | |
a_.map() = a; | |
} | |
if (is_var<T2>::value) { | |
b_.map() = b; | |
} | |
if (is_var<T3>::value) { | |
c_.map() = c; | |
} | |
if (is_var<T4>::value) { | |
d_.map() = d; | |
} | |
return Eigen::MatrixXd::Ones(a_.rows(), a_.cols()).eval(); | |
} | |
template <typename S> | |
inline auto multiply_adjoint_jacobian(S&& adj) { | |
return std::forward_as_tuple(adj, adj, adj, adj); | |
} | |
}; | |
} | |
} | |
/**
 * Benchmark `adj_jac_apply` with the one-argument `Test1<eig_vec_v>` functor.
 *
 * Each iteration builds a `var` vector of ones with `state.range(0)` elements,
 * then manually times `sum(adj_jac_apply<tester>(x1))` followed by `grad()`.
 * Building the input and calling `recover_memory()` happen outside the timed
 * region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void one_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using tester = stan::math::Test1<eig_vec_v>;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    const auto start = timer::now();
    auto foo = stan::math::sum(stan::math::adj_jac_apply<tester>(x1));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
/**
 * Benchmark `adj_jac_apply` with the two-argument
 * `Test2<eig_vec_v, eig_vec_v>` functor.
 *
 * Each iteration builds two `var` vectors of ones with `state.range(0)`
 * elements, then manually times `sum(adj_jac_apply<tester>(x1, x2))` followed
 * by `grad()`. Building the inputs and calling `recover_memory()` happen
 * outside the timed region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void two_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  // Declared before the timer starts, matching the sibling benchmarks.
  using tester = stan::math::Test2<eig_vec_v, eig_vec_v>;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x2(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    benchmark::DoNotOptimize(x2.data());
    const auto start = timer::now();
    auto foo = stan::math::sum(stan::math::adj_jac_apply<tester>(x1, x2));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
/**
 * Benchmark `adj_jac_apply` with the four-argument
 * `Test4<eig_vec_v, eig_vec_v, eig_vec_v, eig_vec_v>` functor.
 *
 * Each iteration builds four `var` vectors of ones with `state.range(0)`
 * elements, then manually times `sum(adj_jac_apply<tester>(x1, x2, x3, x4))`
 * followed by `grad()`. Building the inputs and calling `recover_memory()`
 * happen outside the timed region.
 *
 * @param state benchmark state; `range(0)` is the vector length
 */
static void four_args_adj_jac(benchmark::State& state) {
  using stan::math::var;
  using eig_vec_v = Eigen::Matrix<var, Eigen::Dynamic, 1>;
  using eig_vec_d = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using tester
      = stan::math::Test4<eig_vec_v, eig_vec_v, eig_vec_v, eig_vec_v>;
  // steady_clock is guaranteed monotonic; high_resolution_clock is allowed to
  // alias the adjustable system clock, which can corrupt interval timings.
  using timer = std::chrono::steady_clock;
  for (auto _ : state) {
    eig_vec_v x1(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x2(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x3(eig_vec_d::Ones(state.range(0)));
    eig_vec_v x4(eig_vec_d::Ones(state.range(0)));
    benchmark::DoNotOptimize(x1.data());
    benchmark::DoNotOptimize(x2.data());
    benchmark::DoNotOptimize(x3.data());
    benchmark::DoNotOptimize(x4.data());
    const auto start = timer::now();
    auto foo
        = stan::math::sum(stan::math::adj_jac_apply<tester>(x1, x2, x3, x4));
    benchmark::DoNotOptimize(foo.vi_);
    foo.grad();
    const auto end = timer::now();
    benchmark::ClobberMemory();
    stan::math::recover_memory();
    // Report the elapsed time in seconds as a double.
    const auto elapsed_seconds
        = std::chrono::duration_cast<std::chrono::duration<double>>(end
                                                                    - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
}
BENCHMARK(one_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK(two_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK(four_args_adj_jac)->DenseRange(2, 1024, 128)->UseManualTime(); | |
BENCHMARK_MAIN(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Version | Benchmark | Time | CPU | Iterations | |
---|---|---|---|---|---|
new | one_args_adj_jac/2/manual_time_mean | 88.3 ns | 123 ns | 20 | |
new | one_args_adj_jac/2/manual_time_median | 88.4 ns | 123 ns | 20 | |
new | one_args_adj_jac/2/manual_time_stddev | 0.643 ns | 0.642 ns | 20 | |
new | one_args_adj_jac/130/manual_time_mean | 1015 ns | 1501 ns | 20 | |
new | one_args_adj_jac/130/manual_time_median | 1012 ns | 1500 ns | 20 | |
new | one_args_adj_jac/130/manual_time_stddev | 18.6 ns | 19.4 ns | 20 | |
new | one_args_adj_jac/258/manual_time_mean | 1860 ns | 2799 ns | 20 | |
new | one_args_adj_jac/258/manual_time_median | 1858 ns | 2798 ns | 20 | |
new | one_args_adj_jac/258/manual_time_stddev | 24.5 ns | 24.1 ns | 20 | |
new | one_args_adj_jac/386/manual_time_mean | 2815 ns | 4187 ns | 20 | |
new | one_args_adj_jac/386/manual_time_median | 2770 ns | 4148 ns | 20 | |
new | one_args_adj_jac/386/manual_time_stddev | 83.0 ns | 83.3 ns | 20 | |
new | one_args_adj_jac/514/manual_time_mean | 3701 ns | 5508 ns | 20 | |
new | one_args_adj_jac/514/manual_time_median | 3621 ns | 5430 ns | 20 | |
new | one_args_adj_jac/514/manual_time_stddev | 204 ns | 204 ns | 20 | |
new | one_args_adj_jac/642/manual_time_mean | 5584 ns | 7822 ns | 20 | |
new | one_args_adj_jac/642/manual_time_median | 5573 ns | 7809 ns | 20 | |
new | one_args_adj_jac/642/manual_time_stddev | 40.4 ns | 45.5 ns | 20 | |
new | one_args_adj_jac/770/manual_time_mean | 5778 ns | 8399 ns | 20 | |
new | one_args_adj_jac/770/manual_time_median | 5803 ns | 8419 ns | 20 | |
new | one_args_adj_jac/770/manual_time_stddev | 510 ns | 496 ns | 20 | |
new | one_args_adj_jac/898/manual_time_mean | 6356 ns | 9407 ns | 20 | |
new | one_args_adj_jac/898/manual_time_median | 5954 ns | 9017 ns | 20 | |
new | one_args_adj_jac/898/manual_time_stddev | 546 ns | 534 ns | 20 | |
new | two_args_adj_jac/2/manual_time_mean | 86.5 ns | 135 ns | 20 | |
new | two_args_adj_jac/2/manual_time_median | 86.4 ns | 135 ns | 20 | |
new | two_args_adj_jac/2/manual_time_stddev | 0.388 ns | 0.409 ns | 20 | |
new | two_args_adj_jac/130/manual_time_mean | 1143 ns | 2076 ns | 20 | |
new | two_args_adj_jac/130/manual_time_median | 1146 ns | 2075 ns | 20 | |
new | two_args_adj_jac/130/manual_time_stddev | 16.7 ns | 23.5 ns | 20 | |
new | two_args_adj_jac/258/manual_time_mean | 2106 ns | 3941 ns | 20 | |
new | two_args_adj_jac/258/manual_time_median | 2106 ns | 3939 ns | 20 | |
new | two_args_adj_jac/258/manual_time_stddev | 5.02 ns | 9.40 ns | 20 | |
new | two_args_adj_jac/386/manual_time_mean | 3076 ns | 5784 ns | 20 | |
new | two_args_adj_jac/386/manual_time_median | 3033 ns | 5728 ns | 20 | |
new | two_args_adj_jac/386/manual_time_stddev | 72.6 ns | 85.3 ns | 20 | |
new | two_args_adj_jac/514/manual_time_mean | 4028 ns | 7582 ns | 20 | |
new | two_args_adj_jac/514/manual_time_median | 3976 ns | 7537 ns | 20 | |
new | two_args_adj_jac/514/manual_time_stddev | 106 ns | 103 ns | 20 | |
new | two_args_adj_jac/642/manual_time_mean | 5083 ns | 9596 ns | 20 | |
new | two_args_adj_jac/642/manual_time_median | 5034 ns | 9562 ns | 20 | |
new | two_args_adj_jac/642/manual_time_stddev | 90.2 ns | 96.3 ns | 20 | |
new | two_args_adj_jac/770/manual_time_mean | 5969 ns | 11324 ns | 20 | |
new | two_args_adj_jac/770/manual_time_median | 5977 ns | 11334 ns | 20 | |
new | two_args_adj_jac/770/manual_time_stddev | 26.9 ns | 47.0 ns | 20 | |
new | two_args_adj_jac/898/manual_time_mean | 6988 ns | 13217 ns | 20 | |
new | two_args_adj_jac/898/manual_time_median | 6932 ns | 13148 ns | 20 | |
new | two_args_adj_jac/898/manual_time_stddev | 176 ns | 204 ns | 20 | |
new | four_args_adj_jac/2/manual_time_mean | 102 ns | 188 ns | 20 | |
new | four_args_adj_jac/2/manual_time_median | 102 ns | 188 ns | 20 | |
new | four_args_adj_jac/2/manual_time_stddev | 1.05 ns | 1.13 ns | 20 | |
new | four_args_adj_jac/130/manual_time_mean | 1355 ns | 2898 ns | 20 | |
new | four_args_adj_jac/130/manual_time_median | 1348 ns | 2879 ns | 20 | |
new | four_args_adj_jac/130/manual_time_stddev | 17.9 ns | 52.5 ns | 20 | |
new | four_args_adj_jac/258/manual_time_mean | 2476 ns | 5247 ns | 20 | |
new | four_args_adj_jac/258/manual_time_median | 2478 ns | 5249 ns | 20 | |
new | four_args_adj_jac/258/manual_time_stddev | 18.9 ns | 33.3 ns | 20 | |
new | four_args_adj_jac/386/manual_time_mean | 3571 ns | 7685 ns | 20 | |
new | four_args_adj_jac/386/manual_time_median | 3570 ns | 7686 ns | 20 | |
new | four_args_adj_jac/386/manual_time_stddev | 13.1 ns | 30.6 ns | 20 | |
new | four_args_adj_jac/514/manual_time_mean | 4556 ns | 10135 ns | 20 | |
new | four_args_adj_jac/514/manual_time_median | 4559 ns | 10137 ns | 20 | |
new | four_args_adj_jac/514/manual_time_stddev | 16.2 ns | 35.8 ns | 20 | |
new | four_args_adj_jac/642/manual_time_mean | 5579 ns | 12754 ns | 20 | |
new | four_args_adj_jac/642/manual_time_median | 5580 ns | 12744 ns | 20 | |
new | four_args_adj_jac/642/manual_time_stddev | 37.0 ns | 46.7 ns | 20 | |
new | four_args_adj_jac/770/manual_time_mean | 6766 ns | 15680 ns | 20 | |
new | four_args_adj_jac/770/manual_time_median | 6773 ns | 15661 ns | 20 | |
new | four_args_adj_jac/770/manual_time_stddev | 35.3 ns | 76.0 ns | 20 | |
new | four_args_adj_jac/898/manual_time_mean | 7948 ns | 18587 ns | 20 | |
new | four_args_adj_jac/898/manual_time_median | 7939 ns | 18600 ns | 20 | |
new | four_args_adj_jac/898/manual_time_stddev | 21.7 ns | 64.3 ns | 20 | |
old | one_args_adj_jac/2/manual_time_mean | 87.9 ns | 123 ns | 20 | |
old | one_args_adj_jac/2/manual_time_median | 87.8 ns | 123 ns | 20 | |
old | one_args_adj_jac/2/manual_time_stddev | 0.410 ns | 0.411 ns | 20 | |
old | one_args_adj_jac/130/manual_time_mean | 1103 ns | 1605 ns | 20 | |
old | one_args_adj_jac/130/manual_time_median | 1092 ns | 1595 ns | 20 | |
old | one_args_adj_jac/130/manual_time_stddev | 34.1 ns | 31.0 ns | 20 | |
old | one_args_adj_jac/258/manual_time_mean | 1996 ns | 2936 ns | 20 | |
old | one_args_adj_jac/258/manual_time_median | 1980 ns | 2918 ns | 20 | |
old | one_args_adj_jac/258/manual_time_stddev | 79.0 ns | 76.3 ns | 20 | |
old | one_args_adj_jac/386/manual_time_mean | 3264 ns | 4638 ns | 20 | |
old | one_args_adj_jac/386/manual_time_median | 3330 ns | 4700 ns | 20 | |
old | one_args_adj_jac/386/manual_time_stddev | 113 ns | 115 ns | 20 | |
old | one_args_adj_jac/514/manual_time_mean | 3923 ns | 5746 ns | 20 | |
old | one_args_adj_jac/514/manual_time_median | 3893 ns | 5710 ns | 20 | |
old | one_args_adj_jac/514/manual_time_stddev | 176 ns | 180 ns | 20 | |
old | one_args_adj_jac/642/manual_time_mean | 4912 ns | 7154 ns | 20 | |
old | one_args_adj_jac/642/manual_time_median | 4590 ns | 6833 ns | 20 | |
old | one_args_adj_jac/642/manual_time_stddev | 419 ns | 430 ns | 20 | |
old | one_args_adj_jac/770/manual_time_mean | 5702 ns | 8387 ns | 20 | |
old | one_args_adj_jac/770/manual_time_median | 5498 ns | 8219 ns | 20 | |
old | one_args_adj_jac/770/manual_time_stddev | 288 ns | 293 ns | 20 | |
old | one_args_adj_jac/898/manual_time_mean | 6852 ns | 9968 ns | 20 | |
old | one_args_adj_jac/898/manual_time_median | 6358 ns | 9474 ns | 20 | |
old | one_args_adj_jac/898/manual_time_stddev | 662 ns | 678 ns | 20 | |
old | two_args_adj_jac/2/manual_time_mean | 100 ns | 149 ns | 20 | |
old | two_args_adj_jac/2/manual_time_median | 100 ns | 148 ns | 20 | |
old | two_args_adj_jac/2/manual_time_stddev | 0.567 ns | 0.718 ns | 20 | |
old | two_args_adj_jac/130/manual_time_mean | 1227 ns | 2212 ns | 20 | |
old | two_args_adj_jac/130/manual_time_median | 1227 ns | 2207 ns | 20 | |
old | two_args_adj_jac/130/manual_time_stddev | 2.57 ns | 33.5 ns | 20 | |
old | two_args_adj_jac/258/manual_time_mean | 2237 ns | 3979 ns | 20 | |
old | two_args_adj_jac/258/manual_time_median | 2234 ns | 3997 ns | 20 | |
old | two_args_adj_jac/258/manual_time_stddev | 8.38 ns | 58.7 ns | 20 | |
old | two_args_adj_jac/386/manual_time_mean | 3212 ns | 5693 ns | 20 | |
old | two_args_adj_jac/386/manual_time_median | 3210 ns | 5811 ns | 20 | |
old | two_args_adj_jac/386/manual_time_stddev | 6.77 ns | 178 ns | 20 | |
old | two_args_adj_jac/514/manual_time_mean | 4198 ns | 7613 ns | 20 | |
old | two_args_adj_jac/514/manual_time_median | 4198 ns | 7653 ns | 20 | |
old | two_args_adj_jac/514/manual_time_stddev | 4.13 ns | 115 ns | 20 | |
old | two_args_adj_jac/642/manual_time_mean | 5288 ns | 9589 ns | 20 | |
old | two_args_adj_jac/642/manual_time_median | 5278 ns | 9551 ns | 20 | |
old | two_args_adj_jac/642/manual_time_stddev | 37.0 ns | 338 ns | 20 | |
old | two_args_adj_jac/770/manual_time_mean | 6355 ns | 11131 ns | 20 | |
old | two_args_adj_jac/770/manual_time_median | 6352 ns | 11119 ns | 20 | |
old | two_args_adj_jac/770/manual_time_stddev | 16.1 ns | 45.7 ns | 20 | |
old | two_args_adj_jac/898/manual_time_mean | 7421 ns | 13129 ns | 20 | |
old | two_args_adj_jac/898/manual_time_median | 7421 ns | 13030 ns | 20 | |
old | two_args_adj_jac/898/manual_time_stddev | 25.2 ns | 198 ns | 20 | |
old | four_args_adj_jac/2/manual_time_mean | 111 ns | 204 ns | 20 | |
old | four_args_adj_jac/2/manual_time_median | 111 ns | 203 ns | 20 | |
old | four_args_adj_jac/2/manual_time_stddev | 0.818 ns | 3.08 ns | 20 | |
old | four_args_adj_jac/130/manual_time_mean | 1618 ns | 3198 ns | 20 | |
old | four_args_adj_jac/130/manual_time_median | 1626 ns | 3184 ns | 20 | |
old | four_args_adj_jac/130/manual_time_stddev | 18.6 ns | 46.2 ns | 20 | |
old | four_args_adj_jac/258/manual_time_mean | 2935 ns | 5784 ns | 20 | |
old | four_args_adj_jac/258/manual_time_median | 2933 ns | 5778 ns | 20 | |
old | four_args_adj_jac/258/manual_time_stddev | 20.3 ns | 37.0 ns | 20 | |
old | four_args_adj_jac/386/manual_time_mean | 4195 ns | 8433 ns | 20 | |
old | four_args_adj_jac/386/manual_time_median | 4199 ns | 8432 ns | 20 | |
old | four_args_adj_jac/386/manual_time_stddev | 14.8 ns | 24.8 ns | 20 | |
old | four_args_adj_jac/514/manual_time_mean | 5518 ns | 11344 ns | 20 | |
old | four_args_adj_jac/514/manual_time_median | 5513 ns | 11331 ns | 20 | |
old | four_args_adj_jac/514/manual_time_stddev | 22.5 ns | 60.3 ns | 20 | |
old | four_args_adj_jac/642/manual_time_mean | 6822 ns | 14349 ns | 20 | |
old | four_args_adj_jac/642/manual_time_median | 6824 ns | 14348 ns | 20 | |
old | four_args_adj_jac/642/manual_time_stddev | 39.7 ns | 57.4 ns | 20 | |
old | four_args_adj_jac/770/manual_time_mean | 8176 ns | 17516 ns | 20 | |
old | four_args_adj_jac/770/manual_time_median | 8118 ns | 17440 ns | 20 | |
old | four_args_adj_jac/770/manual_time_stddev | 249 ns | 359 ns | 20 | |
old | four_args_adj_jac/898/manual_time_mean | 9465 ns | 20602 ns | 20 | |
old | four_args_adj_jac/898/manual_time_median | 9462 ns | 20588 ns | 20 | |
old | four_args_adj_jac/898/manual_time_stddev | 38.3 ns | 79.9 ns | 20 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot "new" vs. "current" adj_jac_apply benchmark results.
library(data.table)
library(ggplot2)

# Strip the "ns" unit from a column and convert what is left to numeric.
strip_ns <- function(x) as.numeric(sub("ns", "", x))

# Load the results and lower-case the column names.
perf_dt <- fread("./adj_jac_new_vs_old.csv")
colnames(perf_dt) <- tolower(colnames(perf_dt))

# Drop the median rows and tag the remaining rows with their statistic.
perf_dt <- perf_dt[!grepl("median", benchmark)]
for (stat_name in c("mean", "stddev")) {
  perf_dt[grepl(stat_name, benchmark), stat := stat_name]
}

# Benchmark names look like "<bench>/<size>/<statistic>". Taking everything
# after the last "/" would give the statistic, not the size, so split instead.
name_parts <- strsplit(perf_dt[["benchmark"]], "/")
perf_dt[, size := as.numeric(sapply(name_parts, "[[", 2))]
perf_dt[, time := strip_ns(time)]
perf_dt[, cpu := strip_ns(cpu)]
perf_dt[, bench := sapply(name_parts, "[[", 1)]

perf_dt <- perf_dt[!grepl("toss_me", benchmark), ]
# Relabel the "old" version as "current" for the plots.
perf_dt[grepl("old", version), version := "current"]

# Plot 1: mean CPU time against vector size on log-log axes, one panel per
# benchmark.
ggplot(
  perf_dt[stat == "mean"],
  aes(x = size, y = cpu, color = version)
) +
  geom_line() +
  facet_wrap(~bench, nrow = 3, ncol = 1) +
  scale_y_log10() +
  scale_x_log10() +
  ylab("") +
  xlab("Size of Vectors") +
  ggtitle("Construction and Gradient Calc for New Vs. Current Adjoint Jac", "In Nanoseconds")

# Reshape to one row per (bench, size) with <version>_<stat> columns.
perf_dt[, benchmark := NULL]
perf_long_input <- perf_dt[, .(bench, size, cpu, stat, version)]
perf_melt_dt <- melt(
  perf_long_input,
  id.vars = c("bench", "size", "stat", "version"),
  measure.vars = c("cpu")
)
perf_cast_dt <- dcast(perf_melt_dt, bench + size ~ version + stat, value.var = "value")
perf_cast_dt <- perf_cast_dt[complete.cases(perf_cast_dt)]

# Relative change of current over new, minus 1, so positive means new is
# faster.
# NOTE(review): the bounds pair +2sd with +2sd and -2sd with -2sd, so they are
# not a conservative interval for the ratio -- confirm this is intended.
perf_vs <- perf_cast_dt[
  ,
  .(
    max_val = (current_mean + current_stddev * 2) / (new_mean + new_stddev * 2) - 1,
    mean_val = (current_mean) / (new_mean) - 1,
    min_val = (current_mean - current_stddev * 2) / (new_mean - new_stddev * 2) - 1
  ),
  by = .(bench, size)
]

# Plot 2: relative change with error bars, one panel per benchmark.
ggplot(
  perf_vs,
  aes(x = size, y = mean_val, ymin = min_val, ymax = max_val, color = bench)
) +
  geom_line() +
  geom_errorbar() +
  geom_hline(yintercept = 0) +
  facet_wrap(~bench, nrow = 3, ncol = 1, scales = "free_y") +
  ylab("") +
  xlab("Size of Vector") +
  ggtitle("Compare New Vs. Current adjoint_jac_apply", "Larger # Means New is Better")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment