Steve Bronder SteveBronder

Operations in Eigen expressions hold their arguments either by value or by reference. Which one is chosen depends on the type of the argument. Arguments that are themselves other operations are held by value. "Heavy" objects that can hold data themselves, such as Eigen::Matrix or Eigen::Ref, are instead held by reference. This is the only criterion - holding rvalue arguments by value is not supported, so we cannot use perfect forwarding.

When returning an expression from a function we have to be careful that any

	#ifndef STAN_MATH_PRIM_FUNCTOR_FOR_EACH_HPP
	#define STAN_MATH_PRIM_FUNCTOR_FOR_EACH_HPP

	#include <stan/math/prim/meta.hpp>
	#include <functional>
	#include <tuple>
	#include <utility>

	namespace stan {
	namespace math {

	#include <benchmark/benchmark.h>
	#include <stan/math/mix.hpp>
	#include <Eigen/Dense>
	#include <utility>

	static bool run_once = true;
	// Just to fill up the stack allocator
	static void toss_me(benchmark::State& state) {
	using stan::math::var;
	using stan::math::var_value;

	Eigen::Matrix<local_scalar_t__, -1, 1> B_in__;
	B_in__
	= Eigen::Matrix<local_scalar_t__, -1, 1>((N + ((N * (N - 1)) / 2)));
	stan::math::fill(B_in__, DUMMY_VAR__);

	current_statement__ = 2;
	B_in__ = in__.vector((N + ((N * (N - 1)) / 2)));
	current_statement__ = 2;
	if (jacobian__) {
	current_statement__ = 2;

	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param beta \|1.010207675 - 1.012433331\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param delta \|0.354625684 - 0.358472662\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param gamma \|0.20025931 - 0.199601013\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param xi \|10.54545683 - 10.59873795\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param y.1.1 \|9580.51272 - 9581.77178\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param y.1.2 \|213.875447 - 213.509429\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param y.1.3 \|205.611855 - 204.718807\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param y.1.4 \|5695.40304 - 5659.0195\| not within 2e-08
	FAIL: golds/stat_comp_benchmarks_benchmarks_sir_sir.gold param y.10.1 \|4644.38261 - 4645.77645\| not within 2e-08
	FAIL: golds/s

	#!/bin/bash -e

	# Print the usage banner to stdout: a warning that running the script cleans
	# every repo involved, followed by the expected command-line synopsis
	# (with $0 expanded to the name the script was invoked as).
	usage() {
	    printf '%s\n' \
	        "=====!!!WARNING!!!====" \
	        "This will clean all repos involved! Use only on a clean checkout." \
	        "$0 \"<arguments to runPerformanceTests.py>\" <reference-cmdstan-git-hash> <cmdstan_pr_or_hash> <stan_pr> <math_pr>"
	}

	write_makelocal() {
	echo "CXXFLAGS += -march=native -mtune=native -O3 -Wno-ignored-attributes" > make/local

	Program received signal SIGSEGV, Segmentation fault.
	0x0000555555561f7c in _mm256_load_pd (__P=0x7fffe1a31020) at /usr/lib/gcc/x86_64-linux-gnu/9/include/avxintrin.h:862
	862 return (__m256d )__P;
	(gdb) bt
	#0 0x0000555555561f7c in _mm256_load_pd (__P=0x7fffe1a31020) at /usr/lib/gcc/x86_64-linux-gnu/9/include/avxintrin.h:862
	#1 Eigen::internal::pload<double __vector(4)>(Eigen::internal::unpacket_traits<double __vector(4)>::type const*) (from=0x7fffe1a31020)
	at math/lib/eigen_3.3.7/Eigen/src/Core/arch/AVX/PacketMath.h:215
	#2 0x0000555555595372 in Eigen::internal::ploadt<double __vector(4), 32>(Eigen::internal::unpacket_traits<double __vector(4)>::type const*) (from=0x7fffe1a31020)
	at math/lib/eigen_3.3.7/Eigen/src/Core/GenericPacketMath.h:463
	#3 Eigen::internal::evaluator<Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > >::packet<32, double __vector(4)>(long) const (this=0x7fffffffd008, index=0)

	--------------------------------------------------------------------------------------
	Benchmark Time CPU Iterations
	--------------------------------------------------------------------------------------
	matrix_value_of_eval_bench/2/manual_time 1498 ns 2820 ns 434421
	matrix_value_of_eval_bench/4/manual_time 1969 ns 4593 ns 355822
	matrix_value_of_eval_bench/8/manual_time 3877 ns 11459 ns 180711
	matrix_value_of_eval_bench/16/manual_time 11431 ns 38782 ns 61078
	matrix_value_of_eval_bench/32/manual_time 41160 ns 146687 ns 16991
	matrix_value_of_eval_bench/64/manual_time 159694 ns 577776 ns 4382
	matrix_value_of_eval_bench/128/manual_time 669791 ns 2344786 ns 1041

	#ifndef STAN_MATH_REV_FUN_LAMBERT_W_HPP
	#define STAN_MATH_REV_FUN_LAMBERT_W_HPP

	#include <stan/math/rev/core.hpp>
	#include <stan/math/rev/meta.hpp>
	#include <stan/math/prim/meta.hpp>
	#include <stan/math/prim/fun/lambert_w.hpp>

	namespace stan {
	namespace math {

	#include <benchmark/benchmark.h>
	#include <stan/math/mix.hpp>
	#include <Eigen/Dense>
	#include <utility>

	/**
	 * Add two operands and return the evaluated result of the sum.
	 *
	 * The function is marked `noinline` and `no_icf` and begins with an empty
	 * `asm` statement, so the compiler keeps it as a distinct, real call rather
	 * than inlining it, merging it with an identical function, or discarding the
	 * call as side-effect free.
	 * NOTE(review): presumably this is so a benchmark times an actual call -
	 * confirm against the surrounding benchmark code.
	 *
	 * @tparam T1 type of the left operand
	 * @tparam T2 type of the right operand
	 * @param A left operand; `A + B` must be a valid expression
	 * @param B right operand
	 * @return the value produced by calling `.eval()` on `A + B`
	 */
	template <typename T1, typename T2>
	__attribute__((noinline, no_icf)) auto add_inner_const(const T1& A,
	                                                       const T2& B) {
	  // Empty asm acts as an opaque side effect for the optimizer.
	  asm("");
	  // Build the sum, then call eval() on it before handing the result back.
	  auto evaluated_sum = (A + B).eval();
	  return evaluated_sum;
	}