agarciamontoro · April 3, 2017 15:26
diff --git a/0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch b/0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch
 From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?=
 <[email protected]>
 Date: Tue, 28 Mar 2017 01:53:45 +0200
 Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary="------------2.12.1"

 This is a multi-part message in MIME format.
 --------------2.12.1
 Content-Type: text/plain; charset=UTF-8; format=fixed
 Content-Transfer-Encoding: 8bit

 ---
 math/mathcore/CMakeLists.txt             |  5 +-
 math/mathcore/inc/TMath.h                |  5 +-
 math/mathcore/inc/TMath_VecTest.h        | 43 +++++++++++++++
 math/mathcore/src/TMath.cxx              | 60 +++++++++++++++++++++
 math/mathcore/test/vectorizationTest.cxx | 90 ++++++++++++++++++++++++++++++++
 5 files changed, 197 insertions(+), 6 deletions(-)
 create mode 100644 math/mathcore/inc/TMath_VecTest.h
 create mode 100644 math/mathcore/test/vectorizationTest.cxx


 --------------2.12.1
 Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
 Content-Transfer-Encoding: 8bit
 Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"

 diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
 index 8bcbd1f760..df980d89af 100644
 --- a/math/mathcore/CMakeLists.txt
 +++ b/math/mathcore/CMakeLists.txt
 @@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h
   Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h
   Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h
   Math/MersenneTwisterEngine.h Math/MixMaxEngine.h   TRandomGen.h Math/LCGEngine.h
 +  TMath_VecTest.h
 )
 
 ROOT_GENERATE_DICTIONARY(G__MathCore   TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM")
 @@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore *.cxx *.c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_
 ROOT_INSTALL_HEADERS()
 
 ROOT_ADD_TEST_SUBDIRECTORY(test)
 -
 -
 -
 -
 diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h
 index 5a6a365504..b9aea8db83 100644
 --- a/math/mathcore/inc/TMath.h
 +++ b/math/mathcore/inc/TMath.h
 @@ -35,6 +35,7 @@
 #include <limits>
 #include <cmath>
 
 +
 namespace TMath {
 
    /* ************************* */
 @@ -600,7 +601,7 @@ namespace detailsForFastMath {
    inline int IsNaN(double x)
    {
       UInt_t hx, lx;
 -      
 +
       EXTRACT_WORDS(hx, lx, x);
 
       lx |= hx & 0xfffff;
 @@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w)
    while ( first != last ) {
       Double_t x = Double_t(*first);
       sumw += *w;
 -      sumw2 += (*w) * (*w); 
 +      sumw2 += (*w) * (*w);
       tot += (*w) * (x - mean)*(x - mean);
       ++first;
       ++w;
 diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h
 new file mode 100644
 index 0000000000..dba0dc0109
 --- /dev/null
 +++ b/math/mathcore/inc/TMath_VecTest.h
 @@ -0,0 +1,43 @@
 +// @(#)root/mathcore:$Id$
 +// Authors: Alejandro García Montoro
 +
 +/*************************************************************************
 + * Copyright (C) 2017, Alejandro García Montoro.                         *
 + * All rights reserved.                                                  *
 + *                                                                       *
 + * For the licensing terms see $ROOTSYS/LICENSE.                         *
 + * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 + *************************************************************************/
 +
 +#ifndef ROOT_TMath_VecTest
 +#define ROOT_TMath_VecTest
 +
 +
 +//////////////////////////////////////////////////////////////////////////
 +//                                                                      //
 +// TMath                                                                //
 +//                                                                      //
 +// Encapsulate most frequently used Math functions.                     //
 +// NB. The basic functions Min, Max, Abs and Sign are defined           //
 +// in TMathBase.                                                        //
 +//                                                                      //
 +//////////////////////////////////////////////////////////////////////////
 +#ifndef ROOT_Rtypes
 +#include "Rtypes.h"
 +#endif
 +#ifndef ROOT_TMathBase
 +#include "TMathBase.h"
 +#endif
 +
 +#include "TError.h"
 +#include <algorithm>
 +#include <limits>
 +#include <cmath>
 +
 +#include "Math/Math_vectypes.hxx"
 +
 +namespace TMath{
 +    Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE);
 +}
 +
 +#endif // ROOT_TMath_VecTest
 diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx
 index 942bb55637..11312ca758 100644
 --- a/math/mathcore/src/TMath.cxx
 +++ b/math/mathcore/src/TMath.cxx
 @@ -18,6 +18,7 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "TMath.h"
 +#include "TMath_VecTest.h"
 #include "TError.h"
 #include <math.h>
 #include <string.h>
 @@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 +/// Calculate a gaussian function with mean and sigma.
 +/// If norm=kTRUE (default is kFALSE) the result is divided
 +/// by sqrt(2*Pi)*sigma.
 +
 +Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm)
 +{
 +   if (sigma == 0)
 +      return 1.e30;
 +
 +   Double_v arg = (x-mean)/sigma;
 +
 +   // for |arg| > 39  result is zero in double precision
 +   vecCore::Mask_v<Double_v> mask = !(arg < -39.0 || arg > 39.0);
 +
 +   // Initialize the result to 0.0
 +   Double_v res(0.0);
 +
 +   // Compute the function only when the arg meets the criteria, using the mask computed before
 +   vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg));
 +
 +   if (!norm)
 +      return res;
 +
 +   return res/(2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
 +}
 +
 +
 +////////////////////////////////////////////////////////////////////////////////
 +/// Calculate a gaussian function with mean and sigma.
 +/// If norm=kTRUE (default is kFALSE) the result is divided
 +/// by sqrt(2*Pi)*sigma.
 +
 +// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm)
 +// {
 +//    vecCore::Mask_v<Double_v> null_sigma = (sigma == 0);
 +//
 +//    Double_v res = 0.0;
 +//    res(null_sigma) = 1.e30;
 +//
 +//    if(!null_sigma.all_of())
 +//    {
 +//       Double_v arg;
 +//
 +//       arg(!null_sigma) = (x-mean)/sigma;
 +//
 +//       // for |arg| > 39  result is zero in double precision
 +//       vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 || arg > 39.0);
 +//
 +//       res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg);
 +//
 +//       if (norm)
 +//          res(!null_sigma) /= (2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
 +//    }
 +//
 +//    return res;
 +// }
 +
 +
 +////////////////////////////////////////////////////////////////////////////////
 /// The LANDAU function.
 /// mu is a location parameter and correspond approximately to the most probable value
 /// and sigma is a scale parameter (not the sigma of the full distribution which is not defined)
 diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx
 new file mode 100644
 index 0000000000..e7d70b7771
 --- /dev/null
 +++ b/math/mathcore/test/vectorizationTest.cxx
 @@ -0,0 +1,90 @@
 +#include "Math/Random.h"
 +
 +#include "TRandom1.h"
 +#include "TStopwatch.h"
 +#include "TMath_VecTest.h"
 +
 +#include <iostream>
 +#include <random>
 +
 +// using namespace ROOT::Math;
 +using namespace ROOT;
 +using namespace std;
 +
 +// TStopwatch w; w.S
 +
 +int main(){
 +    // Test and vectorization size
 +    const int numRepetitions = 1000;
 +    const int inputSize = 100000;
 +    const int vecSize = vecCore::VectorSize<Double_v>();
 +
 +    // Vectorized and linear input
 +    Double_v vectorInput[inputSize];
 +    Double_t linearInput[inputSize * vecSize];
 +
 +    // Vectorized and linear output
 +    Double_v vectorOutput[inputSize];
 +    Double_t linearOutput[inputSize * vecSize];
 +
 +    // Parameters vector
 +    Double_t mean[inputSize];
 +    Double_t sigma[inputSize];
 +
 +    // Randomize input data and parameters
 +    TRandom1 rndmzr;
 +    rndmzr.RndmArray(inputSize * vecSize, linearInput);
 +    rndmzr.RndmArray(inputSize, mean);
 +    rndmzr.RndmArray(inputSize, sigma);
 +
 +    // Set -100 < mean < 100 and 0 < sigma < 200
 +    for (size_t i = 0; i < inputSize; i++) {
 +        mean[i] = mean[i] * 200 - 100;
 +        sigma[i] *= 200;
 +    }
 +
 +    // Copy input linear data to the vectorized array
 +    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
 +        for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
 +            vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx];
 +        }
 +    }
 +
 +    // Clocks to measure (l)inear and (v)ectorized performance
 +    TStopwatch clock_l, clock_v;
 +
 +    // Vectorized computation
 +    clock_v.Start();
 +    for (size_t _ = 0; _ < numRepetitions; _++) {
 +        for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
 +            vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]);
 +        }
 +    }
 +    clock_v.Stop();
 +
 +    // Linear computation
 +    int idx;
 +    clock_l.Start();
 +    for (size_t _ = 0; _ < numRepetitions; _++) {
 +    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
 +            for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
 +                idx = caseIdx * vecSize + vecIdx;
 +                linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]);
 +            }
 +        }
 +    }
 +    clock_l.Stop();
 +
 +    // Check
 +    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
 +        for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
 +            Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]);
 +            if(diff > 1e-15)
 +                std::cout << diff << std::endl;
 +        }
 +    }
 +
 +    cout << "Linear:     "; clock_l.Print();
 +    cout << "Vectorized: "; clock_v.Print();
 +    cout << "SPEEDUP:    x" << clock_l.RealTime() / clock_v.RealTime() << endl;
 +}

 --------------2.12.1--
	From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001
	From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?=
	<[email protected]>
	Date: Tue, 28 Mar 2017 01:53:45 +0200
	Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus
	MIME-Version: 1.0
	Content-Type: multipart/mixed; boundary="------------2.12.1"

	This is a multi-part message in MIME format.
	--------------2.12.1
	Content-Type: text/plain; charset=UTF-8; format=fixed
	Content-Transfer-Encoding: 8bit

	---
	math/mathcore/CMakeLists.txt \| 5 +-
	math/mathcore/inc/TMath.h \| 5 +-
	math/mathcore/inc/TMath_VecTest.h \| 43 +++++++++++++++
	math/mathcore/src/TMath.cxx \| 60 +++++++++++++++++++++
	math/mathcore/test/vectorizationTest.cxx \| 90 ++++++++++++++++++++++++++++++++
	5 files changed, 197 insertions(+), 6 deletions(-)
	create mode 100644 math/mathcore/inc/TMath_VecTest.h
	create mode 100644 math/mathcore/test/vectorizationTest.cxx


	--------------2.12.1
	Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
	Content-Transfer-Encoding: 8bit
	Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"

	diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
	index 8bcbd1f760..df980d89af 100644
	--- a/math/mathcore/CMakeLists.txt
	+++ b/math/mathcore/CMakeLists.txt
	@@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h
	Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h
	Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h
	Math/MersenneTwisterEngine.h Math/MixMaxEngine.h TRandomGen.h Math/LCGEngine.h
	+ TMath_VecTest.h
	)

	ROOT_GENERATE_DICTIONARY(G__MathCore TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM")
	@@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore .cxx .c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_
	ROOT_INSTALL_HEADERS()

	ROOT_ADD_TEST_SUBDIRECTORY(test)
	-
	-
	-
	-
	diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h
	index 5a6a365504..b9aea8db83 100644
	--- a/math/mathcore/inc/TMath.h
	+++ b/math/mathcore/inc/TMath.h
	@@ -35,6 +35,7 @@
	#include <limits>
	#include <cmath>

	+
	namespace TMath {

	/* ************************* */
	@@ -600,7 +601,7 @@ namespace detailsForFastMath {
	inline int IsNaN(double x)
	{
	UInt_t hx, lx;
	-
	+
	EXTRACT_WORDS(hx, lx, x);

	lx \|= hx & 0xfffff;
	@@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w)
	while ( first != last ) {
	Double_t x = Double_t(*first);
	sumw += *w;
	- sumw2 += (w) (*w);
	+ sumw2 += (w) (*w);
	tot += (w) (x - mean)*(x - mean);
	++first;
	++w;
	diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h
	new file mode 100644
	index 0000000000..dba0dc0109
	--- /dev/null
	+++ b/math/mathcore/inc/TMath_VecTest.h
	@@ -0,0 +1,43 @@
	+// @(#)root/mathcore:$Id$
	+// Authors: Alejandro García Montoro
	+
	+/*************************************************************************
	+ * Copyright (C) 2017, Alejandro García Montoro. *
	+ * All rights reserved. *
	+ * *
	+ * For the licensing terms see $ROOTSYS/LICENSE. *
	+ * For the list of contributors see $ROOTSYS/README/CREDITS. *
	+ *************************************************************************/
	+
	+#ifndef ROOT_TMath_VecTest
	+#define ROOT_TMath_VecTest
	+
	+
	+//////////////////////////////////////////////////////////////////////////
	+// //
	+// TMath //
	+// //
	+// Encapsulate most frequently used Math functions. //
	+// NB. The basic functions Min, Max, Abs and Sign are defined //
	+// in TMathBase. //
	+// //
	+//////////////////////////////////////////////////////////////////////////
	+#ifndef ROOT_Rtypes
	+#include "Rtypes.h"
	+#endif
	+#ifndef ROOT_TMathBase
	+#include "TMathBase.h"
	+#endif
	+
	+#include "TError.h"
	+#include <algorithm>
	+#include <limits>
	+#include <cmath>
	+
	+#include "Math/Math_vectypes.hxx"
	+
	+namespace TMath{
	+ Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE);
	+}
	+
	+#endif // ROOT_TMath_VecTest
	diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx
	index 942bb55637..11312ca758 100644
	--- a/math/mathcore/src/TMath.cxx
	+++ b/math/mathcore/src/TMath.cxx
	@@ -18,6 +18,7 @@
	//////////////////////////////////////////////////////////////////////////

	#include "TMath.h"
	+#include "TMath_VecTest.h"
	#include "TError.h"
	#include <math.h>
	#include <string.h>
	@@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm)
	}

	////////////////////////////////////////////////////////////////////////////////
	+/// Calculate a gaussian function with mean and sigma.
	+/// If norm=kTRUE (default is kFALSE) the result is divided
	+/// by sqrt(2Pi)sigma.
	+
	+Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm)
	+{
	+ if (sigma == 0)
	+ return 1.e30;
	+
	+ Double_v arg = (x-mean)/sigma;
	+
	+ // for \|arg\| > 39 result is zero in double precision
	+ vecCore::Mask_v<Double_v> mask = !(arg < -39.0 \|\| arg > 39.0);
	+
	+ // Initialize the result to 0.0
	+ Double_v res(0.0);
	+
	+ // Compute the function only when the arg meets the criteria, using the mask computed before
	+ vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg));
	+
	+ if (!norm)
	+ return res;
	+
	+ return res/(2.50662827463100024sigma); //sqrt(2Pi)=2.50662827463100024
	+}
	+
	+
	+////////////////////////////////////////////////////////////////////////////////
	+/// Calculate a gaussian function with mean and sigma.
	+/// If norm=kTRUE (default is kFALSE) the result is divided
	+/// by sqrt(2Pi)sigma.
	+
	+// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm)
	+// {
	+// vecCore::Mask_v<Double_v> null_sigma = (sigma == 0);
	+//
	+// Double_v res = 0.0;
	+// res(null_sigma) = 1.e30;
	+//
	+// if(!null_sigma.all_of())
	+// {
	+// Double_v arg;
	+//
	+// arg(!null_sigma) = (x-mean)/sigma;
	+//
	+// // for \|arg\| > 39 result is zero in double precision
	+// vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 \|\| arg > 39.0);
	+//
	+// res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg);
	+//
	+// if (norm)
	+// res(!null_sigma) /= (2.50662827463100024sigma); //sqrt(2Pi)=2.50662827463100024
	+// }
	+//
	+// return res;
	+// }
	+
	+
	+////////////////////////////////////////////////////////////////////////////////
	/// The LANDAU function.
	/// mu is a location parameter and correspond approximately to the most probable value
	/// and sigma is a scale parameter (not the sigma of the full distribution which is not defined)
	diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx
	new file mode 100644
	index 0000000000..e7d70b7771
	--- /dev/null
	+++ b/math/mathcore/test/vectorizationTest.cxx
	@@ -0,0 +1,90 @@
	+#include "Math/Random.h"
	+
	+#include "TRandom1.h"
	+#include "TStopwatch.h"
	+#include "TMath_VecTest.h"
	+
	+#include <iostream>
	+#include <random>
	+
	+// using namespace ROOT::Math;
	+using namespace ROOT;
	+using namespace std;
	+
	+// TStopwatch w; w.S
	+
	+int main(){
	+ // Test and vectorization size
	+ const int numRepetitions = 1000;
	+ const int inputSize = 100000;
	+ const int vecSize = vecCore::VectorSize<Double_v>();
	+
	+ // Vectorized and linear input
	+ Double_v vectorInput[inputSize];
	+ Double_t linearInput[inputSize * vecSize];
	+
	+ // Vectorized and linear output
	+ Double_v vectorOutput[inputSize];
	+ Double_t linearOutput[inputSize * vecSize];
	+
	+ // Parameters vector
	+ Double_t mean[inputSize];
	+ Double_t sigma[inputSize];
	+
	+ // Randomize input data and parameters
	+ TRandom1 rndmzr;
	+ rndmzr.RndmArray(inputSize * vecSize, linearInput);
	+ rndmzr.RndmArray(inputSize, mean);
	+ rndmzr.RndmArray(inputSize, sigma);
	+
	+ // Set -100 < mean < 100 and 0 < sigma < 200
	+ for (size_t i = 0; i < inputSize; i++) {
	+ mean[i] = mean[i] * 200 - 100;
	+ sigma[i] *= 200;
	+ }
	+
	+ // Copy input linear data to the vectorized array
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx];
	+ }
	+ }
	+
	+ // Clocks to measure (l)inear and (v)ectorized performance
	+ TStopwatch clock_l, clock_v;
	+
	+ // Vectorized computation
	+ clock_v.Start();
	+ for (size_t _ = 0; _ < numRepetitions; _++) {
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]);
	+ }
	+ }
	+ clock_v.Stop();
	+
	+ // Linear computation
	+ int idx;
	+ clock_l.Start();
	+ for (size_t _ = 0; _ < numRepetitions; _++) {
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ idx = caseIdx * vecSize + vecIdx;
	+ linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]);
	+ }
	+ }
	+ }
	+ clock_l.Stop();
	+
	+ // Check
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]);
	+ if(diff > 1e-15)
	+ std::cout << diff << std::endl;
	+ }
	+ }
	+
	+ cout << "Linear: "; clock_l.Print();
	+ cout << "Vectorized: "; clock_v.Print();
	+ cout << "SPEEDUP: x" << clock_l.RealTime() / clock_v.RealTime() << endl;
	+}

	--------------2.12.1--