Last active
April 3, 2017 15:26
-
-
Save agarciamontoro/c5a302a9d2d17acf89946006be5aa73b to your computer and use it in GitHub Desktop.
Implementation of a vectorized version of TMath::Gaus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001 | |
From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?= | |
<[email protected]> | |
Date: Tue, 28 Mar 2017 01:53:45 +0200 | |
Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus | |
MIME-Version: 1.0 | |
Content-Type: multipart/mixed; boundary="------------2.12.1" | |
This is a multi-part message in MIME format. | |
--------------2.12.1 | |
Content-Type: text/plain; charset=UTF-8; format=fixed | |
Content-Transfer-Encoding: 8bit | |
--- | |
math/mathcore/CMakeLists.txt | 5 +- | |
math/mathcore/inc/TMath.h | 5 +- | |
math/mathcore/inc/TMath_VecTest.h | 43 +++++++++++++++ | |
math/mathcore/src/TMath.cxx | 60 +++++++++++++++++++++ | |
math/mathcore/test/vectorizationTest.cxx | 90 ++++++++++++++++++++++++++++++++ | |
5 files changed, 197 insertions(+), 6 deletions(-) | |
create mode 100644 math/mathcore/inc/TMath_VecTest.h | |
create mode 100644 math/mathcore/test/vectorizationTest.cxx | |
--------------2.12.1 | |
Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch" | |
Content-Transfer-Encoding: 8bit | |
Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch" | |
diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt | |
index 8bcbd1f760..df980d89af 100644 | |
--- a/math/mathcore/CMakeLists.txt | |
+++ b/math/mathcore/CMakeLists.txt | |
@@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h | |
Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h | |
Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h | |
Math/MersenneTwisterEngine.h Math/MixMaxEngine.h TRandomGen.h Math/LCGEngine.h | |
+ TMath_VecTest.h | |
) | |
ROOT_GENERATE_DICTIONARY(G__MathCore TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM") | |
@@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore *.cxx *.c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_ | |
ROOT_INSTALL_HEADERS() | |
ROOT_ADD_TEST_SUBDIRECTORY(test) | |
- | |
- | |
- | |
- | |
diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h | |
index 5a6a365504..b9aea8db83 100644 | |
--- a/math/mathcore/inc/TMath.h | |
+++ b/math/mathcore/inc/TMath.h | |
@@ -35,6 +35,7 @@ | |
#include <limits> | |
#include <cmath> | |
+ | |
namespace TMath { | |
/* ************************* */ | |
@@ -600,7 +601,7 @@ namespace detailsForFastMath { | |
inline int IsNaN(double x) | |
{ | |
UInt_t hx, lx; | |
- | |
+ | |
EXTRACT_WORDS(hx, lx, x); | |
lx |= hx & 0xfffff; | |
@@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w) | |
while ( first != last ) { | |
Double_t x = Double_t(*first); | |
sumw += *w; | |
- sumw2 += (*w) * (*w); | |
+ sumw2 += (*w) * (*w); | |
tot += (*w) * (x - mean)*(x - mean); | |
++first; | |
++w; | |
diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h | |
new file mode 100644 | |
index 0000000000..dba0dc0109 | |
--- /dev/null | |
+++ b/math/mathcore/inc/TMath_VecTest.h | |
@@ -0,0 +1,43 @@ | |
+// @(#)root/mathcore:$Id$ | |
+// Authors: Alejandro García Montoro | |
+ | |
+/************************************************************************* | |
+ * Copyright (C) 2017, Alejandro García Montoro. * | |
+ * All rights reserved. * | |
+ * * | |
+ * For the licensing terms see $ROOTSYS/LICENSE. * | |
+ * For the list of contributors see $ROOTSYS/README/CREDITS. * | |
+ *************************************************************************/ | |
+ | |
+#ifndef ROOT_TMath_VecTest | |
+#define ROOT_TMath_VecTest | |
+ | |
+ | |
+////////////////////////////////////////////////////////////////////////// | |
+// // | |
+// TMath // | |
+// // | |
+// Encapsulate most frequently used Math functions. // | |
+// NB. The basic functions Min, Max, Abs and Sign are defined // | |
+// in TMathBase. // | |
+// // | |
+////////////////////////////////////////////////////////////////////////// | |
+#ifndef ROOT_Rtypes | |
+#include "Rtypes.h" | |
+#endif | |
+#ifndef ROOT_TMathBase | |
+#include "TMathBase.h" | |
+#endif | |
+ | |
+#include "TError.h" | |
+#include <algorithm> | |
+#include <limits> | |
+#include <cmath> | |
+ | |
+#include "Math/Math_vectypes.hxx" | |
+ | |
+namespace TMath{ | |
+ Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE); | |
+} | |
+ | |
+#endif // ROOT_TMath_VecTest | |
diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx | |
index 942bb55637..11312ca758 100644 | |
--- a/math/mathcore/src/TMath.cxx | |
+++ b/math/mathcore/src/TMath.cxx | |
@@ -18,6 +18,7 @@ | |
////////////////////////////////////////////////////////////////////////// | |
#include "TMath.h" | |
+#include "TMath_VecTest.h" | |
#include "TError.h" | |
#include <math.h> | |
#include <string.h> | |
@@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm) | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
+/// Calculate a gaussian function with mean and sigma. | |
+/// If norm=kTRUE (default is kFALSE) the result is divided | |
+/// by sqrt(2*Pi)*sigma. | |
+ | |
+Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm) | |
+{ | |
+ if (sigma == 0) | |
+ return 1.e30; | |
+ | |
+ Double_v arg = (x-mean)/sigma; | |
+ | |
+ // for |arg| > 39 result is zero in double precision | |
+ vecCore::Mask_v<Double_v> mask = !(arg < -39.0 || arg > 39.0); | |
+ | |
+ // Initialize the result to 0.0 | |
+ Double_v res(0.0); | |
+ | |
+ // Compute the function only when the arg meets the criteria, using the mask computed before | |
+ vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg)); | |
+ | |
+ if (!norm) | |
+ return res; | |
+ | |
+ return res/(2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024 | |
+} | |
+ | |
+ | |
+//////////////////////////////////////////////////////////////////////////////// | |
+/// Calculate a gaussian function with mean and sigma. | |
+/// If norm=kTRUE (default is kFALSE) the result is divided | |
+/// by sqrt(2*Pi)*sigma. | |
+ | |
+// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm) | |
+// { | |
+// vecCore::Mask_v<Double_v> null_sigma = (sigma == 0); | |
+// | |
+// Double_v res = 0.0; | |
+// res(null_sigma) = 1.e30; | |
+// | |
+// if(!null_sigma.all_of()) | |
+// { | |
+// Double_v arg; | |
+// | |
+// arg(!null_sigma) = (x-mean)/sigma; | |
+// | |
+// // for |arg| > 39 result is zero in double precision | |
+// vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 || arg > 39.0); | |
+// | |
+// res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg); | |
+// | |
+// if (norm) | |
+// res(!null_sigma) /= (2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024 | |
+// } | |
+// | |
+// return res; | |
+// } | |
+ | |
+ | |
+//////////////////////////////////////////////////////////////////////////////// | |
/// The LANDAU function. | |
/// mu is a location parameter and correspond approximately to the most probable value | |
/// and sigma is a scale parameter (not the sigma of the full distribution which is not defined) | |
diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx | |
new file mode 100644 | |
index 0000000000..e7d70b7771 | |
--- /dev/null | |
+++ b/math/mathcore/test/vectorizationTest.cxx | |
@@ -0,0 +1,90 @@ | |
+#include "Math/Random.h" | |
+ | |
+#include "TRandom1.h" | |
+#include "TStopwatch.h" | |
+#include "TMath_VecTest.h" | |
+ | |
+#include <iostream> | |
+#include <random> | |
+ | |
+// using namespace ROOT::Math; | |
+using namespace ROOT; | |
+using namespace std; | |
+ | |
+// TStopwatch w; w.S | |
+ | |
+int main(){ | |
+ // Test and vectorization size | |
+ const int numRepetitions = 1000; | |
+ const int inputSize = 100000; | |
+ const int vecSize = vecCore::VectorSize<Double_v>(); | |
+ | |
+ // Vectorized and linear input | |
+ Double_v vectorInput[inputSize]; | |
+ Double_t linearInput[inputSize * vecSize]; | |
+ | |
+ // Vectorized and linear output | |
+ Double_v vectorOutput[inputSize]; | |
+ Double_t linearOutput[inputSize * vecSize]; | |
+ | |
+ // Parameters vector | |
+ Double_t mean[inputSize]; | |
+ Double_t sigma[inputSize]; | |
+ | |
+ // Randomize input data and parameters | |
+ TRandom1 rndmzr; | |
+ rndmzr.RndmArray(inputSize * vecSize, linearInput); | |
+ rndmzr.RndmArray(inputSize, mean); | |
+ rndmzr.RndmArray(inputSize, sigma); | |
+ | |
+ // Set -100 < mean < 100 and 0 < sigma < 200 | |
+ for (size_t i = 0; i < inputSize; i++) { | |
+ mean[i] = mean[i] * 200 - 100; | |
+ sigma[i] *= 200; | |
+ } | |
+ | |
+ // Copy input linear data to the vectorized array | |
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) { | |
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) { | |
+ vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx]; | |
+ } | |
+ } | |
+ | |
+ // Clocks to measure (l)inear and (v)ectorized performance | |
+ TStopwatch clock_l, clock_v; | |
+ | |
+ // Vectorized computation | |
+ clock_v.Start(); | |
+ for (size_t _ = 0; _ < numRepetitions; _++) { | |
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) { | |
+ vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]); | |
+ } | |
+ } | |
+ clock_v.Stop(); | |
+ | |
+ // Linear computation | |
+ int idx; | |
+ clock_l.Start(); | |
+ for (size_t _ = 0; _ < numRepetitions; _++) { | |
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) { | |
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) { | |
+ idx = caseIdx * vecSize + vecIdx; | |
+ linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]); | |
+ } | |
+ } | |
+ } | |
+ clock_l.Stop(); | |
+ | |
+ // Check | |
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) { | |
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) { | |
+ Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]); | |
+ if(diff > 1e-15) | |
+ std::cout << diff << std::endl; | |
+ } | |
+ } | |
+ | |
+ cout << "Linear: "; clock_l.Print(); | |
+ cout << "Vectorized: "; clock_v.Print(); | |
+ cout << "SPEEDUP: x" << clock_l.RealTime() / clock_v.RealTime() << endl; | |
+} | |
--------------2.12.1-- | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment