Last active
May 4, 2017 03:50
-
-
Save willkill07/440fbd97f4fa12346598128463b4b8bd to your computer and use it in GitHub Desktop.
RAJA Reduce Redux
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// use C++11 to compile | |
#include <array> | |
#include <limits> | |
#include <numeric> | |
#include <type_traits> | |
#ifdef _OPENMP | |
#include <omp.h> | |
#endif | |
#define RAJA_HOST_DEVICE | |
template <typename...> | |
using void_t = void; | |
namespace RAJA { | |
enum class Policy { undefined, sequential, openmp, cuda, cilk }; | |
// RAJA operators already exists -- reuse what we have; enhance where necessary | |
namespace operators { | |
using IndexType = int; | |
template <typename T, typename I = IndexType> | |
struct LocationPair { | |
using index_type = I; | |
using value_type = T; | |
T value; | |
I index; | |
}; | |
template <typename T> | |
struct plus { | |
constexpr RAJA_HOST_DEVICE T | |
operator()(T const& t1, T const& t2) const { | |
return t1 + t2; | |
} | |
}; | |
template <typename T> | |
struct multiplies { | |
constexpr RAJA_HOST_DEVICE T | |
operator()(T const& t1, T const& t2) const { | |
return t1 * t2; | |
} | |
}; | |
template <typename T> | |
struct min { | |
constexpr RAJA_HOST_DEVICE T | |
operator()(T const& t1, T const& t2) const { | |
return (t1 < t2) ? t1 : t2; | |
} | |
}; | |
template <typename T> | |
struct max { | |
constexpr RAJA_HOST_DEVICE T | |
operator()(T const& t1, T const& t2) const { | |
return (t1 > t2) ? t1 : t2; | |
} | |
}; | |
template <typename Pair> | |
struct min_loc { | |
constexpr RAJA_HOST_DEVICE Pair | |
operator()(Pair const& p1, Pair const& p2) const { | |
return (p1.value < p2.value) ? p1 : p2; | |
} | |
}; | |
template <typename Pair> | |
struct max_loc { | |
constexpr RAJA_HOST_DEVICE Pair | |
operator()(Pair const& p1, Pair const& p2) const { | |
return (p1.value > p2.value) ? p1 : p2; | |
} | |
}; | |
// RAJA operators should have defaults! | |
// Each binary associative operator currently has ::identity | |
// Need to add for {min,max}_loc | |
// note: I am not sure which approach is better? | |
template <template <typename...> class Operator> | |
struct defaults {}; | |
template <> | |
struct defaults<plus> { | |
template <typename T> | |
static constexpr RAJA_HOST_DEVICE T | |
value() { | |
return T(0); | |
} | |
}; | |
template <> | |
struct defaults<multiplies> { | |
template <typename T> | |
static constexpr RAJA_HOST_DEVICE T | |
value() { | |
return T(1); | |
} | |
}; | |
template <> | |
struct defaults<min> { | |
template <typename T> | |
static constexpr RAJA_HOST_DEVICE T | |
value() { | |
// use RAJA::operators::limits<T>::max(); | |
return std::numeric_limits<T>::max(); | |
} | |
}; | |
template <> | |
struct defaults<max> { | |
template <typename T> | |
static constexpr RAJA_HOST_DEVICE T | |
value() { | |
// use RAJA::operators::limits<T>::min(); | |
return std::numeric_limits<T>::min(); | |
} | |
}; | |
// be smart and reuse for {min,max}_loc! | |
template <> | |
struct defaults<min_loc> { | |
template <typename T, typename I = typename LocationPair<T>::index_type> | |
static constexpr RAJA_HOST_DEVICE LocationPair<T, I> | |
value() { | |
return LocationPair<T, I>{defaults<min>::value<T>(), I()}; | |
} | |
}; | |
template <> | |
struct defaults<max_loc> { | |
template <typename T, typename I = typename LocationPair<T>::index_type> | |
static constexpr RAJA_HOST_DEVICE LocationPair<T, I> | |
value() { | |
return LocationPair<T, I>{defaults<max>::value<T>(), I()}; | |
} | |
}; | |
} | |
// This is defined for all reducers | |
template <Policy Pol, template <typename> class Op, typename T> | |
struct ReduceBase { | |
using value_type = decltype(operators::defaults<Op>::template value<T>()); | |
ReduceBase() = default; | |
template <typename... Ts> | |
constexpr ReduceBase(Ts&&... ts) : value{ts...} {} | |
constexpr ReduceBase(T const& v) : value{v} {} | |
ReduceBase(ReduceBase const&) = delete; | |
template <typename... Ts> | |
value_type | |
reduce(Ts&&... ts) const { | |
value = Op<value_type>{}(value, value_type{ts...}); | |
return value; | |
} | |
constexpr operator value_type() const { return value; } | |
value_type | |
get() const { | |
return value; | |
} | |
void | |
reset(value_type v = operators::defaults<Op>::template value<T>()) const { | |
value = v; | |
} | |
// conditionally enable aggregate operators pending Policy type | |
template <typename U> | |
value_type | |
operator+=(U&& v) const { | |
static_assert(std::is_same<Op<U>, operators::plus<U>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v); | |
} | |
template <typename U> | |
value_type | |
operator*=(U&& v) const { | |
static_assert(std::is_same<Op<U>, operators::multiplies<U>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v); | |
} | |
template <typename U> | |
value_type | |
min(U&& v) const { | |
static_assert(std::is_same<Op<U>, operators::min<U>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v); | |
} | |
template <typename U> | |
value_type | |
max(U&& v) const { | |
static_assert(std::is_same<Op<U>, operators::max<U>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v); | |
} | |
template <typename V, typename I> | |
value_type | |
min_loc(V&& v, I&& i) { | |
using Pair = operators::LocationPair<V, I>; | |
static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v, i); | |
} | |
template <typename V, typename I> | |
value_type | |
max_loc(V&& v, I&& i) { | |
using Pair = operators::LocationPair<V, I>; | |
static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value, | |
"invalid reduction function for operator"); | |
return reduce(v, i); | |
} | |
protected: | |
mutable value_type value{operators::defaults<Op>::template value<T>()}; | |
}; | |
// default Reduction class -- note how it is empty :) | |
template <Policy Pol, template <typename> class Op, typename T> | |
struct Reduce {}; | |
// magic grab-all for specializing on a Policy type | |
template <template <typename> class Op, typename T> | |
struct Reduce<Policy::sequential, Op, T> | |
: public ReduceBase<Policy::sequential, Op, T> { | |
using parent_t = ReduceBase<Policy::sequential, Op, T>; | |
using parent_t::get; | |
Reduce() = default; | |
Reduce(Reduce&&) = default; | |
Reduce(Reduce const& par) : parent{&par}, owner{false} {} | |
~Reduce() { | |
if (!owner) { | |
parent->reduce(get()); | |
} | |
} | |
private: | |
Reduce const* parent; | |
bool const owner{true}; | |
}; | |
#ifdef _OPENMP | |
// I know this doesn't work right now, but I think you get the idea... | |
template <template <typename> class Op, typename T> | |
struct Reduce<Policy::openmp, Op, T> | |
: public ReduceBase<Policy::openmp, Op, T> { | |
using parent_t = ReduceBase<Policy::openmp, Op, T>; | |
using parent_t::get; | |
using value_type = typename parent_t::value_type; | |
Reduce() = default; | |
template <typename... Ts> | |
constexpr Reduce(Ts&&... ts) : parent_t{ts...}, lock{new omp_lock_t}, data_block{new value_type[omp_get_max_threads()]} { | |
omp_init_lock(lock); | |
} | |
Reduce(Reduce&&) = default; | |
Reduce(Reduce const& par) : lock{par.lock}, parent{&par}, data_block{par.data_block}, owner{false} {} | |
~Reduce() { | |
if (!owner) { | |
omp_set_lock(lock); | |
parent->reduce(get()); | |
omp_unset_lock(lock); | |
} else { | |
for (int i = 0; i < omp_get_max_threads(); ++i) { | |
reduce(data_block[i]); | |
} | |
omp_destroy_lock(lock); | |
delete lock; | |
delete[] data_block; | |
} | |
} | |
private: | |
omp_lock_t* lock{nullptr}; | |
Reduce const* parent{nullptr}; | |
value_type* data_block{nullptr}; | |
bool const owner{true}; | |
}; | |
#endif | |
// TODO for other policies -- modify constructors and storage accordingly | |
// Convenience type definitions | |
template <Policy Pol, typename T> | |
using ReduceMin = Reduce<Pol, operators::min, T>; | |
template <Policy Pol, typename T> | |
using ReduceMax = Reduce<Pol, operators::max, T>; | |
template <Policy Pol, typename T> | |
using ReduceSum = Reduce<Pol, operators::plus, T>; | |
template <Policy Pol, typename T> | |
using ReduceProduct = Reduce<Pol, operators::multiplies, T>; | |
template <Policy Pol, typename T> | |
using ReduceMinLoc = Reduce<Pol, operators::min_loc, T>; | |
template <Policy Pol, typename T> | |
using ReduceMaxLoc = Reduce<Pol, operators::max_loc, T>; | |
} | |
// Additional discussion -- reductions of arrays | |
// | |
// 1). define/use something similar to span<> from GSL | |
// 2). specialize min/max/plus for span<> | |
// 3). specialize operators::defaults for span<> | |
// 4). everything else should just work | |
// example code | |
template <typename Fn> | |
void | |
forall(int begin, int end, Fn&& f) { | |
for (int i = begin; i < end; ++i) { | |
f(i); | |
} | |
} | |
void | |
get_stats(double* array, int N) { | |
RAJA::ReduceMin<RAJA::Policy::sequential, double> min; | |
RAJA::ReduceMax<RAJA::Policy::sequential, double> max; | |
RAJA::ReduceSum<RAJA::Policy::sequential, double> sum; | |
forall(0, N, [=](int i) { | |
min.min(array[i]); | |
max.max(array[i]); | |
sum += array[i]; | |
}); | |
printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(), | |
sum.get()); | |
} | |
void | |
get_stats2(double* array, int N) { | |
RAJA::ReduceMin<RAJA::Policy::sequential, double> min; | |
RAJA::ReduceMax<RAJA::Policy::sequential, double> max; | |
RAJA::ReduceSum<RAJA::Policy::sequential, double> sum; | |
for (int i = 0; i < N; ++i) { | |
min.min(array[i]); | |
max.max(array[i]); | |
sum += array[i]; | |
} | |
printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(), | |
sum.get()); | |
} | |
int | |
main() { | |
RAJA::ReduceMinLoc<RAJA::Policy::sequential, double> loc; | |
std::array<double, 100> arr; | |
std::iota(arr.rbegin(), arr.rend(), 1); | |
for (int i = 0; i < 100; ++i) { | |
loc.min_loc(arr[i], i); | |
// loc.reduce(arr[i], i); | |
} | |
get_stats(arr.data(), 100); | |
get_stats2(arr.data(), 100); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Next steps:
switch ReduceBase to non-owning (only have it store a pointer) -- the ctor should accept a callable that returns the valid write space.
example (for sequential exec):
and for openmp exec: