Skip to content

Instantly share code, notes, and snippets.

@willkill07
Last active May 4, 2017 03:50
Show Gist options
  • Save willkill07/440fbd97f4fa12346598128463b4b8bd to your computer and use it in GitHub Desktop.
Save willkill07/440fbd97f4fa12346598128463b4b8bd to your computer and use it in GitHub Desktop.
RAJA Reduce Redux
// use C++11 to compile
#include <array>
#include <limits>
#include <numeric>
#include <type_traits>
#ifdef _OPENMP
#include <omp.h>
#endif
#define RAJA_HOST_DEVICE
template <typename...>
using void_t = void;
namespace RAJA {
enum class Policy { undefined, sequential, openmp, cuda, cilk };
// RAJA operators already exists -- reuse what we have; enhance where necessary
namespace operators {
using IndexType = int;
template <typename T, typename I = IndexType>
struct LocationPair {
using index_type = I;
using value_type = T;
T value;
I index;
};
template <typename T>
struct plus {
constexpr RAJA_HOST_DEVICE T
operator()(T const& t1, T const& t2) const {
return t1 + t2;
}
};
template <typename T>
struct multiplies {
constexpr RAJA_HOST_DEVICE T
operator()(T const& t1, T const& t2) const {
return t1 * t2;
}
};
template <typename T>
struct min {
constexpr RAJA_HOST_DEVICE T
operator()(T const& t1, T const& t2) const {
return (t1 < t2) ? t1 : t2;
}
};
template <typename T>
struct max {
constexpr RAJA_HOST_DEVICE T
operator()(T const& t1, T const& t2) const {
return (t1 > t2) ? t1 : t2;
}
};
template <typename Pair>
struct min_loc {
constexpr RAJA_HOST_DEVICE Pair
operator()(Pair const& p1, Pair const& p2) const {
return (p1.value < p2.value) ? p1 : p2;
}
};
template <typename Pair>
struct max_loc {
constexpr RAJA_HOST_DEVICE Pair
operator()(Pair const& p1, Pair const& p2) const {
return (p1.value > p2.value) ? p1 : p2;
}
};
// RAJA operators should have defaults!
// Each binary associative operator currently has ::identity
// Need to add for {min,max}_loc
// note: I am not sure which approach is better?
template <template <typename...> class Operator>
struct defaults {};
template <>
struct defaults<plus> {
template <typename T>
static constexpr RAJA_HOST_DEVICE T
value() {
return T(0);
}
};
template <>
struct defaults<multiplies> {
template <typename T>
static constexpr RAJA_HOST_DEVICE T
value() {
return T(1);
}
};
template <>
struct defaults<min> {
template <typename T>
static constexpr RAJA_HOST_DEVICE T
value() {
// use RAJA::operators::limits<T>::max();
return std::numeric_limits<T>::max();
}
};
template <>
struct defaults<max> {
template <typename T>
static constexpr RAJA_HOST_DEVICE T
value() {
// use RAJA::operators::limits<T>::min();
return std::numeric_limits<T>::min();
}
};
// be smart and reuse for {min,max}_loc!
template <>
struct defaults<min_loc> {
template <typename T, typename I = typename LocationPair<T>::index_type>
static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
value() {
return LocationPair<T, I>{defaults<min>::value<T>(), I()};
}
};
template <>
struct defaults<max_loc> {
template <typename T, typename I = typename LocationPair<T>::index_type>
static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
value() {
return LocationPair<T, I>{defaults<max>::value<T>(), I()};
}
};
}
// This is defined for all reducers
template <Policy Pol, template <typename> class Op, typename T>
struct ReduceBase {
using value_type = decltype(operators::defaults<Op>::template value<T>());
ReduceBase() = default;
template <typename... Ts>
constexpr ReduceBase(Ts&&... ts) : value{ts...} {}
constexpr ReduceBase(T const& v) : value{v} {}
ReduceBase(ReduceBase const&) = delete;
template <typename... Ts>
value_type
reduce(Ts&&... ts) const {
value = Op<value_type>{}(value, value_type{ts...});
return value;
}
constexpr operator value_type() const { return value; }
value_type
get() const {
return value;
}
void
reset(value_type v = operators::defaults<Op>::template value<T>()) const {
value = v;
}
// conditionally enable aggregate operators pending Policy type
template <typename U>
value_type
operator+=(U&& v) const {
static_assert(std::is_same<Op<U>, operators::plus<U>>::value,
"invalid reduction function for operator");
return reduce(v);
}
template <typename U>
value_type
operator*=(U&& v) const {
static_assert(std::is_same<Op<U>, operators::multiplies<U>>::value,
"invalid reduction function for operator");
return reduce(v);
}
template <typename U>
value_type
min(U&& v) const {
static_assert(std::is_same<Op<U>, operators::min<U>>::value,
"invalid reduction function for operator");
return reduce(v);
}
template <typename U>
value_type
max(U&& v) const {
static_assert(std::is_same<Op<U>, operators::max<U>>::value,
"invalid reduction function for operator");
return reduce(v);
}
template <typename V, typename I>
value_type
min_loc(V&& v, I&& i) {
using Pair = operators::LocationPair<V, I>;
static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
"invalid reduction function for operator");
return reduce(v, i);
}
template <typename V, typename I>
value_type
max_loc(V&& v, I&& i) {
using Pair = operators::LocationPair<V, I>;
static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
"invalid reduction function for operator");
return reduce(v, i);
}
protected:
mutable value_type value{operators::defaults<Op>::template value<T>()};
};
// default Reduction class -- note how it is empty :)
template <Policy Pol, template <typename> class Op, typename T>
struct Reduce {};
// magic grab-all for specializing on a Policy type
template <template <typename> class Op, typename T>
struct Reduce<Policy::sequential, Op, T>
: public ReduceBase<Policy::sequential, Op, T> {
using parent_t = ReduceBase<Policy::sequential, Op, T>;
using parent_t::get;
Reduce() = default;
Reduce(Reduce&&) = default;
Reduce(Reduce const& par) : parent{&par}, owner{false} {}
~Reduce() {
if (!owner) {
parent->reduce(get());
}
}
private:
Reduce const* parent;
bool const owner{true};
};
#ifdef _OPENMP
// I know this doesn't work right now, but I think you get the idea...
template <template <typename> class Op, typename T>
struct Reduce<Policy::openmp, Op, T>
: public ReduceBase<Policy::openmp, Op, T> {
using parent_t = ReduceBase<Policy::openmp, Op, T>;
using parent_t::get;
using value_type = typename parent_t::value_type;
Reduce() = default;
template <typename... Ts>
constexpr Reduce(Ts&&... ts) : parent_t{ts...}, lock{new omp_lock_t}, data_block{new value_type[omp_get_max_threads()]} {
omp_init_lock(lock);
}
Reduce(Reduce&&) = default;
Reduce(Reduce const& par) : lock{par.lock}, parent{&par}, data_block{par.data_block}, owner{false} {}
~Reduce() {
if (!owner) {
omp_set_lock(lock);
parent->reduce(get());
omp_unset_lock(lock);
} else {
for (int i = 0; i < omp_get_max_threads(); ++i) {
reduce(data_block[i]);
}
omp_destroy_lock(lock);
delete lock;
delete[] data_block;
}
}
private:
omp_lock_t* lock{nullptr};
Reduce const* parent{nullptr};
value_type* data_block{nullptr};
bool const owner{true};
};
#endif
// TODO for other policies -- modify constructors and storage accordingly
// Convenience type definitions
template <Policy Pol, typename T>
using ReduceMin = Reduce<Pol, operators::min, T>;
template <Policy Pol, typename T>
using ReduceMax = Reduce<Pol, operators::max, T>;
template <Policy Pol, typename T>
using ReduceSum = Reduce<Pol, operators::plus, T>;
template <Policy Pol, typename T>
using ReduceProduct = Reduce<Pol, operators::multiplies, T>;
template <Policy Pol, typename T>
using ReduceMinLoc = Reduce<Pol, operators::min_loc, T>;
template <Policy Pol, typename T>
using ReduceMaxLoc = Reduce<Pol, operators::max_loc, T>;
}
// Additional discussion -- reductions of arrays
//
// 1). define/use something similar to span<> from GSL
// 2). specialize min/max/plus for span<>
// 3). specialize operators::defaults for span<>
// 4). everything else should just work
// example code
template <typename Fn>
void
forall(int begin, int end, Fn&& f) {
for (int i = begin; i < end; ++i) {
f(i);
}
}
void
get_stats(double* array, int N) {
RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;
forall(0, N, [=](int i) {
min.min(array[i]);
max.max(array[i]);
sum += array[i];
});
printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
sum.get());
}
void
get_stats2(double* array, int N) {
RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;
for (int i = 0; i < N; ++i) {
min.min(array[i]);
max.max(array[i]);
sum += array[i];
}
printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
sum.get());
}
int
main() {
RAJA::ReduceMinLoc<RAJA::Policy::sequential, double> loc;
std::array<double, 100> arr;
std::iota(arr.rbegin(), arr.rend(), 1);
for (int i = 0; i < 100; ++i) {
loc.min_loc(arr[i], i);
// loc.reduce(arr[i], i);
}
get_stats(arr.data(), 100);
get_stats2(arr.data(), 100);
return 0;
}
@willkill07
Copy link
Author

Next steps:

switch ReduceBase to non-owning (only have it store a pointer) -- the ctor should accept a callable that returns the valid write space.

example (for sequential exec):

value_type data_block;

...

value_type& data() {
  return data_block;
}

...

template <typename... Ts>
constexpr Reduce(Ts&&... ts) : parent_t{data, ts...} {}

and for openmp exec:

value_type* data_block{new value_type[omp_get_max_threads()]};

...

value_type& data() {
  return data_block[omp_get_thread_num()];
}

...

template <typename... Ts>
constexpr Reduce(Ts&&... ts) : parent_t{data, ts...} {}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment