willkill07 · May 4, 2017 03:50 · willkill07 · May 4, 2017
diff --git a/ReduceRedux.cpp b/ReduceRedux.cpp
 // use C++11 to compile

 #include <array>
 #include <limits>
 #include <numeric>
 #include <type_traits>

 #ifdef _OPENMP
 #include <omp.h>
 #endif

 #define RAJA_HOST_DEVICE

 template <typename...>
 using void_t = void;

 namespace RAJA {

 enum class Policy { undefined, sequential, openmp, cuda, cilk };

 // RAJA operators already exists -- reuse what we have; enhance where necessary
 namespace operators {

 using IndexType = int;

 template <typename T, typename I = IndexType>
 struct LocationPair {
  using index_type = I;
  using value_type = T;
  T value;
  I index;
 };

 template <typename T>
 struct plus {
  constexpr RAJA_HOST_DEVICE T
  operator()(T const& t1, T const& t2) const {
    return t1 + t2;
  }
 };
 template <typename T>
 struct multiplies {
  constexpr RAJA_HOST_DEVICE T
  operator()(T const& t1, T const& t2) const {
    return t1 * t2;
  }
 };
 template <typename T>
 struct min {
  constexpr RAJA_HOST_DEVICE T
  operator()(T const& t1, T const& t2) const {
    return (t1 < t2) ? t1 : t2;
  }
 };
 template <typename T>
 struct max {
  constexpr RAJA_HOST_DEVICE T
  operator()(T const& t1, T const& t2) const {
    return (t1 > t2) ? t1 : t2;
  }
 };
 template <typename Pair>
 struct min_loc {
  constexpr RAJA_HOST_DEVICE Pair
  operator()(Pair const& p1, Pair const& p2) const {
    return (p1.value < p2.value) ? p1 : p2;
  }
 };
 template <typename Pair>
 struct max_loc {
  constexpr RAJA_HOST_DEVICE Pair
  operator()(Pair const& p1, Pair const& p2) const {
    return (p1.value > p2.value) ? p1 : p2;
  }
 };

 // RAJA operators should have defaults!
 //  Each binary associative operator currently has ::identity
 //  Need to add for {min,max}_loc
 //  note: I am not sure which approach is better?
 template <template <typename...> class Operator>
 struct defaults {};

 template <>
 struct defaults<plus> {
  template <typename T>
  static constexpr RAJA_HOST_DEVICE T
  value() {
    return T(0);
  }
 };

 template <>
 struct defaults<multiplies> {
  template <typename T>
  static constexpr RAJA_HOST_DEVICE T
  value() {
    return T(1);
  }
 };

 template <>
 struct defaults<min> {
  template <typename T>
  static constexpr RAJA_HOST_DEVICE T
  value() {
    // use RAJA::operators::limits<T>::max();
    return std::numeric_limits<T>::max();
  }
 };

 template <>
 struct defaults<max> {
  template <typename T>
  static constexpr RAJA_HOST_DEVICE T
  value() {
    // use RAJA::operators::limits<T>::min();
    return std::numeric_limits<T>::min();
  }
 };

 // be smart and reuse for {min,max}_loc!
 template <>
 struct defaults<min_loc> {
  template <typename T, typename I = typename LocationPair<T>::index_type>
  static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
  value() {
    return LocationPair<T, I>{defaults<min>::value<T>(), I()};
  }
 };

 template <>
 struct defaults<max_loc> {
  template <typename T, typename I = typename LocationPair<T>::index_type>
  static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
  value() {
    return LocationPair<T, I>{defaults<max>::value<T>(), I()};
  }
 };
 }

 // This is defined for all reducers
 template <Policy Pol, template <typename> class Op, typename T>
 struct ReduceBase {
  using value_type = decltype(operators::defaults<Op>::template value<T>());

  ReduceBase() = default;
  template <typename... Ts>
  constexpr ReduceBase(Ts&&... ts) : value{ts...} {}
  constexpr ReduceBase(T const& v) : value{v} {}
  ReduceBase(ReduceBase const&) = delete;

  template <typename... Ts>
  value_type
  reduce(Ts&&... ts) const {
    value = Op<value_type>{}(value, value_type{ts...});
    return value;
  }

  constexpr operator value_type() const { return value; }

  value_type
  get() const {
    return value;
  }

  void
  reset(value_type v = operators::defaults<Op>::template value<T>()) const {
    value = v;
  }

  // conditionally enable aggregate operators pending Policy type
  template <typename U>
  value_type
  operator+=(U&& v) const {
    static_assert(std::is_same<Op<U>, operators::plus<U>>::value,
                  "invalid reduction function for operator");
    return reduce(v);
  }
  template <typename U>
  value_type
  operator*=(U&& v) const {
    static_assert(std::is_same<Op<U>, operators::multiplies<U>>::value,
                  "invalid reduction function for operator");
    return reduce(v);
  }
  template <typename U>
  value_type
  min(U&& v) const {
    static_assert(std::is_same<Op<U>, operators::min<U>>::value,
                  "invalid reduction function for operator");
    return reduce(v);
  }
  template <typename U>
  value_type
  max(U&& v) const {
    static_assert(std::is_same<Op<U>, operators::max<U>>::value,
                  "invalid reduction function for operator");
    return reduce(v);
  }

  template <typename V, typename I>
  value_type
  min_loc(V&& v, I&& i) {
    using Pair = operators::LocationPair<V, I>;
    static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
                  "invalid reduction function for operator");
    return reduce(v, i);
  }

  template <typename V, typename I>
  value_type
  max_loc(V&& v, I&& i) {
    using Pair = operators::LocationPair<V, I>;
    static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
                  "invalid reduction function for operator");
    return reduce(v, i);
  }

 protected:
  mutable value_type value{operators::defaults<Op>::template value<T>()};
 };

 // default Reduction class -- note how it is empty :)
 template <Policy Pol, template <typename> class Op, typename T>
 struct Reduce {};

 // magic grab-all for specializing on a Policy type

 template <template <typename> class Op, typename T>
 struct Reduce<Policy::sequential, Op, T>
    : public ReduceBase<Policy::sequential, Op, T> {
  using parent_t = ReduceBase<Policy::sequential, Op, T>;
  using parent_t::get;

  Reduce()         = default;
  Reduce(Reduce&&) = default;
  Reduce(Reduce const& par) : parent{&par}, owner{false} {}
  ~Reduce() {
    if (!owner) {
      parent->reduce(get());
    }
  }

 private:
  Reduce const* parent;
  bool const owner{true};
 };


 #ifdef _OPENMP

 // I know this doesn't work right now, but I think you get the idea...

 template <template <typename> class Op, typename T>
 struct Reduce<Policy::openmp, Op, T>
    : public ReduceBase<Policy::openmp, Op, T> {
  using parent_t = ReduceBase<Policy::openmp, Op, T>;
  using parent_t::get;

  using value_type = typename parent_t::value_type;

  Reduce() = default;

  template <typename... Ts>
  constexpr Reduce(Ts&&... ts) : parent_t{ts...}, lock{new omp_lock_t}, data_block{new value_type[omp_get_max_threads()]} {
    omp_init_lock(lock);
  }

  Reduce(Reduce&&) = default;
  Reduce(Reduce const& par) : lock{par.lock}, parent{&par}, data_block{par.data_block}, owner{false} {}
  ~Reduce() {
    if (!owner) {
      omp_set_lock(lock);
      parent->reduce(get());
      omp_unset_lock(lock);
    } else {
      for (int i = 0; i < omp_get_max_threads(); ++i) {
        reduce(data_block[i]);
      }
      omp_destroy_lock(lock);
      delete lock;
      delete[] data_block;
    }
  }

 private:
  omp_lock_t* lock{nullptr};
  Reduce const* parent{nullptr};
  value_type* data_block{nullptr};
  bool const owner{true};
 };

 #endif

 // TODO for other policies -- modify constructors and storage accordingly

 // Convenience type definitions
 template <Policy Pol, typename T>
 using ReduceMin = Reduce<Pol, operators::min, T>;
 template <Policy Pol, typename T>
 using ReduceMax = Reduce<Pol, operators::max, T>;
 template <Policy Pol, typename T>
 using ReduceSum = Reduce<Pol, operators::plus, T>;
 template <Policy Pol, typename T>
 using ReduceProduct = Reduce<Pol, operators::multiplies, T>;
 template <Policy Pol, typename T>
 using ReduceMinLoc = Reduce<Pol, operators::min_loc, T>;
 template <Policy Pol, typename T>
 using ReduceMaxLoc = Reduce<Pol, operators::max_loc, T>;
 }

 // Additional discussion -- reductions of arrays
 //
 // 1). define/use something similar to span<> from GSL
 // 2). specialize min/max/plus for span<>
 // 3). specialize operators::defaults for span<>
 // 4). everything else should just work

 // example code

 template <typename Fn>
 void
 forall(int begin, int end, Fn&& f) {
  for (int i = begin; i < end; ++i) {
    f(i);
  }
 }

 void
 get_stats(double* array, int N) {
  RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
  RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
  RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;

  forall(0, N, [=](int i) {
    min.min(array[i]);
    max.max(array[i]);
    sum += array[i];
  });
  printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
         sum.get());
 }

 void
 get_stats2(double* array, int N) {
  RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
  RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
  RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;

  for (int i = 0; i < N; ++i) {
    min.min(array[i]);
    max.max(array[i]);
    sum += array[i];
  }
  printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
         sum.get());
 }

 int
 main() {
  RAJA::ReduceMinLoc<RAJA::Policy::sequential, double> loc;
  std::array<double, 100> arr;
  std::iota(arr.rbegin(), arr.rend(), 1);
  for (int i = 0; i < 100; ++i) {
    loc.min_loc(arr[i], i);
    // loc.reduce(arr[i], i);
  }
  get_stats(arr.data(), 100);
  get_stats2(arr.data(), 100);

  return 0;
 }
	// use C++11 to compile

	#include <array>
	#include <limits>
	#include <numeric>
	#include <type_traits>

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	#define RAJA_HOST_DEVICE

	template <typename...>
	using void_t = void;

	namespace RAJA {

	enum class Policy { undefined, sequential, openmp, cuda, cilk };

	// RAJA operators already exists -- reuse what we have; enhance where necessary
	namespace operators {

	using IndexType = int;

	template <typename T, typename I = IndexType>
	struct LocationPair {
	using index_type = I;
	using value_type = T;
	T value;
	I index;
	};

	template <typename T>
	struct plus {
	constexpr RAJA_HOST_DEVICE T
	operator()(T const& t1, T const& t2) const {
	return t1 + t2;
	}
	};
	template <typename T>
	struct multiplies {
	constexpr RAJA_HOST_DEVICE T
	operator()(T const& t1, T const& t2) const {
	return t1 * t2;
	}
	};
	template <typename T>
	struct min {
	constexpr RAJA_HOST_DEVICE T
	operator()(T const& t1, T const& t2) const {
	return (t1 < t2) ? t1 : t2;
	}
	};
	template <typename T>
	struct max {
	constexpr RAJA_HOST_DEVICE T
	operator()(T const& t1, T const& t2) const {
	return (t1 > t2) ? t1 : t2;
	}
	};
	template <typename Pair>
	struct min_loc {
	constexpr RAJA_HOST_DEVICE Pair
	operator()(Pair const& p1, Pair const& p2) const {
	return (p1.value < p2.value) ? p1 : p2;
	}
	};
	template <typename Pair>
	struct max_loc {
	constexpr RAJA_HOST_DEVICE Pair
	operator()(Pair const& p1, Pair const& p2) const {
	return (p1.value > p2.value) ? p1 : p2;
	}
	};

	// RAJA operators should have defaults!
	// Each binary associative operator currently has ::identity
	// Need to add for {min,max}_loc
	// note: I am not sure which approach is better?
	template <template <typename...> class Operator>
	struct defaults {};

	template <>
	struct defaults<plus> {
	template <typename T>
	static constexpr RAJA_HOST_DEVICE T
	value() {
	return T(0);
	}
	};

	template <>
	struct defaults<multiplies> {
	template <typename T>
	static constexpr RAJA_HOST_DEVICE T
	value() {
	return T(1);
	}
	};

	template <>
	struct defaults<min> {
	template <typename T>
	static constexpr RAJA_HOST_DEVICE T
	value() {
	// use RAJA::operators::limits<T>::max();
	return std::numeric_limits<T>::max();
	}
	};

	template <>
	struct defaults<max> {
	template <typename T>
	static constexpr RAJA_HOST_DEVICE T
	value() {
	// use RAJA::operators::limits<T>::min();
	return std::numeric_limits<T>::min();
	}
	};

	// be smart and reuse for {min,max}_loc!
	template <>
	struct defaults<min_loc> {
	template <typename T, typename I = typename LocationPair<T>::index_type>
	static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
	value() {
	return LocationPair<T, I>{defaults<min>::value<T>(), I()};
	}
	};

	template <>
	struct defaults<max_loc> {
	template <typename T, typename I = typename LocationPair<T>::index_type>
	static constexpr RAJA_HOST_DEVICE LocationPair<T, I>
	value() {
	return LocationPair<T, I>{defaults<max>::value<T>(), I()};
	}
	};
	}

	// This is defined for all reducers
	template <Policy Pol, template <typename> class Op, typename T>
	struct ReduceBase {
	using value_type = decltype(operators::defaults<Op>::template value<T>());

	ReduceBase() = default;
	template <typename... Ts>
	constexpr ReduceBase(Ts&&... ts) : value{ts...} {}
	constexpr ReduceBase(T const& v) : value{v} {}
	ReduceBase(ReduceBase const&) = delete;

	template <typename... Ts>
	value_type
	reduce(Ts&&... ts) const {
	value = Op<value_type>{}(value, value_type{ts...});
	return value;
	}

	constexpr operator value_type() const { return value; }

	value_type
	get() const {
	return value;
	}

	void
	reset(value_type v = operators::defaults<Op>::template value<T>()) const {
	value = v;
	}

	// conditionally enable aggregate operators pending Policy type
	template <typename U>
	value_type
	operator+=(U&& v) const {
	static_assert(std::is_same<Op<U>, operators::plus<U>>::value,
	"invalid reduction function for operator");
	return reduce(v);
	}
	template <typename U>
	value_type
	operator*=(U&& v) const {
	static_assert(std::is_same<Op<U>, operators::multiplies<U>>::value,
	"invalid reduction function for operator");
	return reduce(v);
	}
	template <typename U>
	value_type
	min(U&& v) const {
	static_assert(std::is_same<Op<U>, operators::min<U>>::value,
	"invalid reduction function for operator");
	return reduce(v);
	}
	template <typename U>
	value_type
	max(U&& v) const {
	static_assert(std::is_same<Op<U>, operators::max<U>>::value,
	"invalid reduction function for operator");
	return reduce(v);
	}

	template <typename V, typename I>
	value_type
	min_loc(V&& v, I&& i) {
	using Pair = operators::LocationPair<V, I>;
	static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
	"invalid reduction function for operator");
	return reduce(v, i);
	}

	template <typename V, typename I>
	value_type
	max_loc(V&& v, I&& i) {
	using Pair = operators::LocationPair<V, I>;
	static_assert(std::is_same<Op<Pair>, operators::min_loc<Pair>>::value,
	"invalid reduction function for operator");
	return reduce(v, i);
	}

	protected:
	mutable value_type value{operators::defaults<Op>::template value<T>()};
	};

	// default Reduction class -- note how it is empty :)
	template <Policy Pol, template <typename> class Op, typename T>
	struct Reduce {};

	// magic grab-all for specializing on a Policy type

	template <template <typename> class Op, typename T>
	struct Reduce<Policy::sequential, Op, T>
	: public ReduceBase<Policy::sequential, Op, T> {
	using parent_t = ReduceBase<Policy::sequential, Op, T>;
	using parent_t::get;

	Reduce() = default;
	Reduce(Reduce&&) = default;
	Reduce(Reduce const& par) : parent{&par}, owner{false} {}
	~Reduce() {
	if (!owner) {
	parent->reduce(get());
	}
	}

	private:
	Reduce const* parent;
	bool const owner{true};
	};


	#ifdef _OPENMP

	// I know this doesn't work right now, but I think you get the idea...

	template <template <typename> class Op, typename T>
	struct Reduce<Policy::openmp, Op, T>
	: public ReduceBase<Policy::openmp, Op, T> {
	using parent_t = ReduceBase<Policy::openmp, Op, T>;
	using parent_t::get;

	using value_type = typename parent_t::value_type;

	Reduce() = default;

	template <typename... Ts>
	constexpr Reduce(Ts&&... ts) : parent_t{ts...}, lock{new omp_lock_t}, data_block{new value_type[omp_get_max_threads()]} {
	omp_init_lock(lock);
	}

	Reduce(Reduce&&) = default;
	Reduce(Reduce const& par) : lock{par.lock}, parent{&par}, data_block{par.data_block}, owner{false} {}
	~Reduce() {
	if (!owner) {
	omp_set_lock(lock);
	parent->reduce(get());
	omp_unset_lock(lock);
	} else {
	for (int i = 0; i < omp_get_max_threads(); ++i) {
	reduce(data_block[i]);
	}
	omp_destroy_lock(lock);
	delete lock;
	delete[] data_block;
	}
	}

	private:
	omp_lock_t* lock{nullptr};
	Reduce const* parent{nullptr};
	value_type* data_block{nullptr};
	bool const owner{true};
	};

	#endif

	// TODO for other policies -- modify constructors and storage accordingly

	// Convenience type definitions
	template <Policy Pol, typename T>
	using ReduceMin = Reduce<Pol, operators::min, T>;
	template <Policy Pol, typename T>
	using ReduceMax = Reduce<Pol, operators::max, T>;
	template <Policy Pol, typename T>
	using ReduceSum = Reduce<Pol, operators::plus, T>;
	template <Policy Pol, typename T>
	using ReduceProduct = Reduce<Pol, operators::multiplies, T>;
	template <Policy Pol, typename T>
	using ReduceMinLoc = Reduce<Pol, operators::min_loc, T>;
	template <Policy Pol, typename T>
	using ReduceMaxLoc = Reduce<Pol, operators::max_loc, T>;
	}

	// Additional discussion -- reductions of arrays
	//
	// 1). define/use something similar to span<> from GSL
	// 2). specialize min/max/plus for span<>
	// 3). specialize operators::defaults for span<>
	// 4). everything else should just work

	// example code

	template <typename Fn>
	void
	forall(int begin, int end, Fn&& f) {
	for (int i = begin; i < end; ++i) {
	f(i);
	}
	}

	void
	get_stats(double* array, int N) {
	RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
	RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
	RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;

	forall(0, N, [=](int i) {
	min.min(array[i]);
	max.max(array[i]);
	sum += array[i];
	});
	printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
	sum.get());
	}

	void
	get_stats2(double* array, int N) {
	RAJA::ReduceMin<RAJA::Policy::sequential, double> min;
	RAJA::ReduceMax<RAJA::Policy::sequential, double> max;
	RAJA::ReduceSum<RAJA::Policy::sequential, double> sum;

	for (int i = 0; i < N; ++i) {
	min.min(array[i]);
	max.max(array[i]);
	sum += array[i];
	}
	printf("min: %0.9lf\nmax: %0.9lf\nsum: %0.9lf\n", min.get(), max.get(),
	sum.get());
	}

	int
	main() {
	RAJA::ReduceMinLoc<RAJA::Policy::sequential, double> loc;
	std::array<double, 100> arr;
	std::iota(arr.rbegin(), arr.rend(), 1);
	for (int i = 0; i < 100; ++i) {
	loc.min_loc(arr[i], i);
	// loc.reduce(arr[i], i);
	}
	get_stats(arr.data(), 100);
	get_stats2(arr.data(), 100);

	return 0;
	}