Last active
July 19, 2018 19:24
-
-
Save JPenuchot/66a94c76f85deb71e880058b8400ab24 to your computer and use it in GitHub Desktop.
Fully optimized mean function using xsimd
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>

#include <xsimd/xsimd.hpp>

using namespace std;
namespace xs = xsimd;
/// Calls `f` once for every index in the pack `I`, in pack order.
///
/// Each call receives a `std::integral_constant<std::size_t, I>` object, so
/// the callee can recover the index as a compile-time constant through
/// `decltype(arg)::value`.
///
/// @tparam F  Callable accepting a `std::integral_constant<std::size_t, I>`.
/// @tparam I  Indices to pass to `f`.
/// @param f   Callable to invoke; its return value is discarded.
template<typename F, std::size_t... I>
inline void unroll_impl(F&& f, std::integer_sequence<std::size_t, I...>)
{
  // Comma fold: expands to one call per index, evaluated left to right.
  // Each result is cast to void so that a return type with an overloaded
  // `operator,` cannot take over the fold.
  ( static_cast<void>(f(std::integral_constant<std::size_t, I>{})), ... );
}
/// Invokes `f` exactly `N` times with the compile-time indices 0 .. N-1.
///
/// The indices are generated as an index sequence and handed to
/// `unroll_impl`, which performs the actual calls.
///
/// @tparam N  Number of invocations.
/// @tparam F  Callable accepting a `std::integral_constant<std::size_t, I>`.
/// @param f   Callable to invoke; forwarded to `unroll_impl`.
template<std::size_t N, typename F>
inline void unroll(F&& f)
{
  // std::forward is qualified explicitly: an unqualified call relies on the
  // file-scope using-directive and is diagnosed by some compilers.
  unroll_impl(std::forward<F>(f), std::make_index_sequence<N>{});
}
template<typename T> | |
using align_vec = | |
vector<T, xs::aligned_allocator<T, xs::simd_type<T>::size * sizeof(T)>>; | |
template<typename T, size_t U = 4, typename F, typename It, typename... V> | |
void functor(F&& f, It begin, It end, V... Begins) | |
{ | |
using namespace std; | |
constexpr size_t S = xs::simd_type<T>::size; | |
constexpr size_t US = U * S; | |
const auto size = end - begin; | |
const auto simd_size = size - (size % S ); | |
const auto unrolled_simd_size = size - (size % US); | |
auto simd_end = &begin[simd_size]; | |
auto unrolled_simd_end = &begin[unrolled_simd_size]; | |
auto simd_op = [&](auto i) | |
{ | |
constexpr auto I = decltype(i)::value; | |
xs::store_aligned(&begin[I], f(xs::load_aligned(&Begins[I])...)); | |
}; | |
// Unrolled SIMD core | |
for(; begin < unrolled_simd_end; begin += US, ((Begins+= US), ...)) | |
unroll<U>([&](auto I) | |
{ | |
simd_op(integral_constant<size_t, I * S>{}); | |
}); | |
// SIMD end | |
for(; begin < simd_end; begin += S, ((Begins+= S), ...)) | |
simd_op(integral_constant<size_t, 0>{}); | |
// Scalar end | |
for(; begin < end; begin++, ((Begins++), ...)) | |
*begin = f((*Begins)...); | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
const unsigned sz = 1001; | |
align_vec<float> a(sz), b(sz), r(sz); | |
fill(a.begin(), a.end(), 500.f); | |
fill(b.begin(), b.end(), 1000.f); | |
functor<float>( [](auto&& a, auto&& b) { return a + b; } | |
, r.data(), &r.data()[r.size()] | |
, a.data() | |
, b.data() | |
); | |
for(auto& val : r) cout << val << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment