#pragma once | |
// ============================================================================ | |
// == ./Meta.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
#include <boost/mp11.hpp> | |
#if BOOST_MP11_VERSION < 107300 | |
// Copyright 2015 Peter Dimov. | |
// | |
// Distributed under the Boost Software License, Version 1.0. | |
// | |
// Boost Software License - Version 1.0 - August 17th, 2003 | |
// | |
// Permission is hereby granted, free of charge, to any person or organization | |
// obtaining a copy of the software and accompanying documentation covered by | |
// this license (the "Software") to use, reproduce, display, distribute, | |
// execute, and transmit the Software, and to prepare derivative works of the | |
// Software, and to permit third-parties to whom the Software is furnished to | |
// do so, all subject to the following: | |
// | |
// The copyright notices in the Software and this entire statement, including | |
// the above license grant, this restriction and the following disclaimer, | |
// must be included in all copies of the Software, in whole or in part, and | |
// all derivative works of the Software, unless such copies or derivative | |
// works are solely in the form of machine-executable object code generated by | |
// a source language processor. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | |
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | |
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | |
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
// DEALINGS IN THE SOFTWARE. | |
namespace boost::mp11 | |
{ | |
namespace detail | |
{ | |
template<class L2> | |
struct mp_flatten_impl | |
{ | |
template<class T> | |
using fn = mp_if<mp_similar<L2, T>, T, mp_list<T>>; | |
}; | |
} // namespace detail | |
template<class L, class L2 = mp_clear<L>> | |
using mp_flatten = mp_apply<mp_append, mp_push_front<mp_transform_q<detail::mp_flatten_impl<L2>, L>, mp_clear<L>>>; | |
} // namespace boost::mp11 | |
#endif | |
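// Illustrative sketch (not exercised by this header): mp_flatten splices nested
// lists of the same kind into the outer list, one level deep:
//   using L = boost::mp11::mp_list<int, boost::mp11::mp_list<float, double>, char>;
//   static_assert(std::is_same_v<
//       boost::mp11::mp_flatten<L>,
//       boost::mp11::mp_list<int, float, double, char>>);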
namespace llama | |
{ | |
namespace internal | |
{ | |
template<typename FromList, template<auto...> class ToList> | |
struct mp_unwrap_values_into_impl; | |
template<template<class...> class FromList, typename... Values, template<auto...> class ToList> | |
struct mp_unwrap_values_into_impl<FromList<Values...>, ToList> | |
{ | |
using type = ToList<Values::value...>; | |
}; | |
template<typename FromList, template<auto...> class ToList> | |
using mp_unwrap_values_into = typename mp_unwrap_values_into_impl<FromList, ToList>::type; | |
} // namespace internal | |
} // namespace llama | |
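// Illustrative sketch: mp_unwrap_values_into moves the values of a list of
// integral constants into a template taking a non-type parameter pack. It is
// used further below for RecordCoordFromList and ArrayExtentsDynamic, e.g.
//   llama::internal::mp_unwrap_values_into<boost::mp11::mp_list_c<std::size_t, 0, 1>, llama::RecordCoord>
// is llama::RecordCoord<0, 1>.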
// == | |
// == ./Meta.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./macros.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
#ifdef __INTEL_COMPILER | |
# error LLAMA has stopped supporting the Intel Classic Compiler after Intel announced its planned deprecation and \ | |
replacement by the Intel LLVM-based compiler. Please migrate to the Intel LLVM-based compiler. | |
#endif | |
#if defined(__INTEL_LLVM_COMPILER) | |
# define LLAMA_INDEPENDENT_DATA _Pragma("ivdep") | |
#elif defined(__clang__) | |
# define LLAMA_INDEPENDENT_DATA _Pragma("clang loop vectorize(assume_safety) interleave(assume_safety)") | |
#elif defined(__GNUC__) | |
# define LLAMA_INDEPENDENT_DATA _Pragma("GCC ivdep") | |
#elif defined(_MSC_VER) | |
# define LLAMA_INDEPENDENT_DATA __pragma(loop(ivdep)) | |
#else | |
/// May be put in front of a loop statement. Indicates that all (!) data accesses inside the loop are independent, so
/// the loop can be safely vectorized. Example: \code{.cpp}
/// LLAMA_INDEPENDENT_DATA
/// for(int i = 0; i < N; ++i)
/// // because of LLAMA_INDEPENDENT_DATA the compiler knows that a and b
/// // do not overlap and the operation can safely be vectorized
/// a[i] += b[i];
/// \endcode
# define LLAMA_INDEPENDENT_DATA | |
#endif | |
#ifndef LLAMA_FORCE_INLINE | |
# if defined(__NVCC__) | |
# define LLAMA_FORCE_INLINE __forceinline__ | |
# elif defined(__GNUC__) || defined(__clang__) | |
# define LLAMA_FORCE_INLINE inline __attribute__((always_inline)) | |
# elif defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) | |
# define LLAMA_FORCE_INLINE __forceinline | |
# else | |
/// Forces the compiler to inline a function annotated with this macro | |
# define LLAMA_FORCE_INLINE inline | |
# warning LLAMA_FORCE_INLINE is only defined to "inline" for this compiler | |
# endif | |
#endif | |
#ifndef LLAMA_PRAGMA | |
# define LLAMA_PRAGMA(tokens) _Pragma(# tokens) | |
#endif | |
#ifndef LLAMA_UNROLL | |
# if defined(__NVCC__) || defined(__clang__) || defined(__INTEL_LLVM_COMPILER) | |
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(unroll __VA_ARGS__) | |
# elif defined(__GNUG__) | |
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(GCC unroll __VA_ARGS__) | |
# elif defined(_MSC_VER) | |
// MSVC does not support a pragma for unrolling | |
# define LLAMA_UNROLL(...) | |
# else | |
/// Requests the compiler to unroll the loop following this directive. An optional unrolling count may be provided as | |
/// argument, which must be a constant expression. | |
# define LLAMA_UNROLL(...) | |
# warning LLAMA_UNROLL is not implemented for your compiler | |
# endif | |
#endif | |
#ifndef LLAMA_HOST_ACC | |
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__)) | |
# define LLAMA_HOST_ACC __host__ __device__ | |
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) | |
# define LLAMA_HOST_ACC | |
# else | |
/// Some offloading parallelization language extensions such as CUDA, OpenACC or OpenMP 4.5 need to specify whether a
/// class, struct, function or method "resides" on the host, the accelerator (the offloading device) or both. LLAMA
/// supports this by marking every function needed on an accelerator with `LLAMA_HOST_ACC`.
# define LLAMA_HOST_ACC | |
# warning LLAMA_HOST_ACC is only defined empty for this compiler | |
# endif | |
#endif | |
#define LLAMA_FN_HOST_ACC_INLINE LLAMA_FORCE_INLINE LLAMA_HOST_ACC | |
#ifndef LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS | |
# if defined(__clang__) || defined(__INTEL_LLVM_COMPILER) | |
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __attribute__((always_inline)) __VA_ARGS__ | |
# elif defined(__GNUC__) || (defined(__NVCC__) && !defined(_MSC_VER)) | |
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __VA_ARGS__ __attribute__((always_inline)) | |
# elif defined(_MSC_VER) | |
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) \ | |
__VA_ARGS__ /* FIXME: MSVC cannot combine constexpr and [[msvc::forceinline]] */ | |
# else | |
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __VA_ARGS__ | |
# warning LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS not defined for this compiler | |
# endif | |
#endif | |
#ifndef LLAMA_LAMBDA_INLINE | |
/// Gives a strong indication to the compiler to inline the attributed lambda.
# define LLAMA_LAMBDA_INLINE LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS() | |
#endif | |
/// Suppresses nvcc warning: 'calling a __host__ function from __host__ __device__ function.' | |
#if defined(__NVCC__) && !defined(__clang__) | |
# define LLAMA_SUPPRESS_HOST_DEVICE_WARNING _Pragma("nv_exec_check_disable") | |
#else | |
# define LLAMA_SUPPRESS_HOST_DEVICE_WARNING | |
#endif | |
#if defined(_MSC_VER) | |
# define LLAMA_FORCE_INLINE_RECURSIVE __pragma(inline_depth(255)) | |
#else | |
/// Forces the compiler to recursively inline the call hierarchy started by the subsequent function call.
# define LLAMA_FORCE_INLINE_RECURSIVE | |
#endif | |
/// Forces a copy of a value. This is useful to prevent the odr-usage of constants when compiling for GPU targets.
#define LLAMA_COPY(x) decltype(x)(x) | |
// TODO(bgruber): clang 10 and 11 fail to compile this currently with the issue described here: | |
// https://stackoverflow.com/questions/64300832/why-does-clang-think-gccs-subrange-does-not-satisfy-gccs-ranges-begin-functi | |
// let's try again with clang 12 | |
// Intel LLVM compiler is also using the clang frontend | |
#if(__has_include(<ranges>) && defined(__cpp_concepts) && !defined(__clang__) && !defined(__INTEL_LLVM_COMPILER)) | |
# define CAN_USE_RANGES 1 | |
#else | |
# define CAN_USE_RANGES 0 | |
#endif | |
// == | |
// == ./macros.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./Proofs.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// ============================================================================ | |
// == ./ArrayIndexRange.hpp == | |
// == | |
// #pragma once | |
// ============================================================================ | |
// == ./ArrayExtents.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// ============================================================================ | |
// == ./Array.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "macros.hpp" // amalgamate: file already expanded | |
#include <ostream> | |
#include <tuple> | |
namespace llama | |
{ | |
/// Array class like `std::array` but suitable for use with offloading devices like GPUs. | |
/// \tparam T Type of the array elements.
/// \tparam N Number of elements in the array.
template<typename T, std::size_t N> | |
struct Array | |
{ | |
using value_type = T; | |
T element[N > 0 ? N : 1]; | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto size() const | |
{ | |
return N; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() -> T* | |
{ | |
return &element[0]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() const -> const T* | |
{ | |
return &element[0]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() -> T* | |
{ | |
return &element[N]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() const -> const T* | |
{ | |
return &element[N]; | |
} | |
template<typename IndexType> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](IndexType&& idx) -> T& | |
{ | |
return element[idx]; | |
} | |
template<typename IndexType> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](IndexType&& idx) const -> T const& | |
{ | |
return element[idx]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator==(const Array& a, const Array& b) -> bool | |
{ | |
for(std::size_t i = 0; i < N; ++i) | |
if(a.element[i] != b.element[i]) | |
return false; | |
return true; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator!=(const Array& a, const Array& b) -> bool | |
{ | |
return !(a == b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array& a, const Array& b) -> Array | |
{ | |
Array temp{}; | |
for(std::size_t i = 0; i < N; ++i) | |
temp[i] = a[i] + b[i]; | |
return temp; | |
} | |
template<std::size_t I> | |
constexpr auto get() -> T& | |
{ | |
return element[I]; | |
} | |
template<std::size_t I> | |
constexpr auto get() const -> const T& | |
{ | |
return element[I]; | |
} | |
}; | |
template<typename T> | |
struct Array<T, 0> | |
{ | |
using value_type = T; | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto size() const | |
{ | |
return 0; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() -> T* | |
{ | |
return nullptr; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() const -> const T* | |
{ | |
return nullptr; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() -> T* | |
{ | |
return nullptr; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() const -> const T* | |
{ | |
return nullptr; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator==(const Array&, const Array&) -> bool | |
{ | |
return true; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator!=(const Array&, const Array&) -> bool | |
{ | |
return false; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array&, const Array&) -> Array | |
{ | |
return {}; | |
} | |
}; | |
template<typename First, typename... Args> | |
Array(First, Args... args) -> Array<First, sizeof...(Args) + 1>; | |
template<typename T, std::size_t N> | |
auto operator<<(std::ostream& os, const Array<T, N>& a) -> std::ostream& | |
{ | |
os << "Array{"; | |
bool first = true; | |
for(auto e : a) | |
{ | |
if(first) | |
first = false; | |
else | |
os << ", "; | |
os << e; | |
} | |
os << "}"; | |
return os; | |
} | |
template<typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto push_front([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1> | |
{ | |
Array<T, N + 1> r{}; | |
r[0] = v; | |
if constexpr(N > 0) | |
for(std::size_t i = 0; i < N; i++) | |
r[i + 1] = a[i]; | |
return r; | |
} | |
template<typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto push_back([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1> | |
{ | |
Array<T, N + 1> r{}; | |
if constexpr(N > 0) | |
for(std::size_t i = 0; i < N; i++) | |
r[i] = a[i]; | |
r[N] = v; | |
return r; | |
} | |
template<typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_back([[maybe_unused]] Array<T, N> a) | |
{ | |
static_assert(N > 0); | |
Array<T, N - 1> r{}; | |
if constexpr(N > 1) | |
for(std::size_t i = 0; i < N - 1; i++) | |
r[i] = a[i]; | |
return r; | |
} | |
template<typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front([[maybe_unused]] Array<T, N> a) | |
{ | |
static_assert(N > 0); | |
Array<T, N - 1> r{}; | |
if constexpr(N > 1) | |
for(std::size_t i = 0; i < N - 1; i++) | |
r[i] = a[i + 1]; | |
return r; | |
} | |
template<typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto product(Array<T, N> a) -> T | |
{ | |
T prod = 1; | |
for(auto s : a) | |
prod *= s; | |
return prod; | |
} | |
} // namespace llama | |
namespace std | |
{ | |
template<typename T, size_t N> | |
struct tuple_size<llama::Array<T, N>> : integral_constant<size_t, N> | |
{ | |
}; | |
template<size_t I, typename T, size_t N> | |
struct tuple_element<I, llama::Array<T, N>> | |
{ | |
using type = T; | |
}; | |
} // namespace std | |
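// Usage sketch (illustrative): llama::Array behaves like std::array, including
// CTAD, structured bindings (via the tuple protocol above) and constexpr helpers:
//   constexpr llama::Array a{2, 3, 4}; // deduced as llama::Array<int, 3>
//   auto [x, y, z] = a;
//   static_assert(llama::product(llama::Array<int, 3>{2, 3, 4}) == 24);
//   static_assert(llama::push_back(llama::Array<int, 2>{1, 2}, 3) == llama::Array<int, 3>{1, 2, 3});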
// == | |
// == ./Array.hpp == | |
// ============================================================================ | |
// #include "Meta.hpp" // amalgamate: file already expanded | |
#include <limits> | |
#include <type_traits> | |
namespace llama | |
{ | |
// TODO(bgruber): make this an alias in C++20, when we have CTAD for aliases | |
/// Represents a run-time index into the array dimensions. | |
/// \tparam Dim Compile-time number of dimensions. | |
template<std::size_t Dim> | |
struct ArrayIndex : Array<std::size_t, Dim> | |
{ | |
static constexpr std::size_t rank = Dim; | |
}; | |
static_assert( | |
std::is_trivially_default_constructible_v<ArrayIndex<1>>); // so ArrayIndex<1>{} will produce a zeroed | |
// index. Should hold for all dimensions, | |
// but just checking for <1> here. | |
static_assert(std::is_trivially_copy_constructible_v<ArrayIndex<1>>); | |
static_assert(std::is_trivially_move_constructible_v<ArrayIndex<1>>); | |
static_assert(std::is_trivially_copy_assignable_v<ArrayIndex<1>>); | |
static_assert(std::is_trivially_move_assignable_v<ArrayIndex<1>>); | |
template<typename... Args> | |
ArrayIndex(Args...) -> ArrayIndex<sizeof...(Args)>; | |
} // namespace llama | |
template<size_t N> | |
struct std::tuple_size<llama::ArrayIndex<N>> : std::integral_constant<size_t, N> | |
{ | |
}; | |
template<size_t I, size_t N> | |
struct std::tuple_element<I, llama::ArrayIndex<N>> | |
{ | |
using type = size_t; | |
}; | |
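// Sketch: ArrayIndex is an Array of std::size_t with CTAD and the tuple protocol:
//   llama::ArrayIndex ai{3, 4}; // llama::ArrayIndex<2>; ai[0] == 3, ai[1] == 4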
namespace llama | |
{ | |
/// Used as a template argument to \ref ArrayExtents to mark a dynamic extent. | |
inline constexpr std::size_t dyn = std::numeric_limits<std::size_t>::max(); | |
/// ArrayExtents holding compile and runtime indices. This is conceptually equivalent to the std::extents of
/// std::mdspan. See: https://wg21.link/P0009
template<std::size_t... Sizes> | |
struct ArrayExtents : Array<typename ArrayIndex<sizeof...(Sizes)>::value_type, ((Sizes == dyn) + ... + 0)> | |
{ | |
static constexpr std::size_t rank = sizeof...(Sizes); | |
static constexpr auto rank_dynamic = ((Sizes == dyn) + ... + 0); | |
static constexpr auto rank_static = rank - rank_dynamic; | |
using Index = ArrayIndex<rank>; | |
using value_type = typename Index::value_type; | |
template<std::size_t I> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto get() const | |
{ | |
using namespace boost::mp11; | |
using TypeList = mp_list_c<std::size_t, Sizes...>; | |
constexpr auto extent = mp_at_c<TypeList, I>::value; | |
if constexpr(extent != dyn) | |
return extent; | |
else | |
return static_cast<const Array<value_type, rank_dynamic>&>( | |
*this)[+mp_count<mp_take_c<TypeList, I>, mp_size_t<dyn>>::value]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](std::size_t i) const | |
{ | |
return boost::mp11::mp_with_index<rank>(i, [&](auto ic) { return get<decltype(ic)::value>(); }); | |
} | |
private: | |
template<std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray(std::index_sequence<Is...>) const -> Index | |
{ | |
return {get<Is>()...}; | |
} | |
public: | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray() const -> Index | |
{ | |
return toArray(std::make_index_sequence<rank>{}); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr operator Index() const | |
{ | |
return toArray(); | |
} | |
}; | |
template<> | |
struct ArrayExtents<> | |
{ | |
static constexpr std::size_t rank = 0; | |
static constexpr auto rank_dynamic = 0; | |
static constexpr auto rank_static = 0; | |
using Index = ArrayIndex<rank>; | |
using value_type = typename Index::value_type; | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray() const -> Index | |
{ | |
return {}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr operator Index() const | |
{ | |
return toArray(); | |
} | |
}; | |
template<typename... Args> | |
ArrayExtents(Args... args) -> ArrayExtents<(Args{}, dyn)...>; | |
static_assert(std::is_trivially_default_constructible_v<ArrayExtents<1>>); | |
static_assert(std::is_trivially_copy_constructible_v<ArrayExtents<1>>); | |
static_assert(std::is_trivially_move_constructible_v<ArrayExtents<1>>); | |
static_assert(std::is_trivially_copy_assignable_v<ArrayExtents<1>>); | |
static_assert(std::is_trivially_move_assignable_v<ArrayExtents<1>>); | |
template<std::size_t... SizesA, std::size_t... SizesB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(ArrayExtents<SizesA...> a, ArrayExtents<SizesB...> b) -> bool | |
{ | |
return a.toArray() == b.toArray(); | |
} | |
template<std::size_t... SizesA, std::size_t... SizesB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(ArrayExtents<SizesA...> a, ArrayExtents<SizesB...> b) -> bool | |
{ | |
return !(a == b); | |
} | |
template<std::size_t... Sizes> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto product(ArrayExtents<Sizes...> e) -> | |
typename ArrayExtents<Sizes...>::value_type | |
{ | |
return product(e.toArray()); | |
} | |
/// N-dimensional ArrayExtents where all values are dynamic. | |
template<std::size_t N> | |
using ArrayExtentsDynamic = internal:: | |
mp_unwrap_values_into<boost::mp11::mp_repeat_c<boost::mp11::mp_list_c<std::size_t, dyn>, N>, ArrayExtents>; | |
/// N-dimensional ArrayExtents where all values are Extent. | |
template<std::size_t N, std::size_t Extent> | |
using ArrayExtentsStatic = internal:: | |
mp_unwrap_values_into<boost::mp11::mp_repeat_c<boost::mp11::mp_list_c<std::size_t, Extent>, N>, ArrayExtents>; | |
template<std::size_t Dim, typename Func, typename... OuterIndices> | |
LLAMA_FN_HOST_ACC_INLINE void forEachADCoord( | |
[[maybe_unused]] ArrayIndex<Dim> adSize, | |
Func&& func, | |
OuterIndices... outerIndices) | |
{ | |
if constexpr(Dim > 0) | |
for(std::size_t i = 0; i < adSize[0]; i++) | |
forEachADCoord(ArrayIndex<Dim - 1>{pop_front(adSize)}, std::forward<Func>(func), outerIndices..., i); | |
else | |
std::forward<Func>(func)(ArrayIndex<sizeof...(outerIndices)>{outerIndices...}); | |
} | |
template<std::size_t... Sizes, typename Func> | |
LLAMA_FN_HOST_ACC_INLINE void forEachADCoord(ArrayExtents<Sizes...> extents, Func&& func) | |
{ | |
forEachADCoord(extents.toArray(), std::forward<Func>(func)); | |
} | |
} // namespace llama | |
template<std::size_t... Sizes> | |
struct std::tuple_size<llama::ArrayExtents<Sizes...>> : std::integral_constant<std::size_t, sizeof...(Sizes)> | |
{ | |
}; | |
template<std::size_t I, std::size_t... Sizes> | |
struct std::tuple_element<I, llama::ArrayExtents<Sizes...>> | |
{ | |
using type = typename llama::ArrayExtents<Sizes...>::value_type; | |
}; | |
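// Usage sketch (illustrative): extents mix compile-time sizes and llama::dyn
// runtime sizes; only the dynamic extents are stored:
//   llama::ArrayExtents<3, llama::dyn> e{16}; // rank 2, rank_dynamic 1
//   // e[0] == 3 (static), e[1] == 16 (runtime), product(e) == 48
//   llama::forEachADCoord(e, [](llama::ArrayIndex<2> ai) { /* visits all {i, j} */ });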
// == | |
// == ./ArrayExtents.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./Core.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "ArrayExtents.hpp" // amalgamate: file already expanded | |
// #include "Meta.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./RecordCoord.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Meta.hpp" // amalgamate: file already expanded | |
#include <array> | |
// #include <ostream> // amalgamate: file already included | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama | |
{ | |
/// Represents a coordinate for a record inside the record dimension tree. | |
/// \tparam Coords... The compile-time coordinates.
template<std::size_t... Coords> | |
struct RecordCoord | |
{ | |
/// The list of integral coordinates as `boost::mp11::mp_list`. | |
using List = boost::mp11::mp_list_c<std::size_t, Coords...>; | |
static constexpr std::size_t front = boost::mp11::mp_front<List>::value; | |
static constexpr std::size_t back = boost::mp11::mp_back<List>::value; | |
static constexpr std::size_t size = sizeof...(Coords); | |
}; | |
template<> | |
struct RecordCoord<> | |
{ | |
using List = boost::mp11::mp_list_c<std::size_t>; | |
static constexpr std::size_t size = 0; | |
}; | |
template<std::size_t... CoordsA, std::size_t... CoordsB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(RecordCoord<CoordsA...>, RecordCoord<CoordsB...>) | |
{ | |
return false; | |
} | |
template<std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(RecordCoord<Coords...>, RecordCoord<Coords...>) | |
{ | |
return true; | |
} | |
template<std::size_t... CoordsA, std::size_t... CoordsB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(RecordCoord<CoordsA...> a, RecordCoord<CoordsB...> b) | |
{ | |
return !(a == b); | |
} | |
template<typename T> | |
inline constexpr bool isRecordCoord = false; | |
template<std::size_t... Coords> | |
inline constexpr bool isRecordCoord<RecordCoord<Coords...>> = true; | |
template<std::size_t... RCs> | |
auto operator<<(std::ostream& os, RecordCoord<RCs...>) -> std::ostream& | |
{ | |
os << "RecordCoord<"; | |
bool first = true; | |
for(auto rc : std::array<std::size_t, sizeof...(RCs)>{RCs...}) | |
{ | |
if(first) | |
first = false; | |
else | |
os << ", "; | |
os << rc; | |
} | |
os << ">"; | |
return os; | |
} | |
inline namespace literals | |
{ | |
/// Literal operator for converting a numeric literal into a \ref RecordCoord. | |
template<char... Digits> | |
constexpr auto operator"" _RC() | |
{ | |
constexpr auto coord = []() constexpr | |
{ | |
char digits[] = {(Digits - 48)...}; | |
std::size_t acc = 0; | |
std::size_t powerOf10 = 1;
for(int i = sizeof...(Digits) - 1; i >= 0; i--) | |
{ | |
acc += digits[i] * powerOf10; | |
powerOf10 *= 10; | |
} | |
return acc; | |
} | |
(); | |
return RecordCoord<coord>{}; | |
} | |
} // namespace literals | |
/// Converts a type list of integral constants into a \ref RecordCoord. | |
template<typename L> | |
using RecordCoordFromList = internal::mp_unwrap_values_into<L, RecordCoord>; | |
/// Concatenate a set of \ref RecordCoord%s. | |
template<typename... RecordCoords> | |
using Cat = RecordCoordFromList<boost::mp11::mp_append<typename RecordCoords::List...>>; | |
/// Concatenate a set of \ref RecordCoord%s instances. | |
template<typename... RecordCoords> | |
constexpr auto cat(RecordCoords...) | |
{ | |
return Cat<RecordCoords...>{}; | |
} | |
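// Sketch: with `using namespace llama::literals;` the literal 21_RC denotes
// llama::RecordCoord<21>, and concatenation works via cat:
//   static_assert(std::is_same_v<
//       decltype(cat(RecordCoord<0>{}, RecordCoord<2, 1>{})),
//       RecordCoord<0, 2, 1>>);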
/// RecordCoord without first coordinate component. | |
template<typename RecordCoord> | |
using PopFront = RecordCoordFromList<boost::mp11::mp_pop_front<typename RecordCoord::List>>; | |
namespace internal | |
{ | |
template<typename First, typename Second> | |
struct RecordCoordCommonPrefixIsBiggerImpl; | |
template<std::size_t... Coords1, std::size_t... Coords2> | |
struct RecordCoordCommonPrefixIsBiggerImpl<RecordCoord<Coords1...>, RecordCoord<Coords2...>> | |
{ | |
static constexpr auto value = []() constexpr | |
{ | |
// CTAD does not work if Coords1/2 is an empty pack | |
std::array<std::size_t, sizeof...(Coords1)> a1{Coords1...}; | |
std::array<std::size_t, sizeof...(Coords2)> a2{Coords2...}; | |
for(std::size_t i = 0; i < std::min(a1.size(), a2.size()); i++) | |
{ | |
if(a1[i] > a2[i]) | |
return true; | |
if(a1[i] < a2[i]) | |
return false; | |
} | |
return false; | |
} | |
(); | |
}; | |
} // namespace internal | |
/// Checks whether the first RecordCoord is bigger than the second.
template<typename First, typename Second> | |
inline constexpr auto RecordCoordCommonPrefixIsBigger | |
= internal::RecordCoordCommonPrefixIsBiggerImpl<First, Second>::value; | |
namespace internal | |
{ | |
template<typename First, typename Second> | |
struct RecordCoordCommonPrefixIsSameImpl; | |
template<std::size_t... Coords1, std::size_t... Coords2> | |
struct RecordCoordCommonPrefixIsSameImpl<RecordCoord<Coords1...>, RecordCoord<Coords2...>> | |
{ | |
static constexpr auto value = []() constexpr | |
{ | |
// CTAD does not work if Coords1/2 is an empty pack | |
std::array<std::size_t, sizeof...(Coords1)> a1{Coords1...}; | |
std::array<std::size_t, sizeof...(Coords2)> a2{Coords2...}; | |
for(std::size_t i = 0; i < std::min(a1.size(), a2.size()); i++) | |
if(a1[i] != a2[i]) | |
return false; | |
return true; | |
} | |
(); | |
}; | |
} // namespace internal | |
/// Checks whether two \ref RecordCoord%s are the same or one is the prefix of the other. | |
template<typename First, typename Second> | |
inline constexpr auto RecordCoordCommonPrefixIsSame | |
= internal::RecordCoordCommonPrefixIsSameImpl<First, Second>::value; | |
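// Sketch: both traits only look at the common prefix of the two coordinates:
//   RecordCoordCommonPrefixIsBigger<RecordCoord<1, 0>, RecordCoord<0, 5>> == true
//   RecordCoordCommonPrefixIsSame<RecordCoord<0>, RecordCoord<0, 3>> == true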
} // namespace llama | |
// == | |
// == ./RecordCoord.hpp == | |
// ============================================================================ | |
#include <boost/core/demangle.hpp> | |
#include <iostream> | |
#include <string> | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama | |
{ | |
/// Anonymous naming for a \ref Field. | |
struct NoName | |
{ | |
}; | |
/// A type list of \ref Field%s which may be used to define a record dimension. | |
template<typename... Fields> | |
struct Record | |
{ | |
}; | |
/// @brief Tells whether the given type is allowed as a field type in LLAMA. Such types need to be trivially
/// destructible.
template<typename T> | |
inline constexpr bool isAllowedFieldType = std::is_trivially_destructible_v<T>; | |
/// Record dimension tree node which may either be a leaf or refer to a child tree presented as another \ref | |
/// Record. | |
/// \tparam Tag Name of the node. May be any type (struct, class). | |
/// \tparam Type Type of the node. May be one of three cases: 1. another subtree consisting of a nested \ref
/// Record. 2. an array of static size of any type, in which case a Record with as many \ref Field%s as the array
/// size is created, each named by a \ref RecordCoord specialized on the consecutive index I. 3. a scalar type
/// different from \ref Record, making this node a leaf of this type.
template<typename Tag, typename Type> | |
struct Field | |
{ | |
static_assert(isAllowedFieldType<Type>, "This field's type is not allowed"); | |
}; | |
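// Example record dimension (illustrative sketch; X, Y and Color are example tag
// types, not part of LLAMA). Later examples in this header refer to it:
//   struct X{}; struct Y{}; struct Color{};
//   using Pixel = llama::Record<
//       llama::Field<X, float>,
//       llama::Field<Y, float>,
//       llama::Field<Color, unsigned char[3]>>;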
struct NrAndOffset | |
{ | |
std::size_t nr; | |
std::size_t offset; | |
friend auto operator==(const NrAndOffset& a, const NrAndOffset& b) -> bool | |
{ | |
return a.nr == b.nr && a.offset == b.offset; | |
} | |
friend auto operator!=(const NrAndOffset& a, const NrAndOffset& b) -> bool | |
{ | |
return !(a == b); | |
} | |
friend auto operator<<(std::ostream& os, const NrAndOffset& value) -> std::ostream& | |
{ | |
return os << "NrAndOffset{" << value.nr << ", " << value.offset << "}"; | |
} | |
}; | |
/// Get the tag from a \ref Field. | |
template<typename Field> | |
using GetFieldTag = boost::mp11::mp_first<Field>; | |
/// Get the type from a \ref Field. | |
template<typename Field> | |
using GetFieldType = boost::mp11::mp_second<Field>; | |
template<typename T> | |
inline constexpr auto isRecord = false; | |
template<typename... Fields> | |
inline constexpr auto isRecord<Record<Fields...>> = true; | |
namespace internal | |
{ | |
template<typename RecordDim, typename RecordCoord> | |
struct GetTagsImpl; | |
template<typename... Fields, std::size_t FirstCoord, std::size_t... Coords> | |
struct GetTagsImpl<Record<Fields...>, RecordCoord<FirstCoord, Coords...>> | |
{ | |
using Field = boost::mp11::mp_at_c<boost::mp11::mp_list<Fields...>, FirstCoord>; | |
using ChildTag = GetFieldTag<Field>; | |
using ChildType = GetFieldType<Field>; | |
using type | |
= boost::mp11::mp_push_front<typename GetTagsImpl<ChildType, RecordCoord<Coords...>>::type, ChildTag>; | |
}; | |
template<typename ChildType, std::size_t Count, std::size_t FirstCoord, std::size_t... Coords> | |
struct GetTagsImpl<ChildType[Count], RecordCoord<FirstCoord, Coords...>> | |
{ | |
using ChildTag = RecordCoord<FirstCoord>; | |
using type | |
= boost::mp11::mp_push_front<typename GetTagsImpl<ChildType, RecordCoord<Coords...>>::type, ChildTag>; | |
}; | |
template<typename T> | |
struct GetTagsImpl<T, RecordCoord<>> | |
{ | |
using type = boost::mp11::mp_list<>; | |
}; | |
} // namespace internal | |
/// Get the tags of all \ref Field%s from the root of the record dimension tree down to the node identified by
/// \ref RecordCoord.
template<typename RecordDim, typename RecordCoord> | |
using GetTags = typename internal::GetTagsImpl<RecordDim, RecordCoord>::type; | |
namespace internal | |
{ | |
template<typename RecordDim, typename RecordCoord> | |
struct GetTagImpl | |
{ | |
using type = boost::mp11::mp_back<GetTags<RecordDim, RecordCoord>>; | |
}; | |
template<typename RecordDim> | |
struct GetTagImpl<RecordDim, RecordCoord<>> | |
{ | |
using type = NoName; | |
}; | |
} // namespace internal | |
/// Get the tag of the \ref Field at a \ref RecordCoord inside the record dimension tree. | |
template<typename RecordDim, typename RecordCoord> | |
using GetTag = typename internal::GetTagImpl<RecordDim, RecordCoord>::type; | |
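// Sketch (Pixel example from above): GetTags<Pixel, RecordCoord<2, 1>> is
// boost::mp11::mp_list<Color, RecordCoord<1>> (array elements are tagged by their
// index as RecordCoord<I>), and GetTag<Pixel, RecordCoord<0>> is X.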
/// Is true if, starting at two coordinates in two record dimensions, all subsequent nodes in the record dimension | |
/// tree have the same tag. | |
/// \tparam RecordDimA First record dimension.
/// \tparam LocalA \ref RecordCoord into RecordDimA along which the tags are compared.
/// \tparam RecordDimB Second record dimension.
/// \tparam LocalB \ref RecordCoord into RecordDimB along which the tags are compared.
template<typename RecordDimA, typename LocalA, typename RecordDimB, typename LocalB> | |
inline constexpr auto hasSameTags = []() constexpr | |
{ | |
if constexpr(LocalA::size != LocalB::size) | |
return false; | |
else if constexpr(LocalA::size == 0 && LocalB::size == 0) | |
return true; | |
else | |
return std::is_same_v<GetTags<RecordDimA, LocalA>, GetTags<RecordDimB, LocalB>>; | |
} | |
(); | |
namespace internal | |
{ | |
template<typename FieldList, typename Tag> | |
struct FindFieldByTag | |
{ | |
template<typename Field> | |
using HasTag = std::is_same<GetFieldTag<Field>, Tag>; | |
static constexpr auto value = boost::mp11::mp_find_if<FieldList, HasTag>::value; | |
}; | |
template<typename RecordDim, typename RecordCoord, typename... Tags> | |
struct GetCoordFromTagsImpl | |
{ | |
static_assert(boost::mp11::mp_size<RecordDim>::value != 0, "Tag combination is not valid"); | |
}; | |
template<typename... Fields, std::size_t... ResultCoords, typename FirstTag, typename... Tags> | |
struct GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<ResultCoords...>, FirstTag, Tags...> | |
{ | |
static constexpr auto tagIndex = FindFieldByTag<boost::mp11::mp_list<Fields...>, FirstTag>::value; | |
static_assert( | |
tagIndex < sizeof...(Fields), | |
"FirstTag was not found inside this Record. Does your record dimension contain the tag you access " | |
"with?"); | |
using ChildType = GetFieldType<boost::mp11::mp_at_c<Record<Fields...>, tagIndex>>; | |
using type = | |
typename GetCoordFromTagsImpl<ChildType, RecordCoord<ResultCoords..., tagIndex>, Tags...>::type; | |
}; | |
template< | |
typename ChildType, | |
std::size_t Count, | |
std::size_t... ResultCoords, | |
typename FirstTag, | |
typename... Tags> | |
struct GetCoordFromTagsImpl<ChildType[Count], RecordCoord<ResultCoords...>, FirstTag, Tags...> | |
{ | |
static_assert(isRecordCoord<FirstTag>, "Please use a RecordCoord<I> to index into static arrays"); | |
static_assert(FirstTag::size == 1, "Expected RecordCoord with 1 coordinate"); | |
static_assert(FirstTag::front < Count, "Index out of bounds"); | |
using type = | |
typename GetCoordFromTagsImpl<ChildType, RecordCoord<ResultCoords..., FirstTag::front>, Tags...>::type; | |
}; | |
template<typename RecordDim, typename RecordCoord> | |
struct GetCoordFromTagsImpl<RecordDim, RecordCoord> | |
{ | |
using type = RecordCoord; | |
}; | |
// unpack a list of tags | |
template<typename... Fields, typename... Tags> | |
struct GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<>, boost::mp11::mp_list<Tags...>> | |
: GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<>, Tags...> | |
{ | |
}; | |
template<typename ChildType, std::size_t Count, typename... Tags> | |
struct GetCoordFromTagsImpl<ChildType[Count], RecordCoord<>, boost::mp11::mp_list<Tags...>> | |
: GetCoordFromTagsImpl<ChildType[Count], RecordCoord<>, Tags...> | |
{ | |
}; | |
} // namespace internal | |
/// Converts a series of tags, or a list of tags, navigating down a record dimension into a \ref RecordCoord. | |
template<typename RecordDim, typename... Tags> | |
using GetCoordFromTags = typename internal::GetCoordFromTagsImpl<RecordDim, RecordCoord<>, Tags...>::type; | |
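// Sketch (Pixel example): GetCoordFromTags<Pixel, Color, RecordCoord<1>> is
// RecordCoord<2, 1>; elements of static arrays are addressed with RecordCoord<I>.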
namespace internal | |
{ | |
template<typename RecordDim, typename... RecordCoordOrTags> | |
struct GetTypeImpl | |
{ | |
using type = typename GetTypeImpl<RecordDim, GetCoordFromTags<RecordDim, RecordCoordOrTags...>>::type; | |
}; | |
template<typename... Children, std::size_t HeadCoord, std::size_t... TailCoords> | |
struct GetTypeImpl<Record<Children...>, RecordCoord<HeadCoord, TailCoords...>> | |
{ | |
using ChildType = GetFieldType<boost::mp11::mp_at_c<Record<Children...>, HeadCoord>>; | |
using type = typename GetTypeImpl<ChildType, RecordCoord<TailCoords...>>::type; | |
}; | |
template<typename ChildType, std::size_t N, std::size_t HeadCoord, std::size_t... TailCoords> | |
struct GetTypeImpl<ChildType[N], RecordCoord<HeadCoord, TailCoords...>> | |
{ | |
using type = typename GetTypeImpl<ChildType, RecordCoord<TailCoords...>>::type; | |
}; | |
template<typename T> | |
struct GetTypeImpl<T, RecordCoord<>> | |
{ | |
static_assert(isAllowedFieldType<T>); | |
using type = T; | |
}; | |
} // namespace internal | |
/// Returns the type of a node in a record dimension tree identified by a given \ref RecordCoord or a series of | |
/// tags. | |
template<typename RecordDim, typename... RecordCoordOrTags> | |
using GetType = typename internal::GetTypeImpl<RecordDim, RecordCoordOrTags...>::type; | |
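// Sketch (Pixel example): GetType<Pixel, RecordCoord<2, 1>> and
// GetType<Pixel, Color, RecordCoord<1>> are both unsigned char.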
namespace internal | |
{ | |
template<typename RecordDim, typename RecordCoord> | |
struct LeafRecordCoordsImpl; | |
template<typename T, std::size_t... RCs> | |
struct LeafRecordCoordsImpl<T, RecordCoord<RCs...>> | |
{ | |
using type = boost::mp11::mp_list<RecordCoord<RCs...>>; | |
}; | |
template<typename... Fields, std::size_t... RCs> | |
struct LeafRecordCoordsImpl<Record<Fields...>, RecordCoord<RCs...>> | |
{ | |
template<std::size_t... Is> | |
static auto help(std::index_sequence<Is...>) | |
{ | |
return boost::mp11::mp_append< | |
typename LeafRecordCoordsImpl<GetFieldType<Fields>, RecordCoord<RCs..., Is>>::type...>{}; | |
} | |
using type = decltype(help(std::make_index_sequence<sizeof...(Fields)>{})); | |
}; | |
template<typename Child, std::size_t N, std::size_t... RCs> | |
struct LeafRecordCoordsImpl<Child[N], RecordCoord<RCs...>> | |
{ | |
template<std::size_t... Is> | |
static auto help(std::index_sequence<Is...>) | |
{ | |
return boost::mp11::mp_append< | |
typename LeafRecordCoordsImpl<Child, RecordCoord<RCs..., Is>>::type...>{}; | |
} | |
using type = decltype(help(std::make_index_sequence<N>{})); | |
}; | |
} // namespace internal | |
/// Returns a flat type list containing all record coordinates to all leaves of the given record dimension. | |
template<typename RecordDim> | |
using LeafRecordCoords = typename internal::LeafRecordCoordsImpl<RecordDim, RecordCoord<>>::type; | |
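// Sketch (Pixel example): LeafRecordCoords<Pixel> is
//   mp_list<RecordCoord<0>, RecordCoord<1>,
//           RecordCoord<2, 0>, RecordCoord<2, 1>, RecordCoord<2, 2>>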
namespace internal | |
{ | |
// adapted from boost::mp11, but with LLAMA_FN_HOST_ACC_INLINE | |
template<template<typename...> typename L, typename... T, typename F> | |
LLAMA_FN_HOST_ACC_INLINE constexpr void mp_for_each_inlined(L<T...>, F&& f) | |
{ | |
using A = int[sizeof...(T)]; | |
(void) A{((void) f(T{}), 0)...}; | |
} | |
} // namespace internal | |
/// Iterates over the record dimension tree and calls a functor on each element. | |
/// \param functor Functor to execute at each leaf of the record dimension tree. Needs to have `operator()` with a
/// template parameter for the \ref RecordCoord in the record dimension tree.
/// \param baseCoord \ref RecordCoord at which the iteration should be started. The functor is called on elements | |
/// beneath this coordinate. | |
template<typename RecordDim, typename Functor, std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeafCoord(Functor&& functor, RecordCoord<Coords...> baseCoord) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
internal::mp_for_each_inlined( | |
LeafRecordCoords<GetType<RecordDim, RecordCoord<Coords...>>>{}, | |
[&](auto innerCoord) LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(constexpr) | |
{ std::forward<Functor>(functor)(cat(baseCoord, innerCoord)); }); | |
} | |
/// Iterates over the record dimension tree and calls a functor on each element. | |
/// \param functor Functor to execute at each leaf of the record dimension tree. Needs to have `operator()` with a
/// template parameter for the \ref RecordCoord in the record dimension tree.
/// \param baseTags Tags used to define where the iteration should be started. The functor is called on elements | |
/// beneath this coordinate. | |
template<typename RecordDim, typename Functor, typename... Tags> | |
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeafCoord(Functor&& functor, Tags... /*baseTags*/) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
forEachLeafCoord<RecordDim>(std::forward<Functor>(functor), GetCoordFromTags<RecordDim, Tags...>{}); | |
} | |
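// Usage sketch (Pixel example): visit all leaves, or only the leaves below a tag:
//   llama::forEachLeafCoord<Pixel>([](auto rc) {
//       // rc is RecordCoord<0>, RecordCoord<1>, RecordCoord<2, 0>, ...
//   });
//   llama::forEachLeafCoord<Pixel>([](auto rc) { /* Color leaves only */ }, Color{});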
namespace internal | |
{ | |
template<typename T> | |
struct FlattenRecordDimImpl | |
{ | |
using type = boost::mp11::mp_list<T>; | |
}; | |
template<typename... Fields> | |
struct FlattenRecordDimImpl<Record<Fields...>> | |
{ | |
using type = boost::mp11::mp_append<typename FlattenRecordDimImpl<GetFieldType<Fields>>::type...>; | |
}; | |
template<typename Child, std::size_t N> | |
struct FlattenRecordDimImpl<Child[N]> | |
{ | |
using type = boost::mp11::mp_repeat_c<typename FlattenRecordDimImpl<Child>::type, N>; | |
}; | |
} // namespace internal | |
/// Returns a flat type list containing all leaf field types of the given record dimension. | |
template<typename RecordDim> | |
using FlatRecordDim = typename internal::FlattenRecordDimImpl<RecordDim>::type; | |
/// The total number of fields in the recursively expanded record dimension. | |
template<typename RecordDim> | |
inline constexpr std::size_t flatFieldCount = 1; | |
template<typename... Children> | |
inline constexpr std::size_t flatFieldCount< | |
Record<Children...>> = (flatFieldCount<GetFieldType<Children>> + ... + 0); | |
template<typename Child, std::size_t N> | |
inline constexpr std::size_t flatFieldCount<Child[N]> = flatFieldCount<Child>* N; | |
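// Sketch (Pixel example): FlatRecordDim<Pixel> is
// mp_list<float, float, unsigned char, unsigned char, unsigned char>, and
// flatFieldCount<Pixel> == 5.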
namespace internal | |
{ | |
template<std::size_t I, typename RecordDim> | |
inline constexpr std::size_t flatFieldCountBefore = 0; | |
template<typename... Children> | |
inline constexpr std::size_t flatFieldCountBefore<0, Record<Children...>> = 0; | |
// recursive formulation to benefit from template instantiation memoization | |
// this massively improves compilation time when this template is instantiated with a lot of different I | |
template<std::size_t I, typename... Children> | |
inline constexpr std::size_t flatFieldCountBefore< | |
I, | |
Record< | |
Children...>> = flatFieldCountBefore<I - 1, Record<Children...>> + flatFieldCount<GetFieldType<boost::mp11::mp_at_c<Record<Children...>, I - 1>>>; | |
} // namespace internal | |
/// The equivalent zero-based index into a flat record dimension (\ref FlatRecordDim) of the given hierarchical
/// record coordinate.
template<typename RecordDim, typename RecordCoord> | |
inline constexpr std::size_t flatRecordCoord = 0; | |
template<typename T> | |
inline constexpr std::size_t flatRecordCoord<T, RecordCoord<>> = 0; | |
template<typename... Children, std::size_t I, std::size_t... Is> | |
inline constexpr std::size_t flatRecordCoord< | |
Record<Children...>, | |
RecordCoord< | |
I, | |
Is...>> = internal:: | |
flatFieldCountBefore< | |
I, | |
Record< | |
Children...>> + flatRecordCoord<GetFieldType<boost::mp11::mp_at_c<Record<Children...>, I>>, RecordCoord<Is...>>; | |
template<typename Child, std::size_t N, std::size_t I, std::size_t... Is> | |
inline constexpr std::size_t flatRecordCoord<Child[N], RecordCoord<I, Is...>> = flatFieldCount<Child>* I | |
+ flatRecordCoord<Child, RecordCoord<Is...>>; | |
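// Sketch (Pixel example): flatRecordCoord<Pixel, RecordCoord<2, 1>> == 3, i.e. the
// second Color component is the fourth leaf of FlatRecordDim<Pixel>.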
namespace internal | |
{ | |
template<typename TypeList> | |
constexpr auto flatAlignOfImpl() | |
{ | |
using namespace boost::mp11; | |
std::size_t maxAlign = 0; | |
mp_for_each<mp_transform<mp_identity, TypeList>>([&](auto e) constexpr | |
{ | |
using T = typename decltype(e)::type; | |
maxAlign = std::max(maxAlign, alignof(T)); | |
}); | |
return maxAlign; | |
} | |
} // namespace internal | |
/// The alignment of a type list if its elements would be in a normal struct. | |
template<typename TypeList> | |
inline constexpr std::size_t flatAlignOf = internal::flatAlignOfImpl<TypeList>(); | |
/// The alignment of a type T. | |
template<typename T> | |
inline constexpr std::size_t alignOf = alignof(T); | |
/// The alignment of a record dimension if its fields would be in a normal struct. | |
template<typename... Fields> | |
inline constexpr std::size_t alignOf<Record<Fields...>> = flatAlignOf<FlatRecordDim<Record<Fields...>>>; | |
namespace internal | |
{ | |
constexpr void roundUpToMultiple(std::size_t& value, std::size_t multiple) | |
{ | |
value = ((value + multiple - 1) / multiple) * multiple; | |
} | |
template<typename TypeList, bool Align, bool IncludeTailPadding> | |
constexpr auto sizeOfImpl() -> std::size_t | |
{ | |
using namespace boost::mp11; | |
std::size_t size = 0; | |
std::size_t maxAlign = 0; | |
mp_for_each<mp_transform<mp_identity, TypeList>>([&](auto e) constexpr | |
{ | |
using T = typename decltype(e)::type; | |
if constexpr(Align) | |
{ | |
roundUpToMultiple(size, alignof(T)); | |
maxAlign = std::max(maxAlign, alignof(T)); | |
} | |
// NOLINTNEXTLINE(readability-misleading-indentation) | |
size += sizeof(T); | |
}); | |
// final padding, so next struct can start right away | |
if constexpr(Align && IncludeTailPadding) | |
roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf<TypeList> here, at the | |
// cost of more template instantiations | |
return size; | |
} | |
template<bool Align, typename TypeList, std::size_t I> | |
constexpr auto offsetOfImplWorkaround() -> std::size_t; | |
// recursive formulation to benefit from template instantiation memoization | |
// this massively improves compilation time when this template is instantiated with a lot of different I | |
template<bool Align, typename TypeList, std::size_t I> | |
inline constexpr std::size_t offsetOfImpl | |
= offsetOfImplWorkaround<Align, TypeList, I>(); // FIXME: MSVC fails to compile an IILE here. | |
template<bool Align, typename TypeList> | |
inline constexpr std::size_t offsetOfImpl<Align, TypeList, 0> = 0; | |
template<bool Align, typename TypeList, std::size_t I> | |
constexpr auto offsetOfImplWorkaround() -> std::size_t | |
{ | |
std::size_t offset = offsetOfImpl<Align, TypeList, I - 1> + sizeof(boost::mp11::mp_at_c<TypeList, I - 1>); | |
if constexpr(Align) | |
roundUpToMultiple(offset, alignof(boost::mp11::mp_at_c<TypeList, I>)); | |
return offset; | |
} | |
} // namespace internal | |
/// The size of a type list if its elements would be in a normal struct. | |
template<typename TypeList, bool Align, bool IncludeTailPadding = true> | |
inline constexpr std::size_t flatSizeOf = internal::sizeOfImpl<TypeList, Align, IncludeTailPadding>(); | |
/// The size of a type T. | |
template<typename T, bool Align = false, bool IncludeTailPadding = true> | |
inline constexpr std::size_t sizeOf = sizeof(T); | |
/// The size of a record dimension if its fields would be in a normal struct. | |
template<typename... Fields, bool Align, bool IncludeTailPadding> | |
inline constexpr std::size_t sizeOf<Record<Fields...>, Align, IncludeTailPadding> = flatSizeOf< | |
FlatRecordDim<Record<Fields...>>, | |
Align, | |
IncludeTailPadding>; | |
/// The byte offset of an element in a type list if its elements would be in a normal struct.
template<typename TypeList, std::size_t I, bool Align> | |
inline constexpr std::size_t flatOffsetOf = internal::offsetOfImpl<Align, TypeList, I>; | |
/// The byte offset of an element in a record dimension if it would be a normal struct.
/// \tparam RecordDim Record dimension tree.
/// \tparam RecordCoord Record coordinate of an element in the record dimension tree.
template<typename RecordDim, typename RecordCoord, bool Align = false> | |
inline constexpr std::size_t offsetOf | |
= flatOffsetOf<FlatRecordDim<RecordDim>, flatRecordCoord<RecordDim, RecordCoord>, Align>; | |
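// Sketch (Pixel example, assuming 4-byte floats): sizeOf<Pixel> == 11 (packed),
// sizeOf<Pixel, true> == 12 (aligned, including tail padding), and
// offsetOf<Pixel, RecordCoord<2, 1>, true> == 9 (after two floats and one unsigned char).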
template<typename S> | |
auto structName(S = {}) -> std::string | |
{ | |
auto s = boost::core::demangle(typeid(S).name()); | |
if(const auto pos = s.rfind(':'); pos != std::string::npos) | |
s = s.substr(pos + 1); | |
return s; | |
} | |
namespace internal | |
{ | |
template<typename T> | |
struct IndirectValue | |
{ | |
T value; | |
auto operator->() -> T* | |
{ | |
return &value; | |
} | |
auto operator->() const -> const T* | |
{ | |
return &value; | |
} | |
}; | |
// TODO(bgruber): replace in C++20 | |
template<class T> | |
struct IsBoundedArray : std::false_type | |
{ | |
}; | |
template<class T, std::size_t N> | |
struct IsBoundedArray<T[N]> : std::true_type | |
{ | |
}; | |
} // namespace internal | |
/// Returns the integral n rounded up to be a multiple of mult. | |
template<typename Integral> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto roundUpToMultiple(Integral n, Integral mult) -> Integral | |
{ | |
return (n + mult - 1) / mult * mult; | |
} | |
namespace internal | |
{ | |
template<typename T, template<typename> typename TypeFunctor> | |
struct TransformLeavesImpl | |
{ | |
using type = TypeFunctor<T>; | |
}; | |
template<typename... Fields, template<typename> typename TypeFunctor> | |
struct TransformLeavesImpl<Record<Fields...>, TypeFunctor> | |
{ | |
using type = Record< | |
Field<GetFieldTag<Fields>, typename TransformLeavesImpl<GetFieldType<Fields>, TypeFunctor>::type>...>; | |
}; | |
template<typename Child, std::size_t N, template<typename> typename TypeFunctor> | |
struct TransformLeavesImpl<Child[N], TypeFunctor> | |
{ | |
using type = typename TransformLeavesImpl<Child, TypeFunctor>::type[N]; | |
}; | |
} // namespace internal | |
/// Creates a new record dimension where each new leaf field's type is the result of applying FieldTypeFunctor to | |
/// the original leaf field's type. | |
template<typename RecordDim, template<typename> typename FieldTypeFunctor> | |
using TransformLeaves = typename internal::TransformLeavesImpl<RecordDim, FieldTypeFunctor>::type; | |
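// Sketch (Pixel example): TransformLeaves<Pixel, std::add_pointer_t> keeps the
// tags and tree structure but turns every leaf type T into T*.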
namespace internal | |
{ | |
// TODO: we might implement this better by expanding a record dim into a list of tag lists and then computing a | |
// real set union of the two tag list lists | |
template<typename A, typename B> | |
auto mergeRecordDimsImpl(boost::mp11::mp_identity<A> a, boost::mp11::mp_identity<B>) | |
{ | |
static_assert(std::is_same_v<A, B>, "Cannot merge record and non-record or fields with different types"); | |
return a; | |
} | |
template<typename A, std::size_t NA, typename B, std::size_t NB> | |
auto mergeRecordDimsImpl( | |
[[maybe_unused]] boost::mp11::mp_identity<A[NA]> a, | |
[[maybe_unused]] boost::mp11::mp_identity<B[NB]> b) | |
{ | |
static_assert(std::is_same_v<A, B>, "Cannot merge arrays of different type"); | |
if constexpr(NA < NB) | |
return b; | |
else | |
return a; | |
} | |
template<typename... FieldsA> | |
auto mergeRecordDimsImpl(boost::mp11::mp_identity<Record<FieldsA...>> a, boost::mp11::mp_identity<Record<>>) | |
{ | |
return a; | |
} | |
template< | |
typename... FieldsA, | |
typename FieldB, | |
typename... FieldsB, | |
auto pos = FindFieldByTag<Record<FieldsA...>, GetFieldTag<FieldB>>::value> | |
auto mergeRecordDimsImpl( | |
boost::mp11::mp_identity<Record<FieldsA...>>, | |
boost::mp11::mp_identity<Record<FieldB, FieldsB...>>) | |
{ | |
using namespace boost::mp11; | |
if constexpr(pos == sizeof...(FieldsA)) | |
{ | |
return mergeRecordDimsImpl( | |
mp_identity<Record<FieldsA..., FieldB>>{}, | |
mp_identity<Record<FieldsB...>>{}); | |
} | |
else | |
{ | |
using OldFieldA = mp_at_c<Record<FieldsA...>, pos>; | |
using NewFieldA = Field< | |
GetFieldTag<OldFieldA>, | |
typename decltype(mergeRecordDimsImpl( | |
mp_identity<GetFieldType<OldFieldA>>{}, | |
mp_identity<GetFieldType<FieldB>>{}))::type>; | |
using NewRecordA = mp_replace_at_c<Record<FieldsA...>, pos, NewFieldA>; | |
return mergeRecordDimsImpl(mp_identity<NewRecordA>{}, mp_identity<Record<FieldsB...>>{}); | |
} | |
} | |
} // namespace internal | |
/// Creates a merged record dimension, where duplicated, nested fields are unified. | |
template<typename RecordDimA, typename RecordDimB> | |
using MergedRecordDims = typename decltype(internal::mergeRecordDimsImpl( | |
boost::mp11::mp_identity<RecordDimA>{}, | |
boost::mp11::mp_identity<RecordDimB>{}))::type; | |
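// Sketch (example tags from above): fields with the same tag are unified and
// nested records are merged recursively:
//   MergedRecordDims<
//       llama::Record<llama::Field<X, float>>,
//       llama::Record<llama::Field<X, float>, llama::Field<Y, float>>>
// is llama::Record<llama::Field<X, float>, llama::Field<Y, float>>.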
/// Returns the tags interspersed by '.' represented by the given record coord in the given record dimension. | |
template<typename RecordDim, std::size_t... Coords> | |
auto recordCoordTags(RecordCoord<Coords...>) -> std::string | |
{ | |
using Tags = GetTags<RecordDim, RecordCoord<Coords...>>; | |
std::string r; | |
boost::mp11::mp_for_each<Tags>( | |
[&](auto tag) | |
{ | |
using Tag = decltype(tag); | |
if(!r.empty()) | |
r += '.'; | |
if constexpr(isRecordCoord<Tag>) | |
{ | |
static_assert(Tag::size == 1); | |
r += std::to_string(Tag::front); // handle array indices | |
} | |
else | |
r += structName(tag); | |
}); | |
return r; | |
} | |
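// Sketch (Pixel example): recordCoordTags<Pixel>(RecordCoord<2, 1>{}) returns
// "Color.1".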
} // namespace llama | |
// == | |
// == ./Core.hpp == | |
// ============================================================================ | |
#include <algorithm> | |
#include <iterator> | |
// #include <limits> // amalgamate: file already included | |
#if CAN_USE_RANGES | |
# include <ranges> | |
#endif | |
namespace llama | |
{ | |
/// Iterator supporting \ref ArrayIndexRange. | |
template<typename ArrayExtents> | |
struct ArrayIndexIterator | |
{ | |
static_assert(!std::is_const_v<ArrayExtents>); | |
using value_type = typename ArrayExtents::Index; | |
using difference_type = std::ptrdiff_t; | |
using reference = value_type; | |
using pointer = internal::IndirectValue<value_type>; | |
using iterator_category = std::random_access_iterator_tag; | |
static constexpr std::size_t rank = ArrayExtents::rank; | |
constexpr ArrayIndexIterator() noexcept = default; | |
LLAMA_FN_HOST_ACC_INLINE constexpr ArrayIndexIterator(ArrayExtents extents, value_type current) noexcept | |
: extents(extents) | |
, current(current) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator*() const noexcept -> value_type | |
{ | |
return current; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator->() const noexcept -> pointer | |
{ | |
return {**this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator++() noexcept -> ArrayIndexIterator& | |
{ | |
current[rank - 1]++; | |
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--) | |
{ | |
if(current[i + 1] != extents[i + 1]) | |
return *this; | |
current[i + 1] = 0; | |
current[i]++; | |
} | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator++(int) noexcept -> ArrayIndexIterator | |
{ | |
auto tmp = *this; | |
++*this; | |
return tmp; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator--() noexcept -> ArrayIndexIterator& | |
{ | |
current[rank - 1]--; | |
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--) | |
{ | |
if(current[i + 1] != std::numeric_limits<std::size_t>::max()) | |
return *this; | |
current[i + 1] = extents[i] - 1; | |
current[i]--; | |
} | |
// decrementing beyond [0, 0, ..., 0] is UB | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator--(int) noexcept -> ArrayIndexIterator | |
{ | |
auto tmp = *this; | |
--*this; | |
return tmp; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator[](difference_type i) const noexcept -> reference | |
{ | |
return *(*this + i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator+=(difference_type n) noexcept -> ArrayIndexIterator& | |
{ | |
// add n to all lower dimensions with carry | |
for(auto i = static_cast<int>(rank) - 1; i > 0 && n != 0; i--) | |
{ | |
n += static_cast<difference_type>(current[i]); | |
const auto s = static_cast<difference_type>(extents[i]); | |
auto mod = n % s; | |
n /= s; | |
if(mod < 0) | |
{ | |
mod += s; | |
n--; | |
} | |
current[i] = mod; | |
assert(current[i] < extents[i]); | |
} | |
current[0] = static_cast<difference_type>(current[0]) + n; | |
// current is either within bounds or at the end ([last + 1, 0, 0, ..., 0]) | |
assert( | |
(current[0] < extents[0] | |
|| (current[0] == extents[0] | |
&& std::all_of(std::begin(current) + 1, std::end(current), [](auto c) { return c == 0; }))) | |
&& "Iterator was moved past the end"); | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator+(ArrayIndexIterator it, difference_type n) noexcept -> ArrayIndexIterator | |
{ | |
it += n; | |
return it; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator+(difference_type n, ArrayIndexIterator it) noexcept -> ArrayIndexIterator | |
{ | |
return it + n; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator-=(difference_type n) noexcept -> ArrayIndexIterator& | |
{ | |
return operator+=(-n); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator-(ArrayIndexIterator it, difference_type n) noexcept -> ArrayIndexIterator | |
{ | |
it -= n; | |
return it; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator-(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept | |
-> difference_type | |
{ | |
assert(a.extents == b.extents); | |
// compute the distance in signed arithmetic to avoid unsigned wraparound when a < b | |
auto n = static_cast<difference_type>(a.current[rank - 1]) - static_cast<difference_type>(b.current[rank - 1]); | |
auto size = static_cast<difference_type>(a.extents[rank - 1]); | |
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--) | |
{ | |
n += (static_cast<difference_type>(a.current[i]) - static_cast<difference_type>(b.current[i])) * size; | |
size *= static_cast<difference_type>(a.extents[i]); | |
} | |
return n; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator==( | |
const ArrayIndexIterator<ArrayExtents>& a, | |
const ArrayIndexIterator<ArrayExtents>& b) noexcept -> bool | |
{ | |
assert(a.extents == b.extents); | |
return a.current == b.current; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator!=( | |
const ArrayIndexIterator<ArrayExtents>& a, | |
const ArrayIndexIterator<ArrayExtents>& b) noexcept -> bool | |
{ | |
return !(a == b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator<(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool | |
{ | |
assert(a.extents == b.extents); | |
return std::lexicographical_compare( | |
std::begin(a.current), | |
std::end(a.current), | |
std::begin(b.current), | |
std::end(b.current)); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator>(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool | |
{ | |
return b < a; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator<=(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool | |
{ | |
return !(a > b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator>=(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool | |
{ | |
return !(a < b); | |
} | |
private: | |
ArrayExtents extents; // TODO(bgruber): we only need to store rank - 1 sizes | |
value_type current; | |
}; | |
/// Range allowing to iterate over all indices in an \ref ArrayExtents. | |
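/// A minimal usage sketch (extent values are illustrative; llama::ArrayExtentsDynamic is assumed to be the | |
/// all-runtime-extents type defined earlier in this header): | |
/// \code | |
/// llama::ArrayExtentsDynamic<2> extents{3, 4}; | |
/// for(auto ai : llama::ArrayIndexRange{extents}) | |
///     ; // ai visits {0, 0}, {0, 1}, ..., {2, 3} in C++ (row-major) order | |
/// \endcode | |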
template<typename ArrayExtents> | |
struct ArrayIndexRange | |
: private ArrayExtents | |
#if CAN_USE_RANGES | |
, std::ranges::view_base | |
#endif | |
{ | |
static_assert(!std::is_const_v<ArrayExtents>); | |
constexpr ArrayIndexRange() noexcept = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit ArrayIndexRange(ArrayExtents extents) noexcept : ArrayExtents(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto begin() const noexcept -> ArrayIndexIterator<ArrayExtents> | |
{ | |
return {*this, typename ArrayExtents::Index{}}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto end() const noexcept -> ArrayIndexIterator<ArrayExtents> | |
{ | |
auto endPos = typename ArrayExtents::Index{}; | |
endPos[0] = this->toArray()[0]; | |
return {*this, endPos}; | |
} | |
}; | |
} // namespace llama | |
// == | |
// == ./ArrayIndexRange.hpp == | |
// ============================================================================ | |
// #include "Core.hpp" // amalgamate: file already expanded | |
namespace llama | |
{ | |
namespace internal | |
{ | |
constexpr auto divRoundUp(std::size_t dividend, std::size_t divisor) -> std::size_t | |
{ | |
return (dividend + divisor - 1) / divisor; | |
} | |
} // namespace internal | |
// FIXME: this test is actually not correct, because __cpp_constexpr_dynamic_alloc only guarantees constexpr | |
// std::allocator | |
#ifdef __cpp_constexpr_dynamic_alloc | |
namespace internal | |
{ | |
template<typename T> | |
struct DynArray | |
{ | |
constexpr DynArray() = default; | |
constexpr DynArray(std::size_t n) | |
{ | |
data = new T[n]{}; | |
} | |
constexpr ~DynArray() | |
{ | |
delete[] data; | |
} | |
constexpr void resize(std::size_t n) | |
{ | |
delete[] data; | |
data = new T[n]{}; | |
} | |
T* data = nullptr; | |
}; | |
} // namespace internal | |
/// Proves by exhaustion of the array and record dimensions that no two values are mapped to overlapping memory. | |
// Unfortunately, this only works for smallish array dimensions, because of compiler limits on constexpr evaluation | |
// depth. | |
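// Usage sketch (Vec is a hypothetical record dimension; single-element extents keep the exhaustion cheap): | |
// constexpr auto m = llama::mapping::PackedOne<llama::ArrayExtentsStatic<1, 1>, Vec>{}; | |
// static_assert(llama::mapsNonOverlappingly(m)); | |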
template<typename Mapping> | |
constexpr auto mapsNonOverlappingly(const Mapping& m) -> bool | |
{ | |
internal::DynArray<internal::DynArray<std::uint64_t>> blobByteMapped(m.blobCount); | |
for(std::size_t i = 0; i < m.blobCount; i++) | |
blobByteMapped.data[i].resize(internal::divRoundUp(m.blobSize(i), 64)); | |
auto testAndSet = [&](auto blob, auto offset) constexpr | |
{ | |
const auto bit = std::uint64_t{1} << (offset % 64); | |
if(blobByteMapped.data[blob].data[offset / 64] & bit) | |
return true; | |
blobByteMapped.data[blob].data[offset / 64] |= bit; | |
return false; | |
}; | |
bool collision = false; | |
forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) constexpr | |
{ | |
if(collision) | |
return; | |
for(auto ai : ArrayIndexRange{m.extents()}) | |
{ | |
using Type | |
= GetType<typename Mapping::RecordDim, decltype(rc)>; | |
const auto [blob, offset] = m.blobNrAndOffset(ai, rc); | |
for(std::size_t b = 0; b < sizeof(Type); b++) | |
if(testAndSet(blob, offset + b)) | |
{ | |
collision = true; | |
break; | |
} | |
} | |
}); | |
return !collision; | |
} | |
#endif | |
/// Proves by exhaustion of the array and record dimensions that at least PieceLength elements are always stored | |
/// contiguously. | |
// Unfortunately, this only works for smallish array dimensions, because of compiler limits on constexpr evaluation | |
// depth. | |
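// Usage sketch (hypothetical: soa is an SoA-like mapping over 8 array elements, storing each field contiguously): | |
// static_assert(llama::mapsPiecewiseContiguous<8>(soa)); | |
// With PieceLength == 1 the proof holds trivially for any mapping. | |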
template<std::size_t PieceLength, typename Mapping> | |
constexpr auto mapsPiecewiseContiguous(const Mapping& m) -> bool | |
{ | |
bool collision = false; | |
forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) constexpr | |
{ | |
std::size_t flatIndex = 0; | |
std::size_t lastBlob | |
= std::numeric_limits<std::size_t>::max(); | |
std::size_t lastOffset | |
= std::numeric_limits<std::size_t>::max(); | |
for(auto ai : ArrayIndexRange{m.extents()}) | |
{ | |
using Type | |
= GetType<typename Mapping::RecordDim, decltype(rc)>; | |
const auto [blob, offset] = m.blobNrAndOffset(ai, rc); | |
if(flatIndex % PieceLength != 0 | |
&& (lastBlob != blob | |
|| lastOffset + sizeof(Type) != offset)) | |
{ | |
collision = true; | |
break; | |
} | |
lastBlob = blob; | |
lastOffset = offset; | |
flatIndex++; | |
} | |
}); | |
return !collision; | |
} | |
} // namespace llama | |
// == | |
// == ./Proofs.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./Vector.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// ============================================================================ | |
// == ./View.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Array.hpp" // amalgamate: file already expanded | |
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./BlobAllocators.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Array.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./Concepts.hpp == | |
// == | |
// #pragma once | |
// #include "Array.hpp" // amalgamate: file already expanded | |
// #include "Core.hpp" // amalgamate: file already expanded | |
// #include <type_traits> // amalgamate: file already included | |
#if __has_include(<concepts>) | |
# include <concepts> | |
#endif | |
#ifdef __cpp_lib_concepts | |
namespace llama | |
{ | |
// clang-format off | |
template <typename M> | |
concept Mapping = requires(M m) { | |
typename M::ArrayExtents; | |
typename M::ArrayIndex; | |
typename M::RecordDim; | |
{ m.extents() } -> std::same_as<typename M::ArrayExtents>; | |
{ M::blobCount } -> std::convertible_to<std::size_t>; | |
Array<int, M::blobCount>{}; // validates constexpr-ness | |
{ m.blobSize(std::size_t{}) } -> std::same_as<std::size_t>; | |
{ m.blobNrAndOffset(typename M::ArrayIndex{}) } -> std::same_as<NrAndOffset>; | |
{ m.template blobNrAndOffset<0>(typename M::ArrayIndex{}) } -> std::same_as<NrAndOffset>; | |
{ m.blobNrAndOffset(typename M::ArrayIndex{}, llama::RecordCoord<0>{}) } -> std::same_as<NrAndOffset>; | |
}; | |
// clang-format on | |
template<typename B> | |
concept Blob = requires(B b, std::size_t i) | |
{ | |
// according to http://eel.is/c++draft/intro.object#3 only std::byte and unsigned char can provide storage for | |
// other types | |
std::is_same_v<decltype(b[i]), std::byte&> || std::is_same_v<decltype(b[i]), unsigned char&>; | |
}; | |
// clang-format off | |
template <typename BA> | |
concept BlobAllocator = requires(BA ba, std::integral_constant<std::size_t, 16> alignment, std::size_t size) { | |
{ ba(alignment, size) } -> Blob; | |
}; | |
// clang-format on | |
} // namespace llama | |
#endif | |
// == | |
// == ./Concepts.hpp == | |
// ============================================================================ | |
// #include "macros.hpp" // amalgamate: file already expanded | |
#include <cstddef> | |
#include <memory> | |
#include <vector> | |
#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 11000 | |
# include <boost/shared_ptr.hpp> | |
#endif | |
namespace llama::bloballoc | |
{ | |
/// Allocates stack memory for a \ref View, which is copied each time a \ref View is copied. | |
/// \tparam BytesToReserve the amount of memory to reserve. | |
template<std::size_t BytesToReserve> | |
struct Stack | |
{ | |
template<std::size_t Alignment> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t) const | |
{ | |
struct alignas(Alignment) AlignedArray : Array<std::byte, BytesToReserve> | |
{ | |
}; | |
return AlignedArray{}; | |
} | |
}; | |
#ifdef __cpp_lib_concepts | |
static_assert(BlobAllocator<Stack<64>>); | |
#endif | |
/// Allocates heap memory managed by a `std::shared_ptr` for a \ref View. This memory is shared between all copies | |
/// of a \ref View. | |
struct SharedPtr | |
{ | |
// libc++ below 11.0.0 does not yet support shared_ptr with arrays | |
template<typename T> | |
using shared_ptr = | |
#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 11000 | |
boost::shared_ptr<T>; | |
#else | |
std::shared_ptr<T>; | |
#endif | |
template<std::size_t Alignment> | |
auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const | |
-> shared_ptr<std::byte[]> | |
{ | |
auto* ptr | |
= static_cast<std::byte*>(::operator new[](count * sizeof(std::byte), std::align_val_t{Alignment})); | |
auto deleter = [=](std::byte* ptr) { ::operator delete[](ptr, std::align_val_t{Alignment}); }; | |
return shared_ptr<std::byte[]>{ptr, deleter}; | |
} | |
}; | |
#ifdef __cpp_lib_concepts | |
static_assert(BlobAllocator<SharedPtr>); | |
#endif | |
/// An STL-compatible allocator that allows specifying the allocation alignment. | |
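/// E.g. (usage sketch): std::vector<float, llama::bloballoc::AlignedAllocator<float, 64>> v(100); // 64-byte aligned | |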
template<typename T, std::size_t Alignment> | |
struct AlignedAllocator | |
{ | |
using value_type = T; | |
inline AlignedAllocator() noexcept = default; | |
template<typename T2> | |
inline explicit AlignedAllocator(AlignedAllocator<T2, Alignment> const&) noexcept | |
{ | |
} | |
inline auto allocate(std::size_t n) -> T* | |
{ | |
return static_cast<T*>(::operator new[](n * sizeof(T), std::align_val_t{Alignment})); | |
} | |
inline void deallocate(T* p, std::size_t) | |
{ | |
::operator delete[](p, std::align_val_t{Alignment}); | |
} | |
template<typename T2> | |
struct rebind // NOLINT(readability-identifier-naming) | |
{ | |
using other = AlignedAllocator<T2, Alignment>; | |
}; | |
auto operator!=(const AlignedAllocator<T, Alignment>& other) const -> bool | |
{ | |
return !(*this == other); | |
} | |
auto operator==(const AlignedAllocator<T, Alignment>&) const -> bool | |
{ | |
return true; | |
} | |
}; | |
/// Allocates heap memory managed by a `std::vector` for a \ref View, which is copied each time a \ref View is | |
/// copied. | |
struct Vector | |
{ | |
template<std::size_t Alignment> | |
inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const | |
{ | |
return std::vector<std::byte, AlignedAllocator<std::byte, Alignment>>(count); | |
} | |
}; | |
#ifdef __cpp_lib_concepts | |
static_assert(BlobAllocator<Vector>); | |
#endif | |
} // namespace llama::bloballoc | |
// == | |
// == ./BlobAllocators.hpp == | |
// ============================================================================ | |
// #include "Concepts.hpp" // amalgamate: file already expanded | |
// #include "Core.hpp" // amalgamate: file already expanded | |
// #include "macros.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/One.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "../Core.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/Common.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "../Core.hpp" // amalgamate: file already expanded | |
#include <climits> | |
namespace llama::mapping | |
{ | |
/// Functor that maps an \ref ArrayIndex into linear numbers the way C++ arrays work. The fast-moving index of the | |
/// ArrayIndex object should be the last one. E.g. an ArrayIndex<3> a stores 3 indices, of which a[2] should be | |
/// incremented in the innermost loop. | |
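/// For example (illustrative values): extents {4, 5} and index {2, 3} linearize to 2 * 5 + 3 = 13. | |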
struct LinearizeArrayDimsCpp | |
{ | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> std::size_t | |
{ | |
return product(extents); | |
} | |
/// \param ai Index in the array dimensions. | |
/// \param extents Total size of the array dimensions. | |
/// \return Linearized index. | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()( | |
const typename ArrayExtents::Index& ai, | |
const ArrayExtents& extents) const -> std::size_t | |
{ | |
if constexpr(ArrayExtents::rank == 0) | |
return 0; | |
else | |
{ | |
std::size_t address = ai[0]; | |
for(std::size_t i = 1; i < ArrayExtents::rank; i++) | |
{ | |
address *= extents[i]; | |
address += ai[i]; | |
} | |
return address; | |
} | |
} | |
}; | |
/// Functor that maps an \ref ArrayIndex into linear numbers the way Fortran arrays work. The fast-moving index of | |
/// the ArrayIndex object should be the last one. E.g. an ArrayIndex<3> a stores 3 indices, of which a[2] should be | |
/// incremented in the innermost loop. | |
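/// For example (illustrative values): extents {4, 5} and index {2, 3} linearize to 3 * 4 + 2 = 14. | |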
struct LinearizeArrayDimsFortran | |
{ | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> std::size_t | |
{ | |
return product(extents); | |
} | |
/// \param ai Index in the array dimensions. | |
/// \param extents Total size of the array dimensions. | |
/// \return Linearized index. | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()( | |
const typename ArrayExtents::Index& ai, | |
const ArrayExtents& extents) const -> std::size_t | |
{ | |
if constexpr(ArrayExtents::rank == 0) | |
return 0; | |
else | |
{ | |
std::size_t address = ai[ArrayExtents::rank - 1]; | |
for(int i = static_cast<int>(ArrayExtents::rank) - 2; i >= 0; i--) | |
{ | |
address *= extents[i]; | |
address += ai[i]; | |
} | |
return address; | |
} | |
} | |
}; | |
/// Functor that maps an \ref ArrayIndex into linear numbers using the Z-order space filling curve (Morton codes). | |
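/// For example (illustrative values): the rank-2 index {1, 2}, binary {01, 10}, interleaves to binary 0110 = 6. | |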
struct LinearizeArrayDimsMorton | |
{ | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) const -> std::size_t | |
{ | |
if constexpr(ArrayExtents::rank == 0) | |
return 0; | |
else | |
{ | |
std::size_t longest = extents[0]; | |
for(std::size_t i = 1; i < ArrayExtents::rank; i++) | |
longest = std::max(longest, extents[i]); | |
const auto longestPO2 = bit_ceil(longest); | |
return intPow(longestPO2, ArrayExtents::rank); | |
} | |
} | |
/// \param ai Coordinate in the array dimensions. | |
/// \param extents Total size of the array dimensions. | |
/// \return Linearized index. | |
template<typename ArrayExtents> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()( | |
const typename ArrayExtents::Index& ai, | |
[[maybe_unused]] const ArrayExtents& extents) const -> std::size_t | |
{ | |
std::size_t r = 0; | |
for(std::size_t bit = 0; bit < (sizeof(std::size_t) * CHAR_BIT) / ArrayExtents::rank; bit++) | |
for(std::size_t i = 0; i < ArrayExtents::rank; i++) | |
r |= (ai[i] & (std::size_t{1} << bit)) << ((bit + 1) * (ArrayExtents::rank - 1) - i); | |
return r; | |
} | |
private: | |
LLAMA_FN_HOST_ACC_INLINE static constexpr auto bit_ceil(std::size_t n) -> std::size_t | |
{ | |
std::size_t r = 1; | |
while(r < n) | |
r <<= 1u; | |
return r; | |
} | |
LLAMA_FN_HOST_ACC_INLINE static constexpr auto intPow(std::size_t b, std::size_t e) -> std::size_t | |
{ | |
e--; | |
auto r = b; | |
while(e != 0u) | |
{ | |
r *= b; | |
e--; | |
} | |
return r; | |
} | |
}; | |
/// Flattens the record dimension in the order fields are written. | |
template<typename RecordDim> | |
struct FlattenRecordDimInOrder | |
{ | |
using FlatRecordDim = llama::FlatRecordDim<RecordDim>; | |
template<std::size_t... RecordCoords> | |
static constexpr std::size_t flatIndex = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>; | |
}; | |
/// Flattens the record dimension by sorting the fields according to a given predicate on the field types. | |
/// \tparam Less A binary predicate accepting two field types and exposing a boolean member value, which is true if | |
/// the first field type is less than the second one, and false otherwise. | |
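/// For example (hypothetical field types): flattened fields [double, char, float] sorted by increasing alignment | |
/// become [char, float, double]; flatIndex then translates coordinates from the original into the sorted order. | |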
template<typename RecordDim, template<typename, typename> typename Less> | |
struct FlattenRecordDimSorted | |
{ | |
private: | |
using FlatOrigRecordDim = llama::FlatRecordDim<RecordDim>; | |
using FlatSortedRecordDim = boost::mp11::mp_sort<FlatOrigRecordDim, Less>; | |
template<typename A, typename B> | |
using LessWithIndices | |
= Less<boost::mp11::mp_at<FlatOrigRecordDim, A>, boost::mp11::mp_at<FlatOrigRecordDim, B>>; | |
// A permutation from new FlatSortedRecordDim index to old FlatOrigRecordDim index | |
using PermutedIndices | |
= boost::mp11::mp_sort<boost::mp11::mp_iota<boost::mp11::mp_size<FlatOrigRecordDim>>, LessWithIndices>; | |
template<typename A, typename B> | |
using LessInvertPermutation = std::bool_constant<( | |
boost::mp11::mp_at<PermutedIndices, A>::value < boost::mp11::mp_at<PermutedIndices, B>::value)>; | |
// A permutation from old FlatOrigRecordDim index to new FlatSortedRecordDim index | |
using InversePermutedIndices = boost::mp11:: | |
mp_sort<boost::mp11::mp_iota<boost::mp11::mp_size<FlatOrigRecordDim>>, LessInvertPermutation>; | |
public: | |
using FlatRecordDim = FlatSortedRecordDim; | |
template<std::size_t... RecordCoords> | |
static constexpr std::size_t flatIndex = []() constexpr | |
{ | |
constexpr auto indexBefore = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>; | |
constexpr auto indexAfter = boost::mp11::mp_at_c<InversePermutedIndices, indexBefore>::value; | |
return indexAfter; | |
} | |
(); | |
}; | |
namespace internal | |
{ | |
template<typename A, typename B> | |
using LessAlignment = std::bool_constant<alignof(A) < alignof(B)>; | |
template<typename A, typename B> | |
using MoreAlignment = std::bool_constant<(alignof(A) > alignof(B))>; | |
} // namespace internal | |
/// Flattens and sorts the record dimension by increasing alignment of its fields. | |
template<typename RecordDim> | |
using FlattenRecordDimIncreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::LessAlignment>; | |
/// Flattens and sorts the record dimension by decreasing alignment of its fields. | |
template<typename RecordDim> | |
using FlattenRecordDimDecreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::MoreAlignment>; | |
/// Flattens and sorts the record dimension by the alignment of its fields to minimize padding. | |
template<typename RecordDim> | |
using FlattenRecordDimMinimizePadding = FlattenRecordDimIncreasingAlignment<RecordDim>; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/Common.hpp == | |
// ============================================================================ | |
namespace llama::mapping | |
{ | |
/// Maps all array dimension indices to the same location and lays out the struct members consecutively. This | |
/// mapping is used for temporary, single-element views. | |
/// \tparam AlignAndPad If true, padding bytes are inserted to guarantee that struct members are properly aligned. | |
/// If false, struct members are tightly packed. | |
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref | |
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and | |
/// \ref FlattenRecordDimMinimizePadding. | |
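/// A usage sketch (Vec is a hypothetical record dimension whose first field is a leaf): | |
/// \code | |
/// llama::mapping::MinAlignedOne<llama::ArrayExtentsStatic<1, 1>, Vec> m{}; | |
/// const auto [blob, offset] = m.blobNrAndOffset({}, llama::RecordCoord<0>{}); // blob is always 0 | |
/// \endcode | |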
template< | |
typename TArrayExtents, | |
typename TRecordDim, | |
bool AlignAndPad = true, | |
template<typename> typename FlattenRecordDim = FlattenRecordDimMinimizePadding> | |
struct One : TArrayExtents | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
static constexpr std::size_t blobCount = 1; | |
constexpr One() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit One(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return ArrayExtents{*this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t | |
{ | |
return flatSizeOf<typename Flattener::FlatRecordDim, AlignAndPad, false>; // no tail padding | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
constexpr std::size_t flatFieldIndex = | |
#ifdef __NVCC__ | |
*& // mess with nvcc compiler state to work around a bug | |
#endif | |
Flattener::template flatIndex<RecordCoords...>; | |
constexpr auto offset = flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>; | |
return {0, offset}; | |
} | |
private: | |
using Flattener = FlattenRecordDim<TRecordDim>; | |
}; | |
/// One mapping preserving the alignment of the field types by inserting padding. | |
/// \see One | |
template<typename ArrayExtents, typename RecordDim> | |
using AlignedOne = One<ArrayExtents, RecordDim, true, FlattenRecordDimInOrder>; | |
/// One mapping preserving the alignment of the field types by inserting padding and permuting the field order to | |
/// minimize this padding. | |
/// \see One | |
template<typename ArrayExtents, typename RecordDim> | |
using MinAlignedOne = One<ArrayExtents, RecordDim, true, FlattenRecordDimMinimizePadding>; | |
/// One mapping packing the field types tightly, violating the types' alignment requirements. | |
/// \see One | |
template<typename ArrayExtents, typename RecordDim> | |
using PackedOne = One<ArrayExtents, RecordDim, false, FlattenRecordDimInOrder>; | |
template<typename Mapping> | |
inline constexpr bool isOne = false; | |
template<typename ArrayExtents, typename RecordDim, bool AlignAndPad, template<typename> typename FlattenRecordDim> | |
inline constexpr bool isOne<One<ArrayExtents, RecordDim, AlignAndPad, FlattenRecordDim>> = true; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/One.hpp == | |
// ============================================================================ | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama | |
{ | |
#ifdef __cpp_lib_concepts | |
template<typename TMapping, Blob BlobType> | |
#else | |
template<typename TMapping, typename BlobType> | |
#endif | |
struct View; | |
namespace internal | |
{ | |
template<typename Allocator, typename RecordDim> | |
using AllocatorBlobType | |
= decltype(std::declval<Allocator>()(std::integral_constant<std::size_t, alignOf<RecordDim>>{}, 0)); | |
LLAMA_SUPPRESS_HOST_DEVICE_WARNING | |
template<typename Allocator, typename Mapping, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE auto makeBlobArray( | |
const Allocator& alloc, | |
const Mapping& mapping, | |
std::integer_sequence<std::size_t, Is...>) | |
-> Array<AllocatorBlobType<Allocator, typename Mapping::RecordDim>, Mapping::blobCount> | |
{ | |
[[maybe_unused]] constexpr auto alignment | |
= alignOf<typename Mapping::RecordDim>; // g++-12 warns that alignment is unused | |
return {alloc(std::integral_constant<std::size_t, alignment>{}, mapping.blobSize(Is))...}; | |
} | |
} // namespace internal | |
/// Same as \ref allocView but does not run field constructors. | |
#ifdef __cpp_lib_concepts | |
template<typename Mapping, BlobAllocator Allocator = bloballoc::Vector> | |
#else | |
template<typename Mapping, typename Allocator = bloballoc::Vector> | |
#endif | |
LLAMA_FN_HOST_ACC_INLINE auto allocViewUninitialized(Mapping mapping = {}, const Allocator& alloc = {}) | |
-> View<Mapping, internal::AllocatorBlobType<Allocator, typename Mapping::RecordDim>> | |
{ | |
auto blobs = internal::makeBlobArray(alloc, mapping, std::make_index_sequence<Mapping::blobCount>{}); | |
return {std::move(mapping), std::move(blobs)}; | |
} | |
namespace internal | |
{ | |
template<typename Mapping, typename RecordCoord, typename = void> | |
struct IsComputed : std::false_type | |
{ | |
}; | |
template<typename Mapping, typename RecordCoord> | |
struct IsComputed<Mapping, RecordCoord, std::void_t<decltype(Mapping::isComputed(RecordCoord{}))>> | |
: std::bool_constant<Mapping::isComputed(RecordCoord{})> | |
{ | |
}; | |
} // namespace internal | |
/// Returns true if the field accessed via the given mapping and record coordinate is a computed value. | |
template<typename Mapping, typename RecordCoord> | |
inline constexpr bool isComputed = internal::IsComputed<Mapping, RecordCoord>::value; | |
/// Runs the constructor of all fields reachable through the given view. Computed fields are not constructed. | |
template<typename Mapping, typename BlobType> | |
LLAMA_FN_HOST_ACC_INLINE void constructFields(View<Mapping, BlobType>& view) | |
{ | |
using View = View<Mapping, BlobType>; | |
using RecordDim = typename View::RecordDim; | |
forEachADCoord( | |
view.mapping().extents(), | |
[&](typename View::ArrayIndex ai) | |
{ | |
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value) | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) | |
{ | |
// TODO(bgruber): we could initialize computed fields if we can write to those. We could | |
// test if the returned value can be cast to a T& and then attempt to write. | |
if constexpr(!isComputed<Mapping, decltype(rc)>) | |
new(&view(ai)(rc)) GetType<RecordDim, decltype(rc)>; | |
}); | |
else if constexpr(!isComputed<Mapping, RecordCoord<>>) | |
new(&view(ai)) RecordDim; | |
}); | |
} | |
/// Creates a view based on the given mapping, e.g. \ref AoS or \ref SoA. For allocating the view's underlying | |
/// memory, the specified allocator callable is used (or the default one, which is \ref bloballoc::Vector). The | |
/// allocator callable is called with the alignment and size of bytes to allocate for each blob of the mapping. | |
/// The constructors are run for all fields by calling \ref constructFields. This function is the preferred way to | |
/// create a \ref View. See also \ref allocViewUninitialized. | |
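/// A minimal usage sketch (Vec is a hypothetical record dimension whose first field is a float leaf): | |
/// \code | |
/// auto view = llama::allocView(llama::mapping::AlignedOne<llama::ArrayExtentsStatic<1, 1>, Vec>{}); | |
/// view(0u)(llama::RecordCoord<0>{}) = 1.0f; // writes the first field of the single record | |
/// \endcode | |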
#ifdef __cpp_lib_concepts | |
template<typename Mapping, BlobAllocator Allocator = bloballoc::Vector> | |
#else | |
template<typename Mapping, typename Allocator = bloballoc::Vector> | |
#endif | |
LLAMA_FN_HOST_ACC_INLINE auto allocView(Mapping mapping = {}, const Allocator& alloc = {}) | |
-> View<Mapping, internal::AllocatorBlobType<Allocator, typename Mapping::RecordDim>> | |
{ | |
auto view = allocViewUninitialized(std::move(mapping), alloc); | |
constructFields(view); | |
return view; | |
} | |
/// Allocates a \ref View holding a single record backed by stack memory (\ref bloballoc::Stack). | |
/// \tparam Dim Dimension of the \ref ArrayExtents of the \ref View. | |
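/// E.g. (sketch, Vec being a hypothetical record dimension): auto v = llama::allocViewStack<1, Vec>(); | |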
template<std::size_t Dim, typename RecordDim> | |
LLAMA_FN_HOST_ACC_INLINE auto allocViewStack() -> decltype(auto) | |
{ | |
constexpr auto mapping = mapping::MinAlignedOne<ArrayExtentsStatic<Dim, 1>, RecordDim>{}; | |
return allocView(mapping, bloballoc::Stack<mapping.blobSize(0)>{}); | |
} | |
template<typename View, typename BoundRecordCoord = RecordCoord<>, bool OwnView = false> | |
struct VirtualRecord; | |
/// A \ref VirtualRecord that owns and holds a single value. | |
template<typename RecordDim> | |
using One = VirtualRecord<decltype(allocViewStack<0, RecordDim>()), RecordCoord<>, true>; | |
// TODO(bgruber): Higher-dimensional iterators might not have good codegen. Multiple nested loops seem to be | |
// superior to a single iterator over multiple dimensions; at least compilers produce better code for them. | |
// std::mdspan also discovered similar difficulties and there was a discussion in WG21 in Oulu 2016 to | |
// remove/postpone iterators from the design. In std::mdspan's design, the iterator iterated over the co-domain. | |
template<typename View> | |
struct Iterator | |
{ | |
using ArrayIndexIterator = llama::ArrayIndexIterator<typename View::ArrayExtents>; | |
using iterator_category = std::random_access_iterator_tag; | |
using value_type = One<typename View::RecordDim>; | |
using difference_type = typename ArrayIndexIterator::difference_type; | |
using pointer = internal::IndirectValue<VirtualRecord<View>>; | |
using reference = VirtualRecord<View>; | |
constexpr Iterator() = default; | |
LLAMA_FN_HOST_ACC_INLINE constexpr Iterator(ArrayIndexIterator arrayIndex, View* view) | |
: arrayIndex(arrayIndex) | |
, view(view) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator++() -> Iterator& | |
{ | |
++arrayIndex; | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator++(int) -> Iterator | |
{ | |
auto tmp = *this; | |
++*this; | |
return tmp; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator--() -> Iterator& | |
{ | |
--arrayIndex; | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator--(int) -> Iterator | |
{ | |
auto tmp{*this}; | |
--*this; | |
return tmp; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator*() const -> reference | |
{ | |
return (*view)(*arrayIndex); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator->() const -> pointer | |
{ | |
return {**this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator[](difference_type i) const -> reference | |
{ | |
return *(*this + i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator+=(difference_type n) -> Iterator& | |
{ | |
arrayIndex += n; | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator+(Iterator it, difference_type n) -> Iterator | |
{ | |
it += n; | |
return it; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator+(difference_type n, Iterator it) -> Iterator | |
{ | |
return it + n; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto operator-=(difference_type n) -> Iterator& | |
{ | |
arrayIndex -= n; | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator-(Iterator it, difference_type n) -> Iterator | |
{ | |
it -= n; | |
return it; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator-(const Iterator& a, const Iterator& b) -> difference_type | |
{ | |
assert(a.view == b.view); | |
return static_cast<std::ptrdiff_t>(a.arrayIndex - b.arrayIndex); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator==(const Iterator& a, const Iterator& b) -> bool | |
{ | |
assert(a.view == b.view); | |
return a.arrayIndex == b.arrayIndex; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator!=(const Iterator& a, const Iterator& b) -> bool | |
{ | |
return !(a == b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator<(const Iterator& a, const Iterator& b) -> bool | |
{ | |
assert(a.view == b.view); | |
return a.arrayIndex < b.arrayIndex; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator>(const Iterator& a, const Iterator& b) -> bool | |
{ | |
return b < a; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator<=(const Iterator& a, const Iterator& b) -> bool | |
{ | |
return !(a > b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
friend constexpr auto operator>=(const Iterator& a, const Iterator& b) -> bool | |
{ | |
return !(a < b); | |
} | |
ArrayIndexIterator arrayIndex; | |
View* view; | |
}; | |
/// Central LLAMA class holding memory for storage and giving access to values stored there defined by a mapping. A | |
/// view should be created using \ref allocView. | |
/// \tparam TMapping The mapping used by the view to map accesses into memory. | |
/// \tparam BlobType The storage type used by the view holding memory. | |
#ifdef __cpp_lib_concepts | |
template<typename TMapping, Blob BlobType> | |
#else | |
template<typename TMapping, typename BlobType> | |
#endif | |
struct View | |
: private TMapping | |
#if CAN_USE_RANGES | |
, std::ranges::view_base | |
#endif | |
{ | |
static_assert(!std::is_const_v<TMapping>); | |
using Mapping = TMapping; | |
using ArrayExtents = typename Mapping::ArrayExtents; | |
using ArrayIndex = typename Mapping::ArrayIndex; | |
using RecordDim = typename Mapping::RecordDim; | |
using iterator = Iterator<View>; | |
using const_iterator = Iterator<const View>; | |
static_assert( | |
std::is_same_v<Mapping, std::decay_t<Mapping>>, | |
"Mapping must not be const qualified or a reference. Are you using decltype(...) as View template " | |
"argument?"); | |
static_assert( | |
std::is_same_v<ArrayExtents, std::decay_t<ArrayExtents>>, | |
"Mapping::ArrayExtents must not be const qualified or a reference. Are you using decltype(...) as mapping " | |
"template argument?"); | |
View() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
View(Mapping mapping, Array<BlobType, Mapping::blobCount> storageBlobs) | |
: Mapping(std::move(mapping)) | |
, storageBlobs(std::move(storageBlobs)) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto mapping() -> Mapping& | |
{ | |
return static_cast<Mapping&>(*this); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto mapping() const -> const Mapping& | |
{ | |
return static_cast<const Mapping&>(*this); | |
} | |
/// Retrieves the \ref VirtualRecord at the given \ref ArrayIndex index. | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) const -> decltype(auto) | |
{ | |
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return VirtualRecord<const View>{ai, *this}; | |
} | |
else | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return accessor(ai, RecordCoord<>{}); | |
} | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) -> decltype(auto) | |
{ | |
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return VirtualRecord<View>{ai, *this}; | |
} | |
else | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return accessor(ai, RecordCoord<>{}); | |
} | |
} | |
/// Retrieves the \ref VirtualRecord at the \ref ArrayIndex index constructed from the passed component | |
/// indices. | |
template<typename... Indices> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) const -> decltype(auto) | |
{ | |
static_assert( | |
sizeof...(Indices) == ArrayIndex::rank, | |
"Please specify as many indices as you have array dimensions"); | |
static_assert( | |
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>, | |
"Indices must be convertible to std::size_t"); | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...}); | |
} | |
template<typename... Indices> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) -> decltype(auto) | |
{ | |
static_assert( | |
sizeof...(Indices) == ArrayIndex::rank, | |
"Please specify as many indices as you have array dimensions"); | |
static_assert( | |
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>, | |
"Indices must be convertible to std::size_t"); | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...}); | |
} | |
/// Retrieves the \ref VirtualRecord at the \ref ArrayIndex index constructed from the passed component | |
/// indices. | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](ArrayIndex ai) const -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(ai); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](ArrayIndex ai) -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(ai); | |
} | |
/// Retrieves the \ref VirtualRecord at the 1D \ref ArrayIndex index constructed from the passed index. | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t index) const -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(index); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t index) -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return (*this)(index); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto begin() -> iterator | |
{ | |
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto begin() const -> const_iterator | |
{ | |
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto end() -> iterator | |
{ | |
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto end() const -> const_iterator | |
{ | |
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this}; | |
} | |
Array<BlobType, Mapping::blobCount> storageBlobs; | |
private: | |
template<typename TView, typename TBoundRecordCoord, bool OwnView> | |
friend struct VirtualRecord; | |
LLAMA_SUPPRESS_HOST_DEVICE_WARNING | |
template<std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai, RecordCoord<Coords...> rc = {}) const -> decltype(auto) | |
{ | |
if constexpr(llama::isComputed<Mapping, RecordCoord<Coords...>>) | |
return mapping().compute(ai, rc, storageBlobs); | |
else | |
{ | |
const auto [nr, offset] = mapping().blobNrAndOffset(ai, rc); | |
using Type = GetType<RecordDim, RecordCoord<Coords...>>; | |
return reinterpret_cast<const Type&>(storageBlobs[nr][offset]); | |
} | |
} | |
LLAMA_SUPPRESS_HOST_DEVICE_WARNING | |
template<std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai, RecordCoord<Coords...> rc = {}) -> decltype(auto) | |
{ | |
if constexpr(llama::isComputed<Mapping, RecordCoord<Coords...>>) | |
return mapping().compute(ai, rc, storageBlobs); | |
else | |
{ | |
const auto [nr, offset] = mapping().blobNrAndOffset(ai, rc); | |
using Type = GetType<RecordDim, RecordCoord<Coords...>>; | |
using QualifiedType = std::conditional_t< | |
std::is_const_v<std::remove_reference_t<decltype(storageBlobs[nr][offset])>>, | |
const Type, | |
Type>; | |
return reinterpret_cast<QualifiedType&>(storageBlobs[nr][offset]); | |
} | |
} | |
}; | |
template<typename View> | |
inline constexpr auto IsView = false; | |
template<typename Mapping, typename BlobType> | |
inline constexpr auto IsView<View<Mapping, BlobType>> = true; | |
/// Acts like a \ref View, but shows only a smaller and/or shifted part of another view it references, the parent | |
/// view. | |
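/// A usage sketch (parentView is a hypothetical rank-1 \ref View): | |
/// \code | |
/// llama::VirtualView shifted{parentView, {10}}; | |
/// shifted(2u); // accesses parentView(12) | |
/// \endcode | |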
template<typename TParentView> | |
struct VirtualView | |
{ | |
using ParentView = TParentView; ///< type of the parent view | |
using Mapping = typename ParentView::Mapping; ///< mapping of the parent view | |
using ArrayExtents = typename Mapping::ArrayExtents; ///< array extents of the parent view | |
using ArrayIndex = typename Mapping::ArrayIndex; ///< array index of the parent view | |
/// Creates a VirtualView given a parent \ref View and offset. | |
LLAMA_FN_HOST_ACC_INLINE | |
VirtualView(ParentView& parentView, ArrayIndex offset) : parentView(parentView), offset(offset) | |
{ | |
} | |
template<std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai) const -> const auto& | |
{ | |
return parentView.template accessor<Coords...>(ArrayIndex{ai + offset}); | |
} | |
template<std::size_t... Coords> | |
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai) -> auto& | |
{ | |
return parentView.template accessor<Coords...>(ArrayIndex{ai + offset}); | |
} | |
/// Same as \ref View::operator()(ArrayIndex), but shifted by the offset of this \ref VirtualView. | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) const -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return parentView(ArrayIndex{ai + offset}); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return parentView(ArrayIndex{ai + offset}); | |
} | |
/// Same as corresponding operator in \ref View, but shifted by the offset of this \ref VirtualView. | |
template<typename... Indices> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) const -> decltype(auto) | |
{ | |
static_assert( | |
sizeof...(Indices) == ArrayIndex::rank, | |
"Please specify as many indices as you have array dimensions"); | |
static_assert( | |
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>, | |
"Indices must be convertible to std::size_t"); | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return parentView( | |
ArrayIndex{ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...} + offset}); | |
} | |
template<typename... Indices> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) -> decltype(auto) | |
{ | |
static_assert( | |
sizeof...(Indices) == ArrayIndex::rank, | |
"Please specify as many indices as you have array dimensions"); | |
static_assert( | |
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>, | |
"Indices must be convertible to std::size_t"); | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return parentView( | |
ArrayIndex{ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...} + offset}); | |
} | |
template<std::size_t... Coord> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) const -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return accessor<Coord...>(ArrayIndex{}); | |
} | |
template<std::size_t... Coord> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) -> decltype(auto) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return accessor<Coord...>(ArrayIndex{}); | |
} | |
ParentView& parentView; ///< reference to parent view. | |
const ArrayIndex | |
offset; ///< offset by which this view's \ref ArrayIndex indices are shifted when passed to the parent view. | |
}; | |
} // namespace llama | |
// == | |
// == ./View.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./VirtualRecord.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "View.hpp" // amalgamate: file already expanded | |
#include <iosfwd> | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama | |
{ | |
template<typename View, typename BoundRecordCoord, bool OwnView> | |
struct VirtualRecord; | |
template<typename View> | |
inline constexpr auto is_VirtualRecord = false; | |
template<typename View, typename BoundRecordCoord, bool OwnView> | |
inline constexpr auto is_VirtualRecord<VirtualRecord<View, BoundRecordCoord, OwnView>> = true; | |
/// Creates a single \ref VirtualRecord owning a view with stack memory and copies all values from an existing \ref | |
/// VirtualRecord. | |
template<typename VirtualRecord> | |
LLAMA_FN_HOST_ACC_INLINE auto copyVirtualRecordStack(const VirtualRecord& vd) -> decltype(auto) | |
{ | |
One<typename VirtualRecord::AccessibleRecordDim> temp; | |
temp = vd; | |
return temp; | |
} | |
namespace internal | |
{ | |
template< | |
typename Functor, | |
typename LeftRecord, | |
typename RightView, | |
typename RightBoundRecordDim, | |
bool RightOwnView> | |
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordArithOperator( | |
LeftRecord& left, | |
const VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>& right) -> LeftRecord& | |
{ | |
using RightRecord = VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>; | |
// If the left and right record dimensions are the same, a single loop is enough and no tag check is needed. | |
// This saves a lot of compilation time. | |
if constexpr(std::is_same_v< | |
typename LeftRecord::AccessibleRecordDim, | |
typename RightRecord::AccessibleRecordDim>) | |
{ | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>([&](auto rc) LLAMA_LAMBDA_INLINE | |
{ Functor{}(left(rc), right(rc)); }); | |
} | |
else | |
{ | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>( | |
[&](auto leftRC) LLAMA_LAMBDA_INLINE | |
{ | |
using LeftInnerCoord = decltype(leftRC); | |
forEachLeafCoord<typename RightRecord::AccessibleRecordDim>( | |
[&](auto rightRC) LLAMA_LAMBDA_INLINE | |
{ | |
using RightInnerCoord = decltype(rightRC); | |
if constexpr(hasSameTags< | |
typename LeftRecord::AccessibleRecordDim, | |
LeftInnerCoord, | |
typename RightRecord::AccessibleRecordDim, | |
RightInnerCoord>) | |
{ | |
Functor{}(left(leftRC), right(rightRC)); | |
} | |
}); | |
}); | |
} | |
return left; | |
} | |
template<typename Functor, typename LeftRecord, typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordArithOperator(LeftRecord& left, const T& right) -> LeftRecord& | |
{ | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>([&](auto leftRC) LLAMA_LAMBDA_INLINE | |
{ Functor{}(left(leftRC), right); }); | |
return left; | |
} | |
template< | |
typename Functor, | |
typename LeftRecord, | |
typename RightView, | |
typename RightBoundRecordDim, | |
bool RightOwnView> | |
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordRelOperator( | |
const LeftRecord& left, | |
const VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>& right) -> bool | |
{ | |
using RightRecord = VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>; | |
bool result = true; | |
// If the left and right record dimensions are the same, a single loop is enough and no tag check is needed. | |
// This saves a lot of compilation time. | |
if constexpr(std::is_same_v< | |
typename LeftRecord::AccessibleRecordDim, | |
typename RightRecord::AccessibleRecordDim>) | |
{ | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE { result &= Functor{}(left(rc), right(rc)); }); | |
} | |
else | |
{ | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>( | |
[&](auto leftRC) LLAMA_LAMBDA_INLINE | |
{ | |
using LeftInnerCoord = decltype(leftRC); | |
forEachLeafCoord<typename RightRecord::AccessibleRecordDim>( | |
[&](auto rightRC) LLAMA_LAMBDA_INLINE | |
{ | |
using RightInnerCoord = decltype(rightRC); | |
if constexpr(hasSameTags< | |
typename LeftRecord::AccessibleRecordDim, | |
LeftInnerCoord, | |
typename RightRecord::AccessibleRecordDim, | |
RightInnerCoord>) | |
{ | |
result &= Functor{}(left(leftRC), right(rightRC)); | |
} | |
}); | |
}); | |
} | |
return result; | |
} | |
template<typename Functor, typename LeftRecord, typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordRelOperator(const LeftRecord& left, const T& right) -> bool | |
{ | |
bool result = true; | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>( | |
[&](auto leftRC) LLAMA_LAMBDA_INLINE { | |
result &= Functor{}( | |
left(leftRC), | |
static_cast<std::remove_reference_t<decltype(left(leftRC))>>(right)); | |
}); | |
return result; | |
} | |
struct Assign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) = b; | |
} | |
}; | |
struct PlusAssign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) += b; | |
} | |
}; | |
struct MinusAssign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) -= b; | |
} | |
}; | |
struct MultiplyAssign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) *= b; | |
} | |
}; | |
struct DivideAssign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) /= b; | |
} | |
}; | |
struct ModuloAssign | |
{ | |
template<typename A, typename B> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto) | |
{ | |
return std::forward<A>(a) %= b; | |
} | |
}; | |
template<typename TWithOptionalConst, typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(TWithOptionalConst& leaf, T) -> std::enable_if_t< | |
!is_VirtualRecord<std::decay_t<TWithOptionalConst>>, | |
std::reference_wrapper<TWithOptionalConst>> | |
{ | |
return leaf; | |
} | |
template<typename VirtualRecord, typename T, std::size_t N, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE auto asTupleImplArr(VirtualRecord&& vd, T(&&)[N], std::index_sequence<Is...>) | |
{ | |
return std::make_tuple(asTupleImpl(vd(RecordCoord<Is>{}), T{})...); | |
} | |
template<typename VirtualRecord, typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(VirtualRecord&& vd, T(&&a)[N]) | |
{ | |
return asTupleImplArr(std::forward<VirtualRecord>(vd), std::move(a), std::make_index_sequence<N>{}); | |
} | |
template<typename VirtualRecord, typename... Fields> | |
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(VirtualRecord&& vd, Record<Fields...>) | |
{ | |
return std::make_tuple(asTupleImpl(vd(GetFieldTag<Fields>{}), GetFieldType<Fields>{})...); | |
} | |
template<typename TWithOptionalConst, typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(TWithOptionalConst& leaf, T) | |
-> std::enable_if_t<!is_VirtualRecord<std::decay_t<TWithOptionalConst>>, std::tuple<TWithOptionalConst&>> | |
{ | |
return {leaf}; | |
} | |
template<typename VirtualRecord, typename T, std::size_t N, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImplArr(VirtualRecord&& vd, T(&&)[N], std::index_sequence<Is...>) | |
{ | |
return std::tuple_cat(asFlatTupleImpl(vd(RecordCoord<Is>{}), T{})...); | |
} | |
template<typename VirtualRecord, typename T, std::size_t N> | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(VirtualRecord&& vd, T(&&a)[N]) | |
{ | |
return asFlatTupleImplArr(std::forward<VirtualRecord>(vd), std::move(a), std::make_index_sequence<N>{}); | |
} | |
template<typename VirtualRecord, typename... Fields> | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(VirtualRecord&& vd, Record<Fields...>) | |
{ | |
return std::tuple_cat(asFlatTupleImpl(vd(GetFieldTag<Fields>{}), GetFieldType<Fields>{})...); | |
} | |
template<typename T, typename = void> | |
constexpr inline auto isTupleLike = false; | |
// get<I>(t) and std::tuple_size<T> must be available | |
using std::get; // make sure a get<0>() can be found, so the compiler can compile the trait | |
template<typename T> | |
constexpr inline auto | |
isTupleLike<T, std::void_t<decltype(get<0>(std::declval<T>())), std::tuple_size<T>>> = true; | |
template<typename... Ts> | |
constexpr inline auto dependentFalse = false; | |
template<typename Tuple1, typename Tuple2, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE void assignTuples(Tuple1&& dst, Tuple2&& src, std::index_sequence<Is...>); | |
template<typename T1, typename T2> | |
LLAMA_FN_HOST_ACC_INLINE void assignTupleElement(T1&& dst, T2&& src) | |
{ | |
if constexpr(isTupleLike<std::decay_t<T1>> && isTupleLike<std::decay_t<T2>>) | |
{ | |
static_assert(std::tuple_size_v<std::decay_t<T1>> == std::tuple_size_v<std::decay_t<T2>>); | |
assignTuples(dst, src, std::make_index_sequence<std::tuple_size_v<std::decay_t<T1>>>{}); | |
} | |
else if constexpr(!isTupleLike<std::decay_t<T1>> && !isTupleLike<std::decay_t<T2>>) | |
std::forward<T1>(dst) = std::forward<T2>(src); | |
else | |
static_assert( | |
dependentFalse<T1, T2>, | |
"Elements to assign are not tuple/tuple or non-tuple/non-tuple."); | |
} | |
template<typename Tuple1, typename Tuple2, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE void assignTuples(Tuple1&& dst, Tuple2&& src, std::index_sequence<Is...>) | |
{ | |
static_assert(std::tuple_size_v<std::decay_t<Tuple1>> == std::tuple_size_v<std::decay_t<Tuple2>>); | |
using std::get; | |
(assignTupleElement(get<Is>(std::forward<Tuple1>(dst)), get<Is>(std::forward<Tuple2>(src))), ...); | |
} | |
template<typename T, typename Tuple, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE auto makeFromTuple(Tuple&& src, std::index_sequence<Is...>) | |
{ | |
using std::get; | |
return T{get<Is>(std::forward<Tuple>(src))...}; | |
} | |
template<typename T, typename SFINAE, typename... Args> | |
constexpr inline auto isDirectListInitializableImpl = false; | |
template<typename T, typename... Args> | |
constexpr inline auto | |
isDirectListInitializableImpl<T, std::void_t<decltype(T{std::declval<Args>()...})>, Args...> = true; | |
template<typename T, typename... Args> | |
constexpr inline auto isDirectListInitializable = isDirectListInitializableImpl<T, void, Args...>; | |
template<typename T, typename Tuple> | |
constexpr inline auto isDirectListInitializableFromTuple = false; | |
template<typename T, template<typename...> typename Tuple, typename... Args> | |
constexpr inline auto | |
isDirectListInitializableFromTuple<T, Tuple<Args...>> = isDirectListInitializable<T, Args...>; | |
} // namespace internal | |
/// Virtual record type returned by \ref View after resolving an array dimensions coordinate or partially resolving | |
/// a \ref RecordCoord. A virtual record does not hold data itself (thus the name "virtual"); it just binds enough | |
/// information (array dimensions coord and partial record coord) to retrieve the data from a \ref View later. | |
/// Virtual records should not be created by the user. They are returned from various access functions in \ref View | |
/// and VirtualRecord itself. | |
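/// A usage sketch (hypothetical: ai is an array index into view, X a leaf tag, Y a tag of a nested record): | |
/// \code | |
/// auto vr = view(ai);     // VirtualRecord bound to array index ai | |
/// vr(X{}) = 1.0f;         // resolving to a leaf yields a reference into the view's memory | |
/// auto sub = vr(Y{});     // partial resolution yields another VirtualRecord | |
/// \endcode | |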
template<typename TView, typename TBoundRecordCoord, bool OwnView> | |
struct VirtualRecord : private TView::Mapping::ArrayIndex | |
{ | |
using View = TView; ///< View this virtual record points into. | |
using BoundRecordCoord | |
= TBoundRecordCoord; ///< Record coords into View::RecordDim which are already bound by this VirtualRecord. | |
private: | |
using ArrayIndex = typename View::Mapping::ArrayIndex; | |
using RecordDim = typename View::Mapping::RecordDim; | |
std::conditional_t<OwnView, View, View&> view; | |
public: | |
/// Subtree of the record dimension of View starting at BoundRecordCoord. If BoundRecordCoord is | |
/// `RecordCoord<>` (default) AccessibleRecordDim is the same as `Mapping::RecordDim`. | |
using AccessibleRecordDim = GetType<RecordDim, BoundRecordCoord>; | |
/// Creates an empty VirtualRecord. Only available if the view is owned. Used by llama::One. | |
LLAMA_FN_HOST_ACC_INLINE VirtualRecord() | |
/* requires(OwnView) */ | |
: ArrayIndex{} | |
, view{allocViewStack<0, RecordDim>()} | |
{ | |
static_assert(OwnView, "The default constructor of VirtualRecord is only available if it owns the view."); | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
VirtualRecord(ArrayIndex ai, std::conditional_t<OwnView, View&&, View&> view) | |
: ArrayIndex{ai} | |
, view{static_cast<decltype(view)>(view)} | |
{ | |
} | |
VirtualRecord(const VirtualRecord&) = default; | |
// NOLINTNEXTLINE(cert-oop54-cpp) | |
LLAMA_FN_HOST_ACC_INLINE auto operator=(const VirtualRecord& other) -> VirtualRecord& | |
{ | |
// NOLINTNEXTLINE(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) | |
return this->operator=<VirtualRecord>(other); | |
} | |
VirtualRecord(VirtualRecord&&) noexcept = default; | |
auto operator=(VirtualRecord&&) noexcept -> VirtualRecord& = default; | |
~VirtualRecord() = default; | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto arrayIndex() const -> ArrayIndex | |
{ | |
return *this; | |
} | |
/// Creates a VirtualRecord from a different VirtualRecord. Only available if the view is owned. Used by | |
/// llama::One. | |
template<typename OtherView, typename OtherBoundRecordCoord, bool OtherOwnView> | |
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) | |
LLAMA_FN_HOST_ACC_INLINE VirtualRecord( | |
const VirtualRecord<OtherView, OtherBoundRecordCoord, OtherOwnView>& virtualRecord) | |
/* requires(OwnView) */ | |
: VirtualRecord() | |
{ | |
static_assert( | |
OwnView, | |
"The copy constructor of VirtualRecord from a different VirtualRecord is only available if it owns " | |
"the " | |
"view."); | |
*this = virtualRecord; | |
} | |
// TODO(bgruber): unify with previous in C++20 and use explicit(cond) | |
/// Create a VirtualRecord from a scalar. Only available if the view is owned. Used by llama::One.
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE explicit VirtualRecord(const T& scalar) | |
/* requires(OwnView) */ | |
: VirtualRecord() | |
{ | |
static_assert( | |
OwnView, | |
"The constructor of VirtualRecord from a scalar is only available if it owns the view."); | |
*this = scalar; | |
} | |
/// Access a record in the record dimension underneath the current virtual record using a \ref RecordCoord. If | |
/// the access resolves to a leaf, a reference to a variable inside the \ref View storage is returned, | |
/// otherwise another virtual record. | |
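/// Example (a sketch for a record dimension where coord 0 is a nested record with a leaf at coord 0, 1):
///     vr(llama::RecordCoord<0>{})    // partially resolves -> another virtual record
///     vr(llama::RecordCoord<0, 1>{}) // resolves to a leaf -> reference to storage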
template<std::size_t... Coord> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) const -> decltype(auto) | |
{ | |
using AbsolutCoord = Cat<BoundRecordCoord, RecordCoord<Coord...>>; | |
using AccessedType = GetType<RecordDim, AbsolutCoord>; | |
if constexpr(isRecord<AccessedType> || internal::IsBoundedArray<AccessedType>::value) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return VirtualRecord<const View, AbsolutCoord>{arrayIndex(), this->view}; | |
} | |
else | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return this->view.accessor(arrayIndex(), AbsolutCoord{}); | |
} | |
} | |
// FIXME(bgruber): remove redundancy | |
template<std::size_t... Coord> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) -> decltype(auto) | |
{ | |
using AbsolutCoord = Cat<BoundRecordCoord, RecordCoord<Coord...>>; | |
using AccessedType = GetType<RecordDim, AbsolutCoord>; | |
if constexpr(isRecord<AccessedType> || internal::IsBoundedArray<AccessedType>::value) | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return VirtualRecord<View, AbsolutCoord>{arrayIndex(), this->view}; | |
} | |
else | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return this->view.accessor(arrayIndex(), AbsolutCoord{}); | |
} | |
} | |
/// Access a record in the record dimension underneath the current virtual record using a series of tags. If | |
/// the access resolves to a leaf, a reference to a variable inside the \ref View storage is returned, | |
/// otherwise another virtual record. | |
template<typename... Tags> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Tags...) const -> decltype(auto) | |
{ | |
using RecordCoord = GetCoordFromTags<AccessibleRecordDim, Tags...>; | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return operator()(RecordCoord{}); | |
} | |
// FIXME(bgruber): remove redundancy | |
template<typename... Tags> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(Tags...) -> decltype(auto) | |
{ | |
using RecordCoord = GetCoordFromTags<AccessibleRecordDim, Tags...>; | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return operator()(RecordCoord{}); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator=(const T& other) -> VirtualRecord& | |
{ | |
// NOLINTNEXTLINE(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) | |
return internal::virtualRecordArithOperator<internal::Assign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator+=(const T& other) -> VirtualRecord& | |
{ | |
return internal::virtualRecordArithOperator<internal::PlusAssign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator-=(const T& other) -> VirtualRecord& | |
{ | |
return internal::virtualRecordArithOperator<internal::MinusAssign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator*=(const T& other) -> VirtualRecord& | |
{ | |
return internal::virtualRecordArithOperator<internal::MultiplyAssign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator/=(const T& other) -> VirtualRecord& | |
{ | |
return internal::virtualRecordArithOperator<internal::DivideAssign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto operator%=(const T& other) -> VirtualRecord& | |
{ | |
return internal::virtualRecordArithOperator<internal::ModuloAssign>(*this, other); | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator+(const VirtualRecord& vd, const T& t) | |
{ | |
return copyVirtualRecordStack(vd) += t; | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator+(const T& t, const VirtualRecord& vd) | |
{ | |
return vd + t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator-(const VirtualRecord& vd, const T& t) | |
{ | |
return copyVirtualRecordStack(vd) -= t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator*(const VirtualRecord& vd, const T& t) | |
{ | |
return copyVirtualRecordStack(vd) *= t; | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator*(const T& t, const VirtualRecord& vd) | |
{ | |
return vd * t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator/(const VirtualRecord& vd, const T& t) | |
{ | |
return copyVirtualRecordStack(vd) /= t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator%(const VirtualRecord& vd, const T& t) | |
{ | |
return copyVirtualRecordStack(vd) %= t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::equal_to<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd == t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::not_equal_to<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd != t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::less<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd > t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::less_equal<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd >= t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::greater<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd < t; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const VirtualRecord& vd, const T& t) -> bool | |
{ | |
return internal::virtualRecordRelOperator<std::greater_equal<>>(vd, t); | |
} | |
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>> | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const T& t, const VirtualRecord& vd) -> bool | |
{ | |
return vd <= t; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto asTuple() | |
{ | |
return internal::asTupleImpl(*this, AccessibleRecordDim{}); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto asTuple() const | |
{ | |
return internal::asTupleImpl(*this, AccessibleRecordDim{}); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTuple() | |
{ | |
return internal::asFlatTupleImpl(*this, AccessibleRecordDim{}); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto asFlatTuple() const | |
{ | |
return internal::asFlatTupleImpl(*this, AccessibleRecordDim{}); | |
} | |
template<std::size_t I> | |
LLAMA_FN_HOST_ACC_INLINE auto get() -> decltype(auto) | |
{ | |
return operator()(RecordCoord<I>{}); | |
} | |
template<std::size_t I> | |
LLAMA_FN_HOST_ACC_INLINE auto get() const -> decltype(auto) | |
{ | |
return operator()(RecordCoord<I>{}); | |
} | |
template<typename TupleLike> | |
LLAMA_FN_HOST_ACC_INLINE auto loadAs() -> TupleLike | |
{ | |
static_assert( | |
internal::isDirectListInitializableFromTuple<TupleLike, decltype(asFlatTuple())>, | |
"TupleLike must be constructible from as many values as this VirtualRecord recursively represents " | |
"like " | |
"this: TupleLike{values...}"); | |
return internal::makeFromTuple<TupleLike>( | |
asFlatTuple(), | |
std::make_index_sequence<std::tuple_size_v<decltype(asFlatTuple())>>{}); | |
} | |
template<typename TupleLike> | |
LLAMA_FN_HOST_ACC_INLINE auto loadAs() const -> TupleLike | |
{ | |
static_assert( | |
internal::isDirectListInitializableFromTuple<TupleLike, decltype(asFlatTuple())>, | |
"TupleLike must be constructible from as many values as this VirtualRecord recursively represents " | |
"like " | |
"this: TupleLike{values...}"); | |
return internal::makeFromTuple<TupleLike>( | |
asFlatTuple(), | |
std::make_index_sequence<std::tuple_size_v<decltype(asFlatTuple())>>{}); | |
} | |
struct Loader | |
{ | |
VirtualRecord& vd; | |
template<typename T> | |
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) | |
LLAMA_FN_HOST_ACC_INLINE operator T() | |
{ | |
return vd.loadAs<T>(); | |
} | |
}; | |
struct LoaderConst | |
{ | |
const VirtualRecord& vd; | |
template<typename T> | |
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) | |
LLAMA_FN_HOST_ACC_INLINE operator T() const | |
{ | |
return vd.loadAs<T>(); | |
} | |
}; | |
LLAMA_FN_HOST_ACC_INLINE auto load() -> Loader | |
{ | |
return {*this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto load() const -> LoaderConst | |
{ | |
return {*this}; | |
} | |
template<typename TupleLike> | |
LLAMA_FN_HOST_ACC_INLINE void store(const TupleLike& t) | |
{ | |
internal::assignTuples(asTuple(), t, std::make_index_sequence<std::tuple_size_v<TupleLike>>{}); | |
} | |
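// Example round trip between a VirtualRecord and ordinary values (a sketch; MyStruct is a
// hypothetical aggregate with one member per leaf of AccessibleRecordDim, and a and b match the
// top-level fields):
//     MyStruct s = vr.load();    // Loader's conversion operator calls loadAs<MyStruct>()
//     vr.store(std::tie(a, b));  // assigns the tuple's elements field by field via asTuple()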
// swap for equal VirtualRecord | |
LLAMA_FN_HOST_ACC_INLINE friend void swap( | |
std::conditional_t<OwnView, VirtualRecord&, VirtualRecord> a, | |
std::conditional_t<OwnView, VirtualRecord&, VirtualRecord> b) noexcept | |
{ | |
forEachLeafCoord<AccessibleRecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
using std::swap; | |
swap(a(rc), b(rc)); | |
}); | |
} | |
}; | |
// swap for heterogeneous VirtualRecord | |
template< | |
typename ViewA, | |
typename BoundRecordDimA, | |
bool OwnViewA, | |
typename ViewB, | |
typename BoundRecordDimB, | |
bool OwnViewB> | |
LLAMA_FN_HOST_ACC_INLINE auto swap( | |
VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>& a, | |
VirtualRecord<ViewB, BoundRecordDimB, OwnViewB>& b) noexcept | |
-> std::enable_if_t<std::is_same_v< | |
typename VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>::AccessibleRecordDim, | |
typename VirtualRecord<ViewB, BoundRecordDimB, OwnViewB>::AccessibleRecordDim>> | |
{ | |
using LeftRecord = VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>; | |
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
using std::swap; | |
swap(a(rc), b(rc)); | |
}); | |
} | |
template<typename View, typename BoundRecordCoord, bool OwnView> | |
auto operator<<(std::ostream& os, const VirtualRecord<View, BoundRecordCoord, OwnView>& vr) -> std::ostream& | |
{ | |
using RecordDim = typename VirtualRecord<View, BoundRecordCoord, OwnView>::AccessibleRecordDim; | |
os << "{"; | |
// TODO(bgruber): I tried refactoring both branches into one, but MSVC and icpc have trouble correctly
// discarding the untaken if constexpr branch and not instantiating the templates inside it.
if constexpr(std::is_array_v<RecordDim>) | |
{ | |
constexpr auto size = std::extent_v<RecordDim>; | |
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<size>>( | |
[&](auto ic) | |
{ | |
constexpr std::size_t i = decltype(ic)::value; | |
os << '[' << i << ']' << ": " << vr(RecordCoord<i>{}); | |
if(i + 1 < size) | |
os << ", "; | |
}); | |
} | |
else | |
{ | |
constexpr auto size = boost::mp11::mp_size<RecordDim>::value; | |
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<size>>( | |
[&](auto ic) | |
{ | |
constexpr std::size_t i = decltype(ic)::value; | |
using Field = boost::mp11::mp_at_c<RecordDim, i>; | |
using Tag = GetFieldTag<Field>; | |
os << structName<Tag>() << ": " << vr(RecordCoord<i>{}); | |
if(i + 1 < size) | |
os << ", "; | |
}); | |
} | |
os << "}"; | |
return os; | |
} | |
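// Example: for a record dimension with fields x and y, streaming a virtual record prints
// something like "{x: 1, y: 2}"; bounded arrays are printed as "{[0]: ..., [1]: ...}".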
template<typename VirtualRecordFwd, typename Functor> | |
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeaf(VirtualRecordFwd&& vr, Functor&& functor) | |
{ | |
using VirtualRecord = std::remove_reference_t<VirtualRecordFwd>; | |
LLAMA_FORCE_INLINE_RECURSIVE | |
forEachLeafCoord<typename VirtualRecord::AccessibleRecordDim>( | |
[functor = std::forward<Functor>(functor), &vr = vr](auto rc) | |
LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(constexpr mutable) { std::forward<Functor>(functor)(vr(rc)); }); | |
} | |
} // namespace llama | |
template<typename View, typename BoundRecordCoord, bool OwnView> | |
struct std::tuple_size<llama::VirtualRecord<View, BoundRecordCoord, OwnView>> | |
: boost::mp11::mp_size<typename llama::VirtualRecord<View, BoundRecordCoord, OwnView>::AccessibleRecordDim> | |
{ | |
}; | |
template<std::size_t I, typename View, typename BoundRecordCoord, bool OwnView> | |
struct std::tuple_element<I, llama::VirtualRecord<View, BoundRecordCoord, OwnView>> | |
{ | |
using type = decltype(std::declval<llama::VirtualRecord<View, BoundRecordCoord, OwnView>>().template get<I>()); | |
}; | |
template<std::size_t I, typename View, typename BoundRecordCoord, bool OwnView> | |
struct std::tuple_element<I, const llama::VirtualRecord<View, BoundRecordCoord, OwnView>> | |
{ | |
using type | |
= decltype(std::declval<const llama::VirtualRecord<View, BoundRecordCoord, OwnView>>().template get<I>()); | |
}; | |
#if CAN_USE_RANGES | |
template< | |
typename ViewA, | |
typename BoundA, | |
bool OwnA, | |
typename ViewB, | |
typename BoundB, | |
bool OwnB, | |
template<class> | |
class TQual, | |
template<class> | |
class UQual> | |
struct std::basic_common_reference< | |
llama::VirtualRecord<ViewA, BoundA, OwnA>, | |
llama::VirtualRecord<ViewB, BoundB, OwnB>, | |
TQual, | |
UQual> | |
{ | |
using type = std::enable_if_t< | |
std::is_same_v< | |
typename llama::VirtualRecord<ViewA, BoundA, OwnA>::AccessibleRecordDim, | |
typename llama::VirtualRecord<ViewB, BoundB, OwnB>::AccessibleRecordDim>, | |
llama::One<typename ViewA::RecordDim>>; | |
}; | |
#endif | |
// == | |
// == ./VirtualRecord.hpp == | |
// ============================================================================ | |
// #include <algorithm> // amalgamate: file already included | |
#include <stdexcept> | |
// #include <string> // amalgamate: file already included | |
namespace llama | |
{ | |
// TODO(bgruber): expose blob allocator | |
/// An equivalent of std::vector<T> backed by a \ref View. Elements are never value-initialized, though, and
/// there is no strong exception guarantee.
/// WARNING: This class is experimental. | |
/// @tparam Mapping The mapping to be used for the underlying view. Needs to have 1 array dimension. | |
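/// Example (a sketch; Mapping is any 1D mapping with a dynamic extent over a hypothetical record dimension
/// Particle):
///     llama::Vector<Mapping> v(10);        // 10 records, copy-initialized from One<Particle>{}
///     v.push_back(llama::One<Particle>{}); // grows the capacity by 1.5x when it is exhausted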
template<typename Mapping> | |
struct Vector | |
{ | |
static_assert(Mapping::ArrayExtents::rank == 1, "llama::Vector only supports 1D mappings"); | |
using ViewType = decltype(allocViewUninitialized<Mapping>()); | |
using RecordDim = typename Mapping::RecordDim; | |
using iterator = decltype(std::declval<ViewType>().begin()); | |
using value_type = typename iterator::value_type; | |
Vector() = default; | |
template<typename VirtualRecord = One<RecordDim>> | |
LLAMA_FN_HOST_ACC_INLINE explicit Vector(std::size_t count, const VirtualRecord& value = {}) | |
{ | |
reserve(count); | |
for(std::size_t i = 0; i < count; i++) | |
push_back(value); | |
} | |
template<typename Iterator> | |
LLAMA_FN_HOST_ACC_INLINE Vector(Iterator first, Iterator last) | |
{ | |
if constexpr(std::is_same_v< | |
typename std::iterator_traits<Iterator>::iterator_category, | |
std::random_access_iterator_tag>) | |
reserve(std::distance(first, last)); | |
for(; first != last; ++first) | |
push_back(*first); | |
} | |
Vector(const Vector& other) = default; | |
LLAMA_FN_HOST_ACC_INLINE Vector(Vector&& other) noexcept | |
{ | |
swap(other); | |
} | |
auto operator=(const Vector& other) -> Vector& = default; | |
LLAMA_FN_HOST_ACC_INLINE auto operator=(Vector&& other) noexcept -> Vector& | |
{ | |
swap(other); | |
return *this; | |
} | |
~Vector() = default; | |
// TODO(bgruber): assign | |
LLAMA_FN_HOST_ACC_INLINE auto at(std::size_t i) -> decltype(auto) | |
{ | |
if(i >= m_size) | |
throw std::out_of_range{ | |
"Index " + std::to_string(i) + "out of range [0:" + std::to_string(m_size) + "["}; | |
return m_view(i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto at(std::size_t i) const -> decltype(auto) | |
{ | |
if(i >= m_size) | |
throw std::out_of_range{ | |
"Index " + std::to_string(i) + "out of range [0:" + std::to_string(m_size) + "["}; | |
return m_view(i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t i) -> decltype(auto) | |
{ | |
return m_view(i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t i) const -> decltype(auto) | |
{ | |
return m_view(i); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto front() -> decltype(auto) | |
{ | |
return m_view(0); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto front() const -> decltype(auto) | |
{ | |
return m_view(0); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto back() -> decltype(auto) | |
{ | |
return m_view(m_size - 1); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto back() const -> decltype(auto) | |
{ | |
return m_view(m_size - 1); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto begin() -> decltype(auto) | |
{ | |
return m_view.begin(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto begin() const -> decltype(auto) | |
{ | |
return m_view.begin(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto cbegin() -> decltype(auto) | |
{ | |
return std::as_const(m_view).begin(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto cbegin() const -> decltype(auto) | |
{ | |
return m_view.begin(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto end() -> decltype(auto) | |
{ | |
return m_view.begin() + m_size; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto end() const -> decltype(auto) | |
{ | |
return m_view.begin() + m_size; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto cend() -> decltype(auto) | |
{ | |
return std::as_const(m_view).begin() + m_size; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto cend() const -> decltype(auto) | |
{ | |
return m_view.begin() + m_size; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto empty() const -> bool | |
{ | |
return m_size == 0; | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto size() const -> std::size_t | |
{ | |
return m_size; | |
} | |
LLAMA_FN_HOST_ACC_INLINE void reserve(std::size_t cap) | |
{ | |
if(cap > capacity()) | |
changeCapacity(cap); | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> std::size_t | |
{ | |
return m_view.mapping().extents()[0]; | |
} | |
LLAMA_FN_HOST_ACC_INLINE void shrink_to_fit() | |
{ | |
changeCapacity(m_size); | |
} | |
LLAMA_FN_HOST_ACC_INLINE void clear() | |
{ | |
m_size = 0; | |
} | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE auto insert(iterator pos, T&& t) -> iterator | |
{ | |
const auto i = pos - begin(); | |
reserve(m_size + 1); // might invalidate pos | |
pos = begin() + i; | |
std::copy_backward(pos, end(), end() + 1); | |
m_view[i] = std::forward<T>(t); | |
m_size++; | |
return pos; | |
} | |
// TODO(bgruber): more insert overloads | |
// TODO(bgruber): emplace | |
LLAMA_FN_HOST_ACC_INLINE auto erase(iterator pos) -> iterator | |
{ | |
std::copy(pos + 1, end(), pos); | |
m_size--; | |
return pos; | |
} | |
// TODO(bgruber): more erase overloads | |
// TODO(bgruber): T here is probably a virtual record. We could also allow any struct that is storable to the | |
// view via VirtualRecord::store(). | |
template<typename T> | |
LLAMA_FN_HOST_ACC_INLINE void push_back(T&& t) | |
{ | |
if(const auto cap = capacity(); m_size == cap) | |
reserve(std::max(cap + cap / 2, m_size + 1)); | |
m_view[m_size++] = std::forward<T>(t); | |
} | |
// TODO(bgruber): emplace_back | |
LLAMA_FN_HOST_ACC_INLINE void pop_back() | |
{ | |
m_size--; | |
} | |
template<typename VirtualRecord = One<RecordDim>> | |
LLAMA_FN_HOST_ACC_INLINE void resize(std::size_t count, const VirtualRecord& value = {}) | |
{ | |
reserve(count); | |
for(std::size_t i = m_size; i < count; i++) | |
m_view[i] = value; | |
m_size = count; | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const Vector& a, const Vector& b) -> bool | |
{ | |
if(a.m_size != b.m_size) | |
return false; | |
return std::equal(a.begin(), a.end(), b.begin()); | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const Vector& a, const Vector& b) -> bool | |
{ | |
return !(a == b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const Vector& a, const Vector& b) -> bool | |
{ | |
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const Vector& a, const Vector& b) -> bool | |
{ | |
return !(b < a); | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const Vector& a, const Vector& b) -> bool | |
{ | |
return b < a; | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const Vector& a, const Vector& b) -> bool | |
{ | |
return !(a < b); | |
} | |
LLAMA_FN_HOST_ACC_INLINE friend void swap(Vector& a, Vector& b) noexcept | |
{ | |
a.swap(b); | |
} | |
private: | |
LLAMA_FN_HOST_ACC_INLINE void changeCapacity(std::size_t cap) | |
{ | |
auto newView = allocViewUninitialized<Mapping>(Mapping{typename Mapping::ArrayExtents{cap}}); | |
auto b = begin(); | |
std::copy(b, b + std::min(m_size, cap), newView.begin());
using std::swap; | |
swap(m_view, newView); // depends on move semantic of View | |
} | |
LLAMA_FN_HOST_ACC_INLINE void swap(Vector& other) noexcept | |
{ | |
using std::swap; | |
swap(m_view, other.m_view); // depends on move semantic of View | |
swap(m_size, other.m_size); | |
} | |
ViewType m_view = {}; | |
std::size_t m_size = 0; | |
}; | |
} // namespace llama | |
// == | |
// == ./Vector.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./Copy.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "View.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/AoSoA.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
// #include <limits> // amalgamate: file already included | |
namespace llama::mapping | |
{ | |
/// The maximum number of vector lanes that can be used to fetch each leaf type in the record dimension into a | |
/// vector register of the given size in bits. | |
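/// E.g., for a record dimension containing float (32 bit) and double (64 bit) leaves and 256-bit vector
/// registers, maxLanes is min(256/32, 256/64) = 4.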
template<typename RecordDim, std::size_t VectorRegisterBits> | |
inline constexpr std::size_t maxLanes = []() constexpr | |
{ | |
auto max = std::numeric_limits<std::size_t>::max(); | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) | |
{ | |
using AttributeType = GetType<RecordDim, decltype(rc)>; | |
max = std::min(max, VectorRegisterBits / (sizeof(AttributeType) * CHAR_BIT)); | |
}); | |
return max; | |
} | |
(); | |
/// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView. | |
/// \tparam Lanes The size of the inner arrays of this array of struct of arrays. | |
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref | |
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and | |
/// \ref FlattenRecordDimMinimizePadding. | |
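/// E.g., with Lanes = 4 and a record dimension of two floats x and y, memory is laid out as
/// x x x x y y y y | x x x x y y y y | ..., i.e. one block of Lanes records after another.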
template< | |
typename TArrayExtents, | |
typename TRecordDim, | |
std::size_t Lanes, | |
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, | |
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder> | |
struct AoSoA : private TArrayExtents | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; | |
static constexpr std::size_t blobCount = 1; | |
constexpr AoSoA() = default; | |
LLAMA_FN_HOST_ACC_INLINE constexpr explicit AoSoA(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return ArrayExtents{*this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t | |
{ | |
return roundUpToMultiple( | |
LinearizeArrayDimsFunctor{}.size(extents()) * sizeOf<RecordDim>, | |
Lanes * sizeOf<RecordDim>); | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
constexpr std::size_t flatFieldIndex = | |
#ifdef __NVCC__ | |
*& // mess with nvcc compiler state to workaround bug | |
#endif | |
Flattener::template flatIndex<RecordCoords...>; | |
const auto flatArrayIndex = LinearizeArrayDimsFunctor{}(ai, extents()); | |
const auto blockIndex = flatArrayIndex / Lanes; | |
const auto laneIndex = flatArrayIndex % Lanes; | |
const auto offset = (sizeOf<RecordDim> * Lanes) * blockIndex | |
+ flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, false> * Lanes | |
+ sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>) * laneIndex; | |
return {0, offset}; | |
} | |
private: | |
using Flattener = FlattenRecordDim<TRecordDim>; | |
}; | |
template<std::size_t Lanes, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
struct PreconfiguredAoSoA | |
{ | |
template<typename ArrayExtents, typename RecordDim> | |
using type = AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayDimsFunctor>; | |
}; | |
template<typename Mapping> | |
inline constexpr bool isAoSoA = false; | |
template<typename AD, typename RD, std::size_t L> | |
inline constexpr bool isAoSoA<AoSoA<AD, RD, L>> = true; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/AoSoA.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./mapping/SoA.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
// #include <limits> // amalgamate: file already included | |
namespace llama::mapping | |
{ | |
/// Struct of array mapping. Used to create a \ref View via \ref allocView. | |
/// \tparam SeparateBuffers If true, every element of the record dimension is mapped to its own buffer. | |
/// \tparam LinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and | |
/// how big the linear domain gets. | |
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened if SeparateBuffers is | |
/// false. See \ref FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref | |
/// FlattenRecordDimDecreasingAlignment and \ref FlattenRecordDimMinimizePadding. | |
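/// E.g., for a record dimension of floats x and y: with SeparateBuffers == true, all x values live in blob 0 and
/// all y values in blob 1; with SeparateBuffers == false, a single blob holds all x values followed by all y
/// values.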
template< | |
typename TArrayExtents, | |
typename TRecordDim, | |
bool SeparateBuffers = true, | |
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, | |
template<typename> typename FlattenRecordDimSingleBlob = FlattenRecordDimInOrder> | |
struct SoA : private TArrayExtents | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; | |
static constexpr std::size_t blobCount | |
= SeparateBuffers ? boost::mp11::mp_size<FlatRecordDim<RecordDim>>::value : 1; | |
constexpr SoA() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit SoA(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return ArrayExtents{*this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr auto blobSize([[maybe_unused]] std::size_t blobIndex) const -> std::size_t | |
{ | |
if constexpr(SeparateBuffers) | |
{ | |
constexpr Array<std::size_t, blobCount> typeSizes = []() constexpr | |
{ | |
Array<std::size_t, blobCount> r{}; | |
forEachLeafCoord<RecordDim>([&r, i = 0](auto rc) mutable constexpr | |
{ r[i++] = sizeof(GetType<RecordDim, decltype(rc)>); }); | |
return r; | |
} | |
(); | |
return LinearizeArrayDimsFunctor{}.size(extents()) * typeSizes[blobIndex]; | |
} | |
else | |
{ | |
return LinearizeArrayDimsFunctor{}.size(extents()) * sizeOf<RecordDim>; | |
} | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ad, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
if constexpr(SeparateBuffers) | |
{ | |
constexpr auto blob = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>; | |
const auto offset = LinearizeArrayDimsFunctor{}(ad, extents()) | |
* sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>); | |
return {blob, offset}; | |
} | |
else | |
{ | |
constexpr std::size_t flatFieldIndex = | |
#ifdef __NVCC__ | |
*& // mess with nvcc compiler state to workaround bug | |
#endif | |
Flattener::template flatIndex<RecordCoords...>; | |
const auto offset = LinearizeArrayDimsFunctor{}(ad, extents()) | |
* sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>) | |
+ flatOffsetOf< | |
typename Flattener::FlatRecordDim, | |
flatFieldIndex, | |
false> * LinearizeArrayDimsFunctor{}.size(extents()); | |
return {0, offset}; | |
} | |
} | |
private: | |
using Flattener = FlattenRecordDimSingleBlob<TRecordDim>; | |
}; | |
/// Struct of array mapping storing the entire layout in a single blob. | |
/// \see SoA | |
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
using SingleBlobSoA = SoA<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>; | |
/// Struct of array mapping storing each attribute of the record dimension in a separate blob. | |
/// \see SoA | |
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
using MultiBlobSoA = SoA<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor>; | |
template<bool SeparateBuffers = true, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
struct PreconfiguredSoA | |
{ | |
template<typename ArrayExtents, typename RecordDim> | |
using type = SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>; | |
}; | |
template<typename Mapping> | |
inline constexpr bool isSoA = false; | |
template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor> | |
inline constexpr bool isSoA<SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = true; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/SoA.hpp == | |
// ============================================================================ | |
#include <cstring> | |
#include <numeric> | |
namespace llama | |
{ | |
namespace internal | |
{ | |
template<typename RecordDim> | |
void assertTrivialCopyable() | |
{ | |
forEachLeafCoord<RecordDim>( | |
[](auto rc) | |
{ | |
static_assert( | |
std::is_trivially_copyable_v<GetType<RecordDim, decltype(rc)>>, | |
"All types in the record dimension must be trivially copyable"); | |
}); | |
} | |
using memcopyFunc = void* (*) (void*, const void*, std::size_t); | |
inline void parallel_memcpy( | |
std::byte* dst, | |
const std::byte* src, | |
std::size_t size, | |
std::size_t threadId = 0, | |
std::size_t threadCount = 1, | |
memcopyFunc singleThreadMemcpy = std::memcpy) | |
{ | |
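// Split the copy evenly among the threads; the last thread additionally copies the remainder.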
const auto sizePerThread = size / threadCount; | |
const auto sizeLastThread = sizePerThread + size % threadCount; | |
const auto sizeThisThread = threadId == threadCount - 1 ? sizeLastThread : sizePerThread; | |
singleThreadMemcpy(dst + threadId * sizePerThread, src + threadId * sizePerThread, sizeThisThread); | |
} | |
} // namespace internal | |
/// Direct memcpy from source view blobs to destination view blobs. Both views need to have the same mappings with | |
/// the same array dimensions. | |
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations. | |
/// @param threadCount Optional. Thread count in case of multi-threaded invocation. | |
template<typename Mapping, typename SrcBlob, typename DstBlob> | |
void blobMemcpy( | |
const View<Mapping, SrcBlob>& srcView, | |
View<Mapping, DstBlob>& dstView, | |
std::size_t threadId = 0, | |
std::size_t threadCount = 1) | |
{ | |
internal::assertTrivialCopyable<typename Mapping::RecordDim>(); | |
// TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions | |
if(srcView.mapping().extents() != dstView.mapping().extents()) | |
throw std::runtime_error{"Array dimensions sizes are different"}; | |
// TODO(bgruber): this is maybe not the best parallel copying strategy | |
for(std::size_t i = 0; i < Mapping::blobCount; i++) | |
internal::parallel_memcpy( | |
&dstView.storageBlobs[i][0], | |
&srcView.storageBlobs[i][0], | |
dstView.mapping().blobSize(i), | |
threadId, | |
threadCount); | |
} | |
/// Field-wise copy from source to destination view. Both views need to have the same array and record dimensions. | |
/// @param threadId Optional. Thread id in case of multi-threaded copy. | |
/// @param threadCount Optional. Thread count in case of multi-threaded copy. | |
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob> | |
void fieldWiseCopy( | |
const View<SrcMapping, SrcBlob>& srcView, | |
View<DstMapping, DstBlob>& dstView, | |
std::size_t threadId = 0, | |
std::size_t threadCount = 1) | |
{ | |
// TODO(bgruber): think if we can remove this restriction | |
static_assert( | |
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>, | |
"The source and destination record dimensions must be the same"); | |
if(srcView.mapping().extents() != dstView.mapping().extents()) | |
throw std::runtime_error{"Array dimensions sizes are different"}; | |
auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE | |
{ | |
forEachLeafCoord<typename DstMapping::RecordDim>([&](auto rc) LLAMA_LAMBDA_INLINE | |
{ dstView(ai)(rc) = srcView(ai)(rc); }); | |
}; | |
constexpr auto dims = SrcMapping::ArrayExtents::rank; | |
const auto extents = srcView.mapping().extents().toArray(); | |
const auto workPerThread = (extents[0] + threadCount - 1) / threadCount; | |
const auto start = threadId * workPerThread; | |
const auto end = std::min((threadId + 1) * workPerThread, extents[0]); | |
for(auto i = start; i < end; i++) | |
{ | |
if constexpr(dims > 1) | |
forEachADCoord(ArrayIndex<dims - 1>{pop_front(extents)}, copyOne, static_cast<std::size_t>(i)); | |
else | |
copyOne(ArrayIndex<dims>{static_cast<std::size_t>(i)}); | |
} | |
} | |
namespace internal | |
{ | |
template<typename Mapping> | |
inline constexpr std::size_t aosoaLanes = 0; | |
template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor> | |
inline constexpr std::size_t aosoaLanes< | |
mapping::SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = std:: | |
numeric_limits<std::size_t>::max(); | |
template<typename ArrayExtents, typename RecordDim, std::size_t Lanes, typename LinearizeArrayDimsFunctor> | |
inline constexpr std::size_t | |
aosoaLanes<mapping::AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayDimsFunctor>> = Lanes; | |
} // namespace internal | |
/// AoSoA copy strategy which transfers data in common blocks. At least one of the two mappings must be an AoSoA
/// mapping; the other one may also be a SoA mapping.
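/// @param readOpt If true, the copy is optimized for contiguous reads from the source view, otherwise for
/// contiguous writes to the destination view.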
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations. | |
/// @param threadCount Optional. Thread count in case of multi-threaded invocation. | |
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob> | |
void aosoaCommonBlockCopy( | |
const View<SrcMapping, SrcBlob>& srcView, | |
View<DstMapping, DstBlob>& dstView, | |
bool readOpt, | |
std::size_t threadId = 0, | |
std::size_t threadCount = 1) | |
{ | |
// TODO(bgruber): think if we can remove this restriction | |
static_assert( | |
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>, | |
"The source and destination record dimensions must be the same"); | |
static_assert( | |
std::is_same_v< | |
typename SrcMapping::LinearizeArrayDimsFunctor, | |
typename DstMapping::LinearizeArrayDimsFunctor>, | |
"Source and destination mapping need to use the same array dimensions linearizer"); | |
using RecordDim = typename SrcMapping::RecordDim; | |
internal::assertTrivialCopyable<RecordDim>(); | |
[[maybe_unused]] static constexpr bool MBSrc = SrcMapping::blobCount > 1; | |
[[maybe_unused]] static constexpr bool MBDst = DstMapping::blobCount > 1; | |
static constexpr auto LanesSrc = internal::aosoaLanes<SrcMapping>; | |
static constexpr auto LanesDst = internal::aosoaLanes<DstMapping>; | |
if(srcView.mapping().extents() != dstView.mapping().extents()) | |
throw std::runtime_error{"Array dimensions sizes are different"}; | |
static constexpr auto srcIsAoSoA = LanesSrc != std::numeric_limits<std::size_t>::max(); | |
static constexpr auto dstIsAoSoA = LanesDst != std::numeric_limits<std::size_t>::max(); | |
static_assert(srcIsAoSoA || dstIsAoSoA, "At least one of the mappings must be an AoSoA mapping"); | |
static_assert( | |
!srcIsAoSoA || std::tuple_size_v<decltype(srcView.storageBlobs)> == 1, | |
"Implementation assumes AoSoA with single blob"); | |
static_assert( | |
!dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1, | |
"Implementation assumes AoSoA with single blob"); | |
const auto flatSize = product(dstView.mapping().extents()); | |
// TODO(bgruber): implement the following by adding additional copy loops for the remaining elements | |
if(!srcIsAoSoA && flatSize % LanesDst != 0) | |
throw std::runtime_error{"Source SoA mapping's total array elements must be evenly divisible by the " | |
"destination AoSoA Lane count."}; | |
if(!dstIsAoSoA && flatSize % LanesSrc != 0) | |
throw std::runtime_error{"Destination SoA mapping's total array elements must be evenly divisible by the " | |
"source AoSoA Lane count."}; | |
// the same as AoSoA::blobNrAndOffset but takes a flat array index | |
auto mapAoSoA = [](std::size_t flatArrayIndex, auto rc, std::size_t Lanes) LLAMA_LAMBDA_INLINE | |
{ | |
const auto blockIndex = flatArrayIndex / Lanes; | |
const auto laneIndex = flatArrayIndex % Lanes; | |
const auto offset = (sizeOf<RecordDim> * Lanes) * blockIndex + offsetOf<RecordDim, decltype(rc)> * Lanes | |
+ sizeof(GetType<RecordDim, decltype(rc)>) * laneIndex; | |
return offset; | |
}; | |
// the same as SoA::blobNrAndOffset but takes a flat array index | |
auto mapSoA = [&](std::size_t flatArrayIndex, auto rc, bool mb) LLAMA_LAMBDA_INLINE | |
{ | |
const auto blob = mb * flatRecordCoord<RecordDim, decltype(rc)>; | |
const auto offset = !mb * offsetOf<RecordDim, decltype(rc)> * flatSize | |
+ sizeof(GetType<RecordDim, decltype(rc)>) * flatArrayIndex; | |
return NrAndOffset{blob, offset}; | |
}; | |
auto mapSrc = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
if constexpr(srcIsAoSoA) | |
return &srcView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, LanesSrc); | |
else | |
{ | |
const auto [blob, off] = mapSoA(flatArrayIndex, rc, MBSrc); | |
return &srcView.storageBlobs[blob][off]; | |
} | |
}; | |
auto mapDst = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
if constexpr(dstIsAoSoA) | |
return &dstView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, LanesDst); | |
else | |
{ | |
const auto [blob, off] = mapSoA(flatArrayIndex, rc, MBDst); | |
return &dstView.storageBlobs[blob][off]; | |
} | |
}; | |
static constexpr auto L = [] | |
{ | |
if constexpr(srcIsAoSoA && dstIsAoSoA) | |
return std::gcd(LanesSrc, LanesDst); | |
return std::min(LanesSrc, LanesDst); | |
}(); | |
if(readOpt) | |
{ | |
// optimized for linear reading | |
constexpr auto srcL = srcIsAoSoA ? LanesSrc : L; | |
const auto elementsPerThread = flatSize / srcL / threadCount * srcL; | |
{ | |
const auto start = threadId * elementsPerThread; | |
const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread; | |
auto copyLBlock = [&](const std::byte*& threadSrc, std::size_t dstIndex, auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
constexpr auto bytes = L * sizeof(GetType<RecordDim, decltype(rc)>); | |
std::memcpy(mapDst(dstIndex, rc), threadSrc, bytes); | |
threadSrc += bytes; | |
}; | |
if constexpr(srcIsAoSoA) | |
{ | |
auto* threadSrc = mapSrc(start, RecordCoord<>{}); | |
for(std::size_t i = start; i < stop; i += LanesSrc) | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
for(std::size_t j = 0; j < LanesSrc; j += L) | |
copyLBlock(threadSrc, i + j, rc); | |
}); | |
} | |
else | |
{ | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
auto* threadSrc = mapSrc(start, rc); | |
for(std::size_t i = start; i < stop; i += L) | |
copyLBlock(threadSrc, i, rc); | |
}); | |
} | |
} | |
} | |
else | |
{ | |
// optimized for linear writing | |
constexpr auto dstL = dstIsAoSoA ? LanesDst : L; | |
const auto elementsPerThread = flatSize / dstL / threadCount * dstL; | |
{ | |
const auto start = threadId * elementsPerThread; | |
const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread; | |
auto copyLBlock = [&](std::byte*& threadDst, std::size_t srcIndex, auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
constexpr auto bytes = L * sizeof(GetType<RecordDim, decltype(rc)>); | |
std::memcpy(threadDst, mapSrc(srcIndex, rc), bytes); | |
threadDst += bytes; | |
}; | |
if constexpr(dstIsAoSoA) | |
{ | |
auto* threadDst = mapDst(start, RecordCoord<>{}); | |
for(std::size_t i = start; i < stop; i += LanesDst) | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
for(std::size_t j = 0; j < LanesDst; j += L) | |
copyLBlock(threadDst, i + j, rc); | |
}); | |
} | |
else | |
{ | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) LLAMA_LAMBDA_INLINE | |
{ | |
auto* threadDst = mapDst(start, rc); | |
for(std::size_t i = start; i < stop; i += L) | |
copyLBlock(threadDst, i, rc); | |
}); | |
} | |
} | |
} | |
} | |
/// @brief Generic implementation of \ref copy defaulting to \ref fieldWiseCopy. LLAMA provides several | |
/// specializations of this construct for specific mappings. Users are encouraged to also specialize this template | |
/// with better copy algorithms for further combinations of mappings, if they can and want to provide a better | |
/// implementation. | |
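/// Example of a user-provided specialization (a sketch; MyMappingA and MyMappingB are hypothetical mappings):
///     namespace llama
///     {
///         template<typename ArrayExtents, typename RecordDim>
///         struct Copy<MyMappingA<ArrayExtents, RecordDim>, MyMappingB<ArrayExtents, RecordDim>>
///         {
///             template<typename SrcView, typename DstView>
///             void operator()(const SrcView& src, DstView& dst, std::size_t threadId, std::size_t threadCount)
///                 const
///             {
///                 fieldWiseCopy(src, dst, threadId, threadCount); // or a smarter, mapping-aware strategy
///             }
///         };
///     } // namespace llama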
template<typename SrcMapping, typename DstMapping, typename SFINAE = void> | |
struct Copy | |
{ | |
template<typename SrcView, typename DstView> | |
void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const | |
{ | |
fieldWiseCopy(srcView, dstView, threadId, threadCount); | |
} | |
}; | |
template<typename Mapping> | |
struct Copy<Mapping, Mapping> | |
{ | |
template<typename SrcView, typename DstView> | |
void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const | |
{ | |
blobMemcpy(srcView, dstView, threadId, threadCount); | |
} | |
}; | |
template< | |
typename ArrayExtents, | |
typename RecordDim, | |
typename LinearizeArrayDims, | |
std::size_t LanesSrc, | |
std::size_t LanesDst> | |
struct Copy< | |
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, | |
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, | |
std::enable_if_t<LanesSrc != LanesDst>> | |
{ | |
template<typename SrcBlob, typename DstBlob> | |
void operator()( | |
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, SrcBlob>& srcView, | |
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, DstBlob>& dstView, | |
std::size_t threadId, | |
std::size_t threadCount) | |
{ | |
constexpr auto readOpt = true; // TODO(bgruber): how to choose? | |
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount); | |
} | |
}; | |
template< | |
typename ArrayExtents, | |
typename RecordDim, | |
typename LinearizeArrayDims, | |
std::size_t LanesSrc, | |
bool DstSeparateBuffers> | |
struct Copy< | |
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, | |
mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>> | |
{ | |
template<typename SrcBlob, typename DstBlob> | |
void operator()( | |
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, SrcBlob>& srcView, | |
View<mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>, DstBlob>& dstView, | |
std::size_t threadId, | |
std::size_t threadCount) | |
{ | |
constexpr auto readOpt = true; // TODO(bgruber): how to choose? | |
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount); | |
} | |
}; | |
template< | |
typename ArrayExtents, | |
typename RecordDim, | |
typename LinearizeArrayDims, | |
std::size_t LanesDst, | |
bool SrcSeparateBuffers> | |
struct Copy< | |
mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>, | |
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>> | |
{ | |
template<typename SrcBlob, typename DstBlob> | |
void operator()( | |
const View<mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>, SrcBlob>& | |
srcView, | |
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, DstBlob>& dstView, | |
std::size_t threadId, | |
std::size_t threadCount) | |
{ | |
constexpr auto readOpt = true; // TODO(bgruber): how to choose? | |
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount); | |
} | |
}; | |
/// Copy data from source view to destination view. Both views need to have the same array and record | |
/// dimensions. Delegates to \ref Copy to choose an implementation. | |
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations. | |
/// @param threadCount Optional. Thread count in case of multi-threaded invocation. | |
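/// Example (a sketch; srcMapping and dstMapping are two mappings over the same array and record dimensions):
///     auto srcView = llama::allocView(srcMapping);
///     auto dstView = llama::allocView(dstMapping);
///     llama::copy(srcView, dstView); // delegates to the best matching Copy specialization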
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob> | |
void copy( | |
const View<SrcMapping, SrcBlob>& srcView, | |
View<DstMapping, DstBlob>& dstView, | |
std::size_t threadId = 0, | |
std::size_t threadCount = 1) | |
{ | |
Copy<SrcMapping, DstMapping>{}(srcView, dstView, threadId, threadCount); | |
} | |
} // namespace llama | |
// == | |
// == ./Copy.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./DumpMapping.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
#if !__has_include(<fmt/format.h>) | |
# error DumpMapping.hpp requires the fmt library | |
#endif | |
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded | |
// #include "Core.hpp" // amalgamate: file already expanded | |
#include <boost/functional/hash.hpp> | |
#include <fmt/format.h> | |
// #include <string> // amalgamate: file already included | |
// #include <vector> // amalgamate: file already included | |
namespace llama | |
{ | |
namespace internal | |
{ | |
template<std::size_t... Coords> | |
auto toVec(RecordCoord<Coords...>) -> std::vector<std::size_t> | |
{ | |
return {Coords...}; | |
} | |
inline auto color(const std::vector<std::size_t>& recordCoord) -> std::size_t | |
{ | |
auto c = boost::hash<std::vector<std::size_t>>{}(recordCoord) & 0xFFFFFF;
c |= 0x404040; // ensure color per channel is at least 0x40. | |
return c; | |
} | |
template<std::size_t Dim> | |
auto formatArrayIndex(const ArrayIndex<Dim>& ai) | |
{ | |
if constexpr(Dim == 1) | |
return std::to_string(ai[0]); | |
else | |
{ | |
std::string s = "{"; | |
for(auto v : ai) | |
{ | |
if(s.size() >= 2) | |
s += ","; | |
s += std::to_string(v); | |
} | |
s += "}"; | |
return s; | |
} | |
} | |
template<std::size_t Dim> | |
struct FieldBox | |
{ | |
ArrayIndex<Dim> arrayIndex; | |
std::vector<std::size_t> recordCoord; | |
std::string recordTags; | |
NrAndOffset nrAndOffset; | |
std::size_t size; | |
}; | |
template<typename Mapping> | |
auto boxesFromMapping(const Mapping& mapping) -> std::vector<FieldBox<Mapping::ArrayIndex::rank>> | |
{ | |
std::vector<FieldBox<Mapping::ArrayIndex::rank>> infos; | |
using RecordDim = typename Mapping::RecordDim; | |
for(auto ai : ArrayIndexRange{mapping.extents()}) | |
{ | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) | |
{ | |
infos.push_back( | |
{ai, | |
internal::toVec(rc), | |
recordCoordTags<RecordDim>(rc), | |
mapping.blobNrAndOffset(ai, rc), | |
sizeof(GetType<RecordDim, decltype(rc)>)}); | |
}); | |
} | |
return infos; | |
} | |
template<std::size_t Dim> | |
auto breakBoxes(std::vector<FieldBox<Dim>> boxes, std::size_t wrapByteCount) -> std::vector<FieldBox<Dim>> | |
{ | |
for(std::size_t i = 0; i < boxes.size(); i++) | |
{ | |
auto& fb = boxes[i]; | |
if(fb.nrAndOffset.offset / wrapByteCount != (fb.nrAndOffset.offset + fb.size - 1) / wrapByteCount) | |
{ | |
const auto remainingSpace = wrapByteCount - fb.nrAndOffset.offset % wrapByteCount; | |
auto newFb = fb; | |
newFb.nrAndOffset.offset = fb.nrAndOffset.offset + remainingSpace; | |
newFb.size = fb.size - remainingSpace; | |
fb.size = remainingSpace; | |
boxes.push_back(newFb); | |
} | |
} | |
return boxes; | |
} | |
inline auto cssClass(std::string tags) | |
{ | |
std::replace(begin(tags), end(tags), '.', '_'); | |
std::replace(begin(tags), end(tags), '<', '_'); | |
std::replace(begin(tags), end(tags), '>', '_'); | |
return tags; | |
}; | |
} // namespace internal | |
/// Returns an SVG image visualizing the memory layout created by the given mapping. The created memory blocks are | |
/// wrapped after wrapByteCount bytes. | |
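/// Example (a sketch; writing the image to a file requires <fstream>):
///     std::ofstream{"layout.svg"} << llama::toSvg(mapping);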
template<typename Mapping> | |
auto toSvg(const Mapping& mapping, std::size_t wrapByteCount = 64, bool breakBoxes = true) -> std::string | |
{ | |
constexpr auto byteSizeInPixel = 30; | |
constexpr auto blobBlockWidth = 60; | |
auto infos = internal::boxesFromMapping(mapping); | |
if(breakBoxes) | |
infos = internal::breakBoxes(std::move(infos), wrapByteCount); | |
std::string svg; | |
std::array<int, Mapping::blobCount + 1> blobYOffset{}; | |
for(std::size_t i = 0; i < Mapping::blobCount; i++) | |
{ | |
const auto blobRows = (mapping.blobSize(i) + wrapByteCount - 1) / wrapByteCount; | |
blobYOffset[i + 1] = blobYOffset[i] + (blobRows + 1) * byteSizeInPixel; // one row gap between blobs | |
const auto height = blobRows * byteSizeInPixel; | |
svg += fmt::format( | |
R"a(<rect x="0" y="{}" width="{}" height="{}" fill="#AAA" stroke="#000"/> | |
<text x="{}" y="{}" fill="#000" text-anchor="middle">Blob: {}</text> | |
)a", | |
blobYOffset[i], | |
blobBlockWidth, | |
height, | |
blobBlockWidth / 2, | |
blobYOffset[i] + height / 2, | |
i); | |
} | |
svg = fmt::format( | |
R"(<?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
<svg width="{}" height="{}" xmlns="http://www.w3.org/2000/svg"> | |
<style> | |
.label {{ font: {}px sans-serif; }} | |
</style> | |
)", | |
blobBlockWidth + wrapByteCount * byteSizeInPixel, | |
blobYOffset.back() - byteSizeInPixel, | |
byteSizeInPixel / 2) | |
+ svg; | |
for(const auto& info : infos) | |
{ | |
const auto blobY = blobYOffset[info.nrAndOffset.nr]; | |
auto x = (info.nrAndOffset.offset % wrapByteCount) * byteSizeInPixel + blobBlockWidth; | |
auto y = (info.nrAndOffset.offset / wrapByteCount) * byteSizeInPixel + blobY; | |
const auto fill = internal::color(info.recordCoord); | |
const auto width = byteSizeInPixel * info.size; | |
constexpr auto cropBoxes = true; | |
if(cropBoxes) | |
{ | |
svg += fmt::format( | |
R"(<svg x="{}" y="{}" width="{}" height="{}"> | |
)", | |
x, | |
y, | |
width, | |
byteSizeInPixel); | |
x = 0; | |
y = 0; | |
} | |
svg += fmt::format( | |
R"(<rect x="{}" y="{}" width="{}" height="{}" fill="#{:X}" stroke="#000"/> | |
)", | |
x, | |
y, | |
width, | |
byteSizeInPixel, | |
fill); | |
for(std::size_t i = 1; i < info.size; i++) | |
{ | |
svg += fmt::format( | |
R"(<line x1="{}" y1="{}" x2="{}" y2="{}" stroke="#777"/> | |
)", | |
x + i * byteSizeInPixel, | |
y + byteSizeInPixel * 2 / 3, | |
x + i * byteSizeInPixel, | |
y + byteSizeInPixel); | |
} | |
svg += fmt::format( | |
R"(<text x="{}" y="{}" fill="#000" text-anchor="middle" class="label">{} {}</text> | |
)", | |
x + width / 2, | |
y + byteSizeInPixel * 3 / 4, | |
internal::formatArrayIndex(info.arrayIndex), | |
info.recordTags); | |
if(cropBoxes) | |
svg += R"(</svg> | |
)"; | |
} | |
svg += "</svg>"; | |
return svg; | |
} | |
/// Returns an HTML document visualizing the memory layout created by the given mapping. The visualization is | |
/// resizeable. | |
template<typename Mapping> | |
auto toHtml(const Mapping& mapping) -> std::string | |
{ | |
constexpr auto byteSizeInPixel = 30; | |
constexpr auto rulerLengthInBytes = 512; | |
constexpr auto rulerByteInterval = 8; | |
auto infos = internal::boxesFromMapping(mapping); | |
std::stable_sort( | |
begin(infos), | |
end(infos), | |
[](const auto& a, const auto& b) { | |
return std::tie(a.nrAndOffset.nr, a.nrAndOffset.offset) | |
< std::tie(b.nrAndOffset.nr, b.nrAndOffset.offset); | |
}); | |
infos.erase( | |
std::unique( | |
begin(infos), | |
end(infos), | |
[](const auto& a, const auto& b) { return a.nrAndOffset == b.nrAndOffset; }), | |
end(infos)); | |
std::string html; | |
html += fmt::format( | |
R"(<!DOCTYPE html> | |
<html> | |
<head> | |
<style> | |
.box {{ | |
outline: 1px solid; | |
display: inline-block; | |
white-space: nowrap; | |
height: {}px; | |
background: repeating-linear-gradient(90deg, #0000, #0000 29px, #777 29px, #777 30px); | |
text-align: center; | |
overflow: hidden; | |
vertical-align: middle; | |
}} | |
#ruler {{ | |
background: repeating-linear-gradient(90deg, #0000, #0000 29px, #000 29px, #000 30px); | |
border-bottom: 1px solid; | |
height: 20px; | |
margin-bottom: 20px; | |
}} | |
#ruler div {{ | |
position: absolute; | |
display: inline-block; | |
}} | |
)", | |
byteSizeInPixel); | |
using RecordDim = typename Mapping::RecordDim; | |
forEachLeafCoord<RecordDim>( | |
[&](auto rc) | |
{ | |
constexpr int size = sizeof(GetType<RecordDim, decltype(rc)>); | |
html += fmt::format( | |
R"(.{} {{ | |
width: {}px; | |
background-color: #{:X}; | |
}} | |
)", | |
internal::cssClass(recordCoordTags<RecordDim>(rc)), | |
byteSizeInPixel * size, | |
internal::color(internal::toVec(rc))); | |
}); | |
html += fmt::format(R"(</style> | |
</head> | |
<body> | |
<header id="ruler"> | |
)"); | |
for(auto i = 0; i < rulerLengthInBytes; i += rulerByteInterval) | |
html += fmt::format( | |
R"(</style> | |
<div style="margin-left: {}px;">{}</div>)", | |
i * byteSizeInPixel, | |
i); | |
html += fmt::format(R"( | |
</header> | |
)"); | |
auto currentBlobNr = std::numeric_limits<std::size_t>::max(); | |
for(const auto& info : infos) | |
{ | |
if(currentBlobNr != info.nrAndOffset.nr) | |
{ | |
currentBlobNr = info.nrAndOffset.nr; | |
html += fmt::format("<h1>Blob: {}</h1>", currentBlobNr); | |
} | |
html += fmt::format( | |
R"(<div class="box {0}" title="{1} {2}">{1} {2}</div>)", | |
internal::cssClass(info.recordTags), | |
internal::formatArrayIndex(info.arrayIndex), | |
info.recordTags); | |
} | |
html += R"(</body> | |
</html>)"; | |
return html; | |
} | |
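// A minimal usage sketch (hypothetical file name, assumes <fstream>), dumping the visualization of some mapping | |
// instance, e.g. one of the mappings defined further below: | |
//   std::ofstream{"layout.html"} << llama::toHtml(mapping); | |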
} // namespace llama | |
// == | |
// == ./DumpMapping.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./llama.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
/// \mainpage LLAMA API documentation | |
/// | |
/// LLAMA is a C++17 template header-only library for the abstraction of memory access patterns. It distinguishes | |
/// between the algorithm's view of the memory and the real layout in the background. This enables performance | |
/// portability for multicore, manycore and GPU applications with the very same code. | |
/// | |
/// In contrast to many other solutions, LLAMA can define nested data structures of arbitrary depth and is not | |
/// limited to struct of array and array of struct data layouts. It is also capable of explicitly defining padding, | |
/// blocking, striding and any other run time or compile time access pattern simultaneously. | |
/// | |
/// To achieve this goal, LLAMA is split into mostly independent, orthogonal parts completely written in modern C++17 | |
/// to run on as many architectures and with as many compilers as possible, while still supporting extensions needed | |
/// e.g. to run on GPUs or other many-core hardware. | |
/// | |
/// This page documents the API of LLAMA. The user documentation and an overview about the concepts and ideas can be | |
/// found here: https://llama-doc.rtfd.io | |
/// | |
/// LLAMA is licensed under the LGPL3+. | |
#define LLAMA_VERSION_MAJOR 0 | |
#define LLAMA_VERSION_MINOR 3 | |
#define LLAMA_VERSION_PATCH 0 | |
#ifdef __NVCC__ | |
# pragma push | |
# if __CUDACC_VER_MAJOR__ * 1000 + __CUDACC_VER_MINOR__ >= 11005 | |
# pragma nv_diag_suppress 940 | |
# else | |
# pragma diag_suppress 940 | |
# endif | |
#endif | |
// #include "ArrayExtents.hpp" // amalgamate: file already expanded | |
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded | |
// #include "BlobAllocators.hpp" // amalgamate: file already expanded | |
// #include "Copy.hpp" // amalgamate: file already expanded | |
// #include "Core.hpp" // amalgamate: file already expanded | |
// #include "Meta.hpp" // amalgamate: file already expanded | |
// #include "Vector.hpp" // amalgamate: file already expanded | |
// #include "View.hpp" // amalgamate: file already expanded | |
// #include "VirtualRecord.hpp" // amalgamate: file already expanded | |
// #include "macros.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/AoS.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
namespace llama::mapping | |
{ | |
/// Array of struct mapping. Used to create a \ref View via \ref allocView. | |
/// \tparam AlignAndPad If true, padding bytes are inserted to guarantee that struct members are properly aligned. | |
/// If false, struct members are tightly packed. | |
/// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and | |
/// how big the linear domain gets. | |
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref | |
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and | |
/// \ref FlattenRecordDimMinimizePadding. | |
template< | |
typename TArrayExtents, | |
typename TRecordDim, | |
bool AlignAndPad = true, | |
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, | |
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder> | |
struct AoS : private TArrayExtents | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; | |
static constexpr std::size_t blobCount = 1; | |
constexpr AoS() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit AoS(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return *this; | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t | |
{ | |
return LinearizeArrayDimsFunctor{}.size(extents()) | |
* flatSizeOf<typename Flattener::FlatRecordDim, AlignAndPad>; | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
constexpr std::size_t flatFieldIndex = | |
#ifdef __NVCC__ | |
*& // mess with nvcc compiler state to workaround bug | |
#endif | |
Flattener::template flatIndex<RecordCoords...>; | |
const auto offset | |
= LinearizeArrayDimsFunctor{}(ai, extents()) | |
* flatSizeOf< | |
typename Flattener::FlatRecordDim, | |
AlignAndPad> + flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>; | |
return {0, offset}; | |
} | |
private: | |
using Flattener = FlattenRecordDim<TRecordDim>; | |
}; | |
/// Array of struct mapping preserving the alignment of the field types by inserting padding. | |
/// \see AoS | |
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
using AlignedAoS = AoS<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor>; | |
/// Array of struct mapping preserving the alignment of the field types by inserting padding and permuting the | |
/// field order to minimize this padding. \see AoS | |
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
using MinAlignedAoS | |
= AoS<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor, FlattenRecordDimMinimizePadding>; | |
/// Array of struct mapping packing the field types tightly, violating the types' alignment requirements. | |
/// \see AoS | |
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
using PackedAoS = AoS<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>; | |
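// A minimal usage sketch, assuming the llama::allocView and llama::ArrayExtentsDynamic facilities from the parts | |
// of this header expanded earlier (the tags X and Y and the extent 1024 are illustrative): | |
//   struct X{}; | |
//   struct Y{}; | |
//   using Vec = llama::Record<llama::Field<X, float>, llama::Field<Y, double>>; | |
//   using Extents = llama::ArrayExtentsDynamic<1>; | |
//   auto view = llama::allocView(llama::mapping::AlignedAoS<Extents, Vec>{Extents{1024}}); | |
//   view(0)(Y{}) = 3.14; // first element, field Y | |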
template<bool AlignAndPad = true, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp> | |
struct PreconfiguredAoS | |
{ | |
template<typename ArrayExtents, typename RecordDim> | |
using type = AoS<ArrayExtents, RecordDim, AlignAndPad, LinearizeArrayDimsFunctor>; | |
}; | |
template<typename Mapping> | |
inline constexpr bool isAoS = false; | |
template< | |
typename ArrayExtents, | |
typename RecordDim, | |
bool AlignAndPad, | |
typename LinearizeArrayDimsFunctor, | |
template<typename> | |
typename FlattenRecordDim> | |
inline constexpr bool | |
isAoS<AoS<ArrayExtents, RecordDim, AlignAndPad, LinearizeArrayDimsFunctor, FlattenRecordDim>> = true; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/AoS.hpp == | |
// ============================================================================ | |
// #include "mapping/AoSoA.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/Bytesplit.hpp == | |
// == | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
namespace llama::mapping | |
{ | |
namespace internal | |
{ | |
template<typename T> | |
using ReplaceByByteArray = std::byte[sizeof(T)]; | |
template<typename RecordDim> | |
using SplitBytes = TransformLeaves<RecordDim, ReplaceByByteArray>; | |
} // namespace internal | |
template<typename TArrayExtents, typename TRecordDim, template<typename, typename> typename InnerMapping> | |
struct Bytesplit : private InnerMapping<TArrayExtents, internal::SplitBytes<TRecordDim>> | |
{ | |
using Inner = InnerMapping<TArrayExtents, internal::SplitBytes<TRecordDim>>; | |
using ArrayExtents = typename Inner::ArrayExtents; | |
using ArrayIndex = typename Inner::ArrayIndex; | |
using RecordDim = TRecordDim; // hide Inner::RecordDim | |
using Inner::blobCount; | |
using Inner::blobSize; | |
using Inner::extents; | |
using Inner::Inner; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit Bytesplit(TArrayExtents extents, TRecordDim = {}) : Inner(extents) | |
{ | |
} | |
template<std::size_t... RecordCoords> | |
static constexpr auto isComputed(RecordCoord<RecordCoords...>) | |
{ | |
return true; | |
} | |
template<typename QualifiedBase, typename RC, typename BlobArray> | |
struct Reference | |
{ | |
QualifiedBase& innerMapping; | |
ArrayIndex ai; | |
BlobArray& blobs; | |
using DstType = GetType<TRecordDim, RC>; | |
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) | |
operator DstType() const | |
{ | |
DstType v; | |
auto* p = reinterpret_cast<std::byte*>(&v); | |
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>( | |
[&](auto ic) | |
{ | |
constexpr auto i = decltype(ic)::value; | |
const auto [nr, off] = innerMapping.blobNrAndOffset(ai, Cat<RC, RecordCoord<i>>{}); | |
p[i] = blobs[nr][off]; | |
}); | |
return v; | |
} | |
auto operator=(DstType v) -> Reference& | |
{ | |
auto* p = reinterpret_cast<std::byte*>(&v); | |
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>( | |
[&](auto ic) | |
{ | |
constexpr auto i = decltype(ic)::value; | |
const auto [nr, off] = innerMapping.blobNrAndOffset(ai, Cat<RC, RecordCoord<i>>{}); | |
blobs[nr][off] = p[i]; | |
}); | |
return *this; | |
} | |
}; | |
template<std::size_t... RecordCoords, typename BlobArray> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto compute( | |
typename Inner::ArrayIndex ai, | |
RecordCoord<RecordCoords...>, | |
BlobArray& blobs) const | |
{ | |
return Reference<decltype(*this), RecordCoord<RecordCoords...>, BlobArray>{*this, ai, blobs}; | |
} | |
}; | |
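// A minimal sketch, reusing Vec and Extents from the AoS sketch above: every field is replaced by a byte array of | |
// equal size, which the inner mapping then lays out; reads and writes go through the Reference proxy returned by | |
// compute(): | |
//   using ByteSplitAoS = llama::mapping::Bytesplit<Extents, Vec, llama::mapping::PreconfiguredAoS<>::type>; | |
//   auto view = llama::allocView(ByteSplitAoS{Extents{1024}}); | |
//   view(0)(Y{}) = 3.14; // written as sizeof(double) individual bytes | |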
} // namespace llama::mapping | |
// == | |
// == ./mapping/Bytesplit.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./mapping/Heatmap.hpp == | |
// == | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
// #include <array> // amalgamate: file already included | |
#include <atomic> | |
#include <sstream> | |
// #include <vector> // amalgamate: file already included | |
namespace llama::mapping | |
{ | |
/// Forwards all calls to the inner mapping. Counts all accesses made to all bytes, allowing a heatmap to be | |
/// extracted. | |
/// \tparam Mapping The type of the inner mapping. | |
template<typename Mapping, typename CountType = std::size_t> | |
struct Heatmap | |
{ | |
using ArrayExtents = typename Mapping::ArrayExtents; | |
using ArrayIndex = typename Mapping::ArrayIndex; | |
using RecordDim = typename Mapping::RecordDim; | |
static constexpr std::size_t blobCount = Mapping::blobCount; | |
constexpr Heatmap() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
explicit Heatmap(Mapping mapping) : mapping(mapping) | |
{ | |
for(std::size_t i = 0; i < blobCount; i++) | |
byteHits[i] = std::vector<std::atomic<CountType>>(blobSize(i)); | |
} | |
Heatmap(const Heatmap&) = delete; | |
auto operator=(const Heatmap&) -> Heatmap& = delete; | |
Heatmap(Heatmap&&) noexcept = default; | |
auto operator=(Heatmap&&) noexcept -> Heatmap& = default; | |
~Heatmap() = default; | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return mapping.extents(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t i) const -> std::size_t | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return mapping.blobSize(i); | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> rc = {}) const | |
-> NrAndOffset | |
{ | |
const auto nao = mapping.blobNrAndOffset(ai, rc); | |
for(std::size_t i = 0; i < sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>); i++) | |
byteHits[nao.nr][nao.offset + i]++; | |
return nao; | |
} | |
auto toGnuplotScript(std::size_t wrapAfterBytes = 64) const -> std::string | |
{ | |
std::stringstream f; | |
f << "#!/usr/bin/gnuplot -p\n$data << EOD\n"; | |
for(std::size_t i = 0; i < blobCount; i++) | |
{ | |
std::size_t byteCount = 0; | |
for(const auto& hits : byteHits[i]) | |
f << hits << ((++byteCount % wrapAfterBytes == 0) ? '\n' : ' '); | |
while(byteCount++ % wrapAfterBytes != 0) | |
f << "0 "; | |
f << '\n'; | |
} | |
f << R"(EOD | |
set view map | |
set xtics format "" | |
set x2tics autofreq 8 | |
set yrange [] reverse | |
set link x2; set link y2 | |
set ylabel "Cacheline" | |
set x2label "Byte" | |
plot $data matrix with image axes x2y1 | |
)"; | |
return f.str(); | |
} | |
Mapping mapping; | |
mutable std::array<std::vector<std::atomic<CountType>>, blobCount> byteHits; | |
}; | |
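// A minimal sketch, reusing Vec and Extents from the AoS sketch above (assumes <fstream>, and that the view makes | |
// its mapping accessible as in the View definition expanded earlier): | |
//   auto view = llama::allocView(llama::mapping::Heatmap{llama::mapping::AlignedAoS<Extents, Vec>{Extents{1024}}}); | |
//   // ... run the accesses under investigation on view ... | |
//   std::ofstream{"heatmap.sh"} << view.mapping.toGnuplotScript(); | |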
} // namespace llama::mapping | |
// == | |
// == ./mapping/Heatmap.hpp == | |
// ============================================================================ | |
// #include "mapping/One.hpp" // amalgamate: file already expanded | |
// #include "mapping/SoA.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/Split.hpp == | |
// == | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
namespace llama::mapping | |
{ | |
namespace internal | |
{ | |
template<typename... Fields, std::size_t FirstCoord, std::size_t... Coords> | |
auto partitionRecordDim(Record<Fields...>, RecordCoord<FirstCoord, Coords...>) | |
{ | |
using namespace boost::mp11; | |
using Rec = Record<Fields...>; | |
if constexpr(sizeof...(Coords) == 0) | |
{ | |
using Part1 = Record<mp_at_c<Rec, FirstCoord>>; | |
using Part2 = mp_erase_c<Rec, FirstCoord, FirstCoord + 1>; | |
return mp_list<Part1, Part2>{}; | |
} | |
else | |
{ | |
using FieldTag = GetTag<Rec, RecordCoord<FirstCoord>>; | |
using FieldType = GetType<Rec, RecordCoord<FirstCoord>>; | |
using InnerPartition = decltype(partitionRecordDim(FieldType{}, RecordCoord<Coords...>{})); | |
using Part1 = Record<Field<FieldTag, mp_first<InnerPartition>>>; | |
using Part2 = mp_replace_at_c<Rec, FirstCoord, Field<FieldTag, mp_second<InnerPartition>>>; | |
return mp_list<Part1, Part2>{}; | |
} | |
} | |
template<typename Acc, typename TagList> | |
struct PartitionFoldOpImpl | |
{ | |
using Part1Before = boost::mp11::mp_first<Acc>; | |
using Part2Before = boost::mp11::mp_second<Acc>; | |
using R = decltype(partitionRecordDim(Part2Before{}, GetCoordFromTags<Part2Before, TagList>{})); | |
using Part1After = boost::mp11::mp_first<R>; | |
using Part2After = boost::mp11::mp_second<R>; | |
using type = boost::mp11::mp_list<MergedRecordDims<Part1Before, Part1After>, Part2After>; | |
}; | |
template<typename Acc, typename TagList> | |
using PartitionFoldOp = typename PartitionFoldOpImpl<Acc, TagList>::type; | |
template<typename... Fields, typename... RCs> | |
auto partitionRecordDim(Record<Fields...>, boost::mp11::mp_list<RCs...>) | |
{ | |
using namespace boost::mp11; | |
using Initial = mp_list<Record<>, Record<Fields...>>; // initially, nothing selected for mapping 1 | |
return mp_fold<mp_list<GetTags<Record<Fields...>, RCs>...>, Initial, PartitionFoldOp>{}; | |
} | |
// workaround for nvcc 11.3 and below: we cannot put the decltype() directly into the Split class | |
template<typename RecordDim, typename RecordCoordForMapping1> | |
struct PartitionedRecordDim | |
{ | |
using type = decltype(partitionRecordDim(RecordDim{}, RecordCoordForMapping1{})); | |
}; | |
template<typename RC, typename RecordCoordForMapping1> | |
inline constexpr bool isSelected = RecordCoordCommonPrefixIsSame<RecordCoordForMapping1, RC>; | |
template<typename RC> | |
struct IsSelectedPredicate | |
{ | |
template<typename RecordCoordForMapping1> | |
using fn = boost::mp11::mp_bool<isSelected<RC, RecordCoordForMapping1>>; | |
}; | |
template<typename RC, typename... RecordCoordsForMapping1> | |
inline constexpr bool isSelected<RC, boost::mp11::mp_list<RecordCoordsForMapping1...>> = boost::mp11:: | |
mp_any_of_q<boost::mp11::mp_list<RecordCoordsForMapping1...>, IsSelectedPredicate<RC>>::value; | |
} // namespace internal | |
/// Mapping which splits off a part of the record dimension and maps it differently than the rest. | |
/// \tparam RecordCoordForMapping1 A \ref RecordCoord or a list of RecordCoords selecting the part of the record | |
/// dimension to be mapped differently. | |
/// \tparam MappingTemplate1 The mapping used for the selected part of the record dimension. | |
/// \tparam MappingTemplate2 The mapping used for the not selected part of the record dimension. | |
/// \tparam SeparateBlobs If true, both pieces of the record dimension are mapped to separate blobs. | |
template< | |
typename TArrayExtents, | |
typename TRecordDim, | |
typename RecordCoordForMapping1, | |
template<typename...> | |
typename MappingTemplate1, | |
template<typename...> | |
typename MappingTemplate2, | |
bool SeparateBlobs = false> | |
struct Split | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
using RecordDimPartitions = typename internal::PartitionedRecordDim<RecordDim, RecordCoordForMapping1>::type; | |
using RecordDim1 = boost::mp11::mp_first<RecordDimPartitions>; | |
using RecordDim2 = boost::mp11::mp_second<RecordDimPartitions>; | |
using Mapping1 = MappingTemplate1<ArrayExtents, RecordDim1>; | |
using Mapping2 = MappingTemplate2<ArrayExtents, RecordDim2>; | |
static constexpr std::size_t blobCount = SeparateBlobs ? Mapping1::blobCount + Mapping2::blobCount : 1; | |
static_assert(SeparateBlobs || Mapping1::blobCount == 1); | |
static_assert(SeparateBlobs || Mapping2::blobCount == 1); | |
constexpr Split() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
constexpr explicit Split(ArrayExtents extents) : mapping1(extents), mapping2(extents) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return mapping1.extents(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize([[maybe_unused]] std::size_t i) const -> std::size_t | |
{ | |
if constexpr(SeparateBlobs) | |
{ | |
if(i < Mapping1::blobCount) | |
return mapping1.blobSize(i); | |
return mapping2.blobSize(i - Mapping1::blobCount); | |
} | |
else | |
return mapping1.blobSize(0) + mapping2.blobSize(0); | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
using Tags = GetTags<RecordDim, RecordCoord<RecordCoords...>>; | |
if constexpr(internal::isSelected<RecordCoord<RecordCoords...>, RecordCoordForMapping1>) | |
return mapping1.blobNrAndOffset(ai, GetCoordFromTags<RecordDim1, Tags>{}); | |
else | |
{ | |
auto nrAndOffset = mapping2.blobNrAndOffset(ai, GetCoordFromTags<RecordDim2, Tags>{}); | |
if constexpr(SeparateBlobs) | |
nrAndOffset.nr += Mapping1::blobCount; | |
else | |
{ | |
for(std::size_t i = 0; i < Mapping1::blobCount; i++) | |
nrAndOffset.offset += mapping1.blobSize(i); | |
} | |
return nrAndOffset; | |
} | |
} | |
Mapping1 mapping1; | |
Mapping2 mapping2; | |
}; | |
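// A minimal sketch, reusing Vec and Extents from the AoS sketch above: field X (record coordinate 0) is split off | |
// and mapped into its own blob, while the remaining record dimension is handled by the second mapping: | |
//   using SplitMapping = llama::mapping:: | |
//       Split<Extents, Vec, llama::RecordCoord<0>, llama::mapping::PackedAoS, llama::mapping::PackedAoS, true>; | |
//   static_assert(SplitMapping::blobCount == 2); | |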
template< | |
typename RecordCoordsForMapping1, | |
template<typename...> | |
typename MappingTemplate1, | |
template<typename...> | |
typename MappingTemplate2, | |
bool SeparateBlobs = false> | |
struct PreconfiguredSplit | |
{ | |
template<typename ArrayExtents, typename RecordDim> | |
using type = Split< | |
ArrayExtents, | |
RecordDim, | |
RecordCoordsForMapping1, | |
MappingTemplate1, | |
MappingTemplate2, | |
SeparateBlobs>; | |
}; | |
} // namespace llama::mapping | |
// == | |
// == ./mapping/Split.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./mapping/Trace.hpp == | |
// == | |
// #pragma once | |
// #include "Common.hpp" // amalgamate: file already expanded | |
// #include <atomic> // amalgamate: file already included | |
// #include <iostream> // amalgamate: file already included | |
// #include <string> // amalgamate: file already included | |
#include <unordered_map> | |
namespace llama::mapping | |
{ | |
/// Forwards all calls to the inner mapping. Traces all accesses made through this mapping and prints a summary on | |
/// destruction. | |
/// \tparam Mapping The type of the inner mapping. | |
template<typename Mapping> | |
struct Trace | |
{ | |
using ArrayExtents = typename Mapping::ArrayExtents; | |
using ArrayIndex = typename Mapping::ArrayIndex; | |
using RecordDim = typename Mapping::RecordDim; | |
static constexpr std::size_t blobCount = Mapping::blobCount; | |
constexpr Trace() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
explicit Trace(Mapping mapping, bool printOnDestruction = true) | |
: mapping(mapping) | |
, printOnDestruction(printOnDestruction) | |
{ | |
forEachLeafCoord<RecordDim>([&](auto rc) { fieldHits[recordCoordTags<RecordDim>(rc)] = 0; }); | |
} | |
Trace(const Trace&) = delete; | |
auto operator=(const Trace&) -> Trace& = delete; | |
Trace(Trace&&) noexcept = default; | |
auto operator=(Trace&&) noexcept -> Trace& = default; | |
~Trace() | |
{ | |
if(printOnDestruction && !fieldHits.empty()) | |
print(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents | |
{ | |
return mapping.extents(); | |
} | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t i) const -> std::size_t | |
{ | |
LLAMA_FORCE_INLINE_RECURSIVE | |
return mapping.blobSize(i); | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> rc = {}) const | |
-> NrAndOffset | |
{ | |
const static auto name = recordCoordTags<RecordDim>(RecordCoord<RecordCoords...>{}); | |
fieldHits.at(name)++; | |
LLAMA_FORCE_INLINE_RECURSIVE return mapping.blobNrAndOffset(ai, rc); | |
} | |
void print() const | |
{ | |
std::cout << "Trace mapping, number of accesses:\n"; | |
for(const auto& [k, v] : fieldHits) | |
std::cout << '\t' << k << ":\t" << v << '\n'; | |
} | |
Mapping mapping; | |
mutable std::unordered_map<std::string, std::atomic<std::size_t>> fieldHits; | |
bool printOnDestruction; | |
}; | |
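// A minimal sketch, reusing Vec and Extents from the AoS sketch above: each field access through the view bumps a | |
// counter, and the per-field totals are printed when the view (and thus the Trace mapping) is destroyed: | |
//   auto view = llama::allocView(llama::mapping::Trace{llama::mapping::AlignedAoS<Extents, Vec>{Extents{1024}}}); | |
//   view(0)(X{}) += 1.0f; // counted as one access to field X | |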
} // namespace llama::mapping | |
// == | |
// == ./mapping/Trace.hpp == | |
// ============================================================================ | |
// ============================================================================ | |
// == ./mapping/tree/Mapping.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "../Common.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/tree/Functors.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// ============================================================================ | |
// == ./mapping/tree/TreeFromDimensions.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "../../Core.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./Tuple.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "Meta.hpp" // amalgamate: file already expanded | |
// #include "macros.hpp" // amalgamate: file already expanded | |
namespace llama | |
{ | |
template<typename... Elements> | |
struct Tuple | |
{ | |
}; | |
/// Tuple class like `std::tuple` but suitable for use with offloading devices like GPUs. | |
template<typename TFirstElement, typename... Elements> | |
struct Tuple<TFirstElement, Elements...> | |
{ | |
using FirstElement = TFirstElement; | |
using RestTuple = Tuple<Elements...>; | |
constexpr Tuple() = default; | |
/// Construct a tuple from values of the same types as the tuple stores. | |
LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(FirstElement first, Elements... rest) | |
: first(std::move(first)) | |
, rest(std::move(rest)...) | |
{ | |
} | |
/// Construct a tuple from forwarded values of potentially different types as the tuple stores. | |
// SFINAE away this ctor if tuple elements cannot be constructed from ctor arguments | |
template< | |
typename T, | |
typename... Ts, | |
std::enable_if_t< | |
sizeof...(Elements) == sizeof...(Ts) | |
&& std::is_constructible_v<TFirstElement, T> && (std::is_constructible_v<Elements, Ts> && ...), | |
int> = 0> | |
LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(T&& firstArg, Ts&&... restArgs) | |
: first(std::forward<T>(firstArg)) | |
, rest(std::forward<Ts>(restArgs)...) | |
{ | |
} | |
FirstElement first; ///< the first element (if existing) | |
#ifndef __NVCC__ | |
[[no_unique_address]] // nvcc 11.3 ICE | |
#endif | |
RestTuple rest; ///< the remaining elements | |
}; | |
template<typename... Elements> | |
Tuple(Elements...) -> Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>; | |
template<std::size_t Pos, typename... Elements> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(Tuple<Elements...>& tuple) -> auto& | |
{ | |
if constexpr(Pos == 0) | |
return tuple.first; | |
else | |
return get<Pos - 1>(tuple.rest); | |
} | |
template<std::size_t Pos, typename... Elements> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(const Tuple<Elements...>& tuple) -> const auto& | |
{ | |
if constexpr(Pos == 0) | |
return tuple.first; | |
else | |
return get<Pos - 1>(tuple.rest); | |
} | |
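// A minimal sketch of constructing and accessing a Tuple (class template argument deduction via the guide above): | |
//   llama::Tuple t{1, 2.0f, 'c'}; // deduced as llama::Tuple<int, float, char> | |
//   llama::get<1>(t) = 4.2f; // element access analogous to std::get | |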
} // namespace llama | |
template<typename... Elements> | |
struct std::tuple_size<llama::Tuple<Elements...>> | |
{ | |
static constexpr auto value = sizeof...(Elements); | |
}; | |
template<std::size_t I, typename... Elements> | |
struct std::tuple_element<I, llama::Tuple<Elements...>> | |
{ | |
using type = boost::mp11::mp_at_c<llama::Tuple<Elements...>, I>; | |
}; | |
namespace llama | |
{ | |
namespace internal | |
{ | |
template<typename... Elements, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto areEqual( | |
const Tuple<Elements...>& a, | |
const Tuple<Elements...>& b, | |
std::index_sequence<Is...>) -> bool | |
{ | |
return ((get<Is>(a) == get<Is>(b)) && ...); | |
} | |
} // namespace internal | |
template<typename... ElementsA, typename... ElementsB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(const Tuple<ElementsA...>& a, const Tuple<ElementsB...>& b) | |
-> bool | |
{ | |
using namespace boost::mp11; | |
if constexpr(sizeof...(ElementsA) == sizeof...(ElementsB)) | |
if constexpr(mp_apply<mp_all, mp_transform<std::is_same, mp_list<ElementsA...>, mp_list<ElementsB...>>>:: | |
value) | |
return internal::areEqual(a, b, std::make_index_sequence<sizeof...(ElementsA)>{}); | |
return false; | |
} | |
template<typename... ElementsA, typename... ElementsB> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(const Tuple<ElementsA...>& a, const Tuple<ElementsB...>& b) | |
-> bool | |
{ | |
return !(a == b); | |
} | |
namespace internal | |
{ | |
template<typename Tuple1, typename Tuple2, size_t... Is1, size_t... Is2> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleCatImpl( | |
const Tuple1& t1, | |
const Tuple2& t2, | |
std::index_sequence<Is1...>, | |
std::index_sequence<Is2...>) | |
{ | |
return Tuple{get<Is1>(t1)..., get<Is2>(t2)...}; | |
} | |
} // namespace internal | |
template<typename Tuple1, typename Tuple2> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleCat(const Tuple1& t1, const Tuple2& t2) | |
{ | |
return internal::tupleCatImpl( | |
t1, | |
t2, | |
std::make_index_sequence<std::tuple_size_v<Tuple1>>{}, | |
std::make_index_sequence<std::tuple_size_v<Tuple2>>{}); | |
} | |
namespace internal | |
{ | |
template<std::size_t Pos, typename Tuple, typename Replacement> | |
struct TupleReplaceImpl | |
{ | |
LLAMA_FN_HOST_ACC_INLINE | |
auto operator()(Tuple const tuple, Replacement const replacement) | |
{ | |
return tupleCat( | |
llama::Tuple{tuple.first}, | |
TupleReplaceImpl<Pos - 1, typename Tuple::RestTuple, Replacement>()(tuple.rest, replacement)); | |
}; | |
}; | |
template<typename... Elements, typename Replacement> | |
struct TupleReplaceImpl<0, Tuple<Elements...>, Replacement> | |
{ | |
LLAMA_FN_HOST_ACC_INLINE | |
auto operator()(Tuple<Elements...> tuple, Replacement const replacement) | |
{ | |
return tupleCat(Tuple{replacement}, tuple.rest); | |
}; | |
}; | |
template<typename OneElement, typename Replacement> | |
struct TupleReplaceImpl<0, Tuple<OneElement>, Replacement> | |
{ | |
LLAMA_FN_HOST_ACC_INLINE | |
auto operator()(Tuple<OneElement>, Replacement const replacement) | |
{ | |
return Tuple{replacement}; | |
} | |
}; | |
} // namespace internal | |
/// Creates a copy of a tuple with the element at position Pos replaced by replacement. | |
template<std::size_t Pos, typename Tuple, typename Replacement> | |
LLAMA_FN_HOST_ACC_INLINE auto tupleReplace(Tuple tuple, Replacement replacement) | |
{ | |
return internal::TupleReplaceImpl<Pos, Tuple, Replacement>()(tuple, replacement); | |
} | |
namespace internal | |
{ | |
template<size_t... Is, typename... Elements, typename Functor> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleTransformHelper( | |
std::index_sequence<Is...>, | |
const Tuple<Elements...>& tuple, | |
const Functor& functor) | |
{ | |
// FIXME(bgruber): nvcc fails to compile | |
// Tuple{functor(get<Is>(tuple))...} | |
return Tuple<decltype(functor(std::declval<Elements>()))...>{functor(get<Is>(tuple))...}; | |
} | |
} // namespace internal | |
/// Applies a functor to every element of a tuple, creating a new tuple with the result of the element | |
/// transformations. The functor needs to implement a template `operator()` to which all tuple elements are passed. | |
// TODO(bgruber): replace by mp11 version in Boost 1.74. | |
template<typename... Elements, typename Functor> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleTransform(const Tuple<Elements...>& tuple, const Functor& functor) | |
{ | |
return internal::tupleTransformHelper(std::make_index_sequence<sizeof...(Elements)>{}, tuple, functor); | |
} | |
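// A minimal sketch, reusing t from the Tuple sketch above; a generic lambda provides the required template | |
// operator(): | |
//   auto doubled = llama::tupleTransform(t, [](auto v) { return v + v; }); // yields llama::Tuple<int, float, int> | |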
/// Returns a copy of the tuple without the first element. | |
template<typename... Elements> | |
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front(const Tuple<Elements...>& tuple) | |
{ | |
return tuple.rest; | |
} | |
} // namespace llama | |
// == | |
// == ./Tuple.hpp == | |
// ============================================================================ | |
// #include <cstddef> // amalgamate: file already included | |
// #include <string> // amalgamate: file already included | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama::mapping::tree | |
{ | |
template<typename T> | |
inline constexpr auto one = 1; | |
template<> | |
inline constexpr auto one<boost::mp11::mp_size_t<1>> = boost::mp11::mp_size_t<1>{}; | |
template<typename TIdentifier, typename TType, typename CountType = std::size_t> | |
struct Leaf | |
{ | |
using Identifier = TIdentifier; | |
using Type = TType; | |
const CountType count = one<CountType>; | |
}; | |
template<typename TIdentifier, typename TChildrenTuple, typename CountType = std::size_t> | |
struct Node | |
{ | |
using Identifier = TIdentifier; | |
using ChildrenTuple = TChildrenTuple; | |
const CountType count = one<CountType>; | |
const ChildrenTuple childs = {}; | |
}; | |
template<std::size_t ChildIndex = 0, typename ArrayIndexType = std::size_t> | |
struct TreeCoordElement | |
{ | |
static constexpr boost::mp11::mp_size_t<ChildIndex> childIndex = {}; | |
const ArrayIndexType arrayIndex = {}; | |
}; | |
template<std::size_t... Coords> | |
using TreeCoord = Tuple<TreeCoordElement<Coords, boost::mp11::mp_size_t<0>>...>; | |
namespace internal | |
{ | |
template<typename... Coords, std::size_t... Is> | |
auto treeCoordToString(Tuple<Coords...> treeCoord, std::index_sequence<Is...>) -> std::string | |
{ | |
auto s | |
= ((std::to_string(get<Is>(treeCoord).arrayIndex) + ":" + std::to_string(get<Is>(treeCoord).childIndex) | |
+ ", ") | |
+ ...); | |
s.resize(s.length() - 2); | |
return s; | |
} | |
} // namespace internal | |
template<typename TreeCoord> | |
auto treeCoordToString(TreeCoord treeCoord) -> std::string | |
{ | |
return std::string("[ ") | |
+ internal::treeCoordToString(treeCoord, std::make_index_sequence<std::tuple_size_v<TreeCoord>>{}) | |
+ std::string(" ]"); | |
} | |
namespace internal | |
{ | |
template<typename Tag, typename RecordDim, typename CountType> | |
struct CreateTreeElement | |
{ | |
using type = Leaf<Tag, RecordDim, boost::mp11::mp_size_t<1>>; | |
}; | |
template<typename Tag, typename... Fields, typename CountType> | |
struct CreateTreeElement<Tag, Record<Fields...>, CountType> | |
{ | |
using type = Node< | |
Tag, | |
Tuple< | |
typename CreateTreeElement<GetFieldTag<Fields>, GetFieldType<Fields>, boost::mp11::mp_size_t<1>>:: | |
type...>, | |
CountType>; | |
}; | |
template<typename Tag, typename ChildType, std::size_t Count, typename CountType> | |
struct CreateTreeElement<Tag, ChildType[Count], CountType> | |
{ | |
template<std::size_t... Is> | |
static auto createChildren(std::index_sequence<Is...>) | |
{ | |
return Tuple< | |
typename CreateTreeElement<RecordCoord<Is>, ChildType, boost::mp11::mp_size_t<1>>::type...>{}; | |
} | |
using type = Node<Tag, decltype(createChildren(std::make_index_sequence<Count>{})), CountType>; | |
}; | |
template<typename Leaf, std::size_t Count> | |
struct WrapInNNodes | |
{ | |
using type = Node<NoName, Tuple<typename WrapInNNodes<Leaf, Count - 1>::type>>; | |
}; | |
template<typename Leaf> | |
struct WrapInNNodes<Leaf, 0> | |
{ | |
using type = Leaf; | |
}; | |
template<typename RecordDim> | |
using TreeFromRecordDimImpl = typename CreateTreeElement<NoName, RecordDim, std::size_t>::type; | |
} // namespace internal | |
template<typename RecordDim> | |
using TreeFromRecordDim = internal::TreeFromRecordDimImpl<RecordDim>; | |
template<typename ArrayExtents, typename RecordDim> | |
using TreeFromDimensions = | |
typename internal::WrapInNNodes<internal::TreeFromRecordDimImpl<RecordDim>, ArrayExtents::rank - 1>::type; | |
template<typename RecordDim, std::size_t N, std::size_t Pos = 0> | |
LLAMA_FN_HOST_ACC_INLINE auto createTree(const ArrayIndex<N>& size) | |
{ | |
if constexpr(Pos == N - 1) | |
return TreeFromRecordDim<RecordDim>{size[N - 1]}; | |
else | |
{ | |
Tuple inner{createTree<RecordDim, N, Pos + 1>(size)}; | |
return Node<NoName, decltype(inner)>{size[Pos], inner}; | |
} | |
}; | |
namespace internal | |
{ | |
template< | |
typename ArrayIndex, | |
std::size_t... ADIndices, | |
std::size_t FirstRecordCoord, | |
std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE auto createTreeCoord( | |
const ArrayIndex& ai, | |
std::index_sequence<ADIndices...>, | |
RecordCoord<FirstRecordCoord, RecordCoords...>) | |
{ | |
return Tuple{ | |
TreeCoordElement<(ADIndices == ArrayIndex::rank - 1 ? FirstRecordCoord : 0)>{ai[ADIndices]}..., | |
TreeCoordElement<RecordCoords, boost::mp11::mp_size_t<0>>{}..., | |
TreeCoordElement<0, boost::mp11::mp_size_t<0>>{}}; | |
} | |
} // namespace internal | |
template<typename RecordCoord, typename ArrayIndex> | |
LLAMA_FN_HOST_ACC_INLINE auto createTreeCoord(const ArrayIndex& ai) | |
{ | |
return internal::createTreeCoord(ai, std::make_index_sequence<ArrayIndex::rank>{}, RecordCoord{}); | |
} | |
} // namespace llama::mapping::tree | |
// == | |
// == ./mapping/tree/TreeFromDimensions.hpp == | |
// ============================================================================ | |
namespace llama::mapping::tree::functor | |
{ | |
/// Functor for \ref tree::Mapping. Does nothing with the mapping tree; used for testing. | |
struct Idem | |
{ | |
template<typename Tree> | |
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(const Tree& tree) const -> Tree | |
{ | |
return tree; | |
} | |
template<typename Tree, typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const TreeCoord& basicCoord, const Tree&) const | |
-> TreeCoord | |
{ | |
return basicCoord; | |
} | |
template<typename Tree, typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const TreeCoord& resultCoord, const Tree&) const | |
-> TreeCoord | |
{ | |
return resultCoord; | |
} | |
}; | |
/// Functor for \ref tree::Mapping. Moves all run time parts to the leaves, creating a SoA layout. | |
struct LeafOnlyRT | |
{ | |
template<typename Tree> | |
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(Tree tree) const | |
{ | |
return basicToResultImpl(tree, 1); | |
} | |
template<typename Tree, typename BasicCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const BasicCoord& basicCoord, const Tree& tree) const | |
{ | |
return basicCoordToResultCoordImpl(basicCoord, tree); | |
} | |
template<typename Tree, typename ResultCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const ResultCoord& resultCoord, const Tree& /*tree*/) | |
const -> ResultCoord | |
{ | |
return resultCoord; | |
} | |
private: | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE static auto basicToResultImpl( | |
const Node<Identifier, Type, CountType>& node, | |
std::size_t arraySize) | |
{ | |
auto children = tupleTransform( | |
node.childs, | |
[&](auto element) { return basicToResultImpl(element, LLAMA_COPY(node.count) * arraySize); }); | |
return Node<Identifier, decltype(children), boost::mp11::mp_size_t<1>>{{}, children}; | |
} | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE static auto basicToResultImpl( | |
const Leaf<Identifier, Type, CountType>& leaf, | |
std::size_t arraySize) | |
{ | |
return Leaf<Identifier, Type, std::size_t>{LLAMA_COPY(leaf.count) * arraySize}; | |
} | |
template<typename BasicCoord, typename NodeOrLeaf> | |
LLAMA_FN_HOST_ACC_INLINE static auto basicCoordToResultCoordImpl( | |
const BasicCoord& basicCoord, | |
const NodeOrLeaf& nodeOrLeaf, | |
std::size_t arraySize = 0) | |
{ | |
if constexpr(std::tuple_size_v<BasicCoord> == 1) | |
return Tuple{TreeCoordElement<BasicCoord::FirstElement::childIndex>{ | |
arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)}}; | |
else | |
{ | |
const auto& branch = get<BasicCoord::FirstElement::childIndex>(nodeOrLeaf.childs); | |
auto first = TreeCoordElement<BasicCoord::FirstElement::childIndex, boost::mp11::mp_size_t<0>>{}; | |
return tupleCat( | |
Tuple{first}, | |
basicCoordToResultCoordImpl( | |
basicCoord.rest, | |
branch, | |
(arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)) * LLAMA_COPY(branch.count))); | |
} | |
} | |
}; | |
namespace internal | |
{ | |
template<typename TreeCoord, typename Node> | |
LLAMA_FN_HOST_ACC_INLINE auto getNode(const Node& node) | |
{ | |
if constexpr(std::is_same_v<TreeCoord, Tuple<>>) | |
return node; | |
else | |
return getNode<typename TreeCoord::RestTuple>(get<TreeCoord::FirstElement::childIndex>(node.childs)); | |
} | |
template<typename TreeCoord, typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto changeNodeRuntime( | |
const Node<Identifier, Type, CountType>& tree, | |
std::size_t newValue) | |
{ | |
if constexpr(std::is_same_v<TreeCoord, Tuple<>>) | |
return Node<Identifier, Type>{newValue, tree.childs}; | |
else | |
{ | |
auto current = get<TreeCoord::FirstElement::childIndex>(tree.childs); | |
auto replacement = changeNodeRuntime<typename TreeCoord::RestTuple>(current, newValue); | |
auto children = tupleReplace<TreeCoord::FirstElement::childIndex>(tree.childs, replacement); | |
return Node<Identifier, decltype(children)>{tree.count, children}; | |
} | |
} | |
template<typename TreeCoord, typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto changeNodeRuntime( | |
const Leaf<Identifier, Type, CountType>& /*tree*/, | |
std::size_t newValue) | |
{ | |
return Leaf<Identifier, Type, std::size_t>{newValue}; | |
} | |
struct ChangeNodeChildsRuntimeFunctor | |
{ | |
const std::size_t newValue; | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(const Node<Identifier, Type, CountType>& element) const | |
{ | |
return Node<Identifier, Type, std::size_t>{element.count * newValue, element.childs}; | |
} | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto operator()(const Leaf<Identifier, Type, CountType>& element) const | |
{ | |
return Leaf<Identifier, Type, std::size_t>{element.count * newValue}; | |
} | |
}; | |
template<typename TreeCoord, typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto changeNodeChildsRuntime( | |
const Node<Identifier, Type, CountType>& tree, | |
std::size_t newValue) | |
{ | |
if constexpr(std::is_same_v<TreeCoord, Tuple<>>) | |
{ | |
auto children = tupleTransform(tree.childs, ChangeNodeChildsRuntimeFunctor{newValue}); | |
return Node<Identifier, decltype(children)>{tree.count, children}; | |
} | |
else | |
{ | |
auto current = get<TreeCoord::FirstElement::childIndex>(tree.childs); | |
auto replacement = changeNodeChildsRuntime<typename TreeCoord::RestTuple>(current, newValue); | |
auto children = tupleReplace<TreeCoord::FirstElement::childIndex>(tree.childs, replacement); | |
return Node<Identifier, decltype(children)>{tree.count, children}; | |
} | |
} | |
template<typename TreeCoord, typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto changeNodeChildsRuntime( | |
const Leaf<Identifier, Type, CountType>& tree, | |
std::size_t /*newValue*/) | |
{ | |
return tree; | |
} | |
} // namespace internal | |
/// Functor for \ref tree::Mapping. Moves the run time part of a node one level down in the direction of the leaves | |
/// by the given amount (run time or compile time value). | |
/// \tparam TreeCoord Tree coordinate in the mapping tree whose run time part shall be moved down one level. | |
/// \see tree::Mapping | |
template<typename TreeCoord, typename Amount = std::size_t> | |
struct MoveRTDown | |
{ | |
const Amount amount = {}; | |
template<typename Tree> | |
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(const Tree& tree) const | |
{ | |
return internal::changeNodeChildsRuntime<TreeCoord>( | |
internal::changeNodeRuntime<TreeCoord>( | |
tree, | |
// NOLINTNEXTLINE(clang-analyzer-core.DivideZero) | |
(internal::getNode<TreeCoord>(tree).count + amount - 1) / amount), | |
amount); | |
} | |
template<typename Tree, typename BasicCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const BasicCoord& basicCoord, const Tree& tree) const | |
{ | |
return basicCoordToResultCoordImpl<TreeCoord>(basicCoord, tree); | |
} | |
template<typename Tree, typename ResultCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const ResultCoord& resultCoord, const Tree&) const | |
-> ResultCoord | |
{ | |
return resultCoord; | |
} | |
private: | |
template<typename InternalTreeCoord, typename BasicCoord, typename Tree> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoordImpl(const BasicCoord& basicCoord, const Tree& tree) const | |
{ | |
if constexpr(std::is_same_v<InternalTreeCoord, Tuple<>>) | |
{ | |
if constexpr(std::is_same_v<BasicCoord, Tuple<>>) | |
return Tuple{}; | |
else | |
{ | |
const auto& childTree = get<BasicCoord::FirstElement::childIndex>(tree.childs); | |
const auto rt1 = basicCoord.first.arrayIndex / amount; | |
const auto rt2 | |
= basicCoord.first.arrayIndex % amount * childTree.count + basicCoord.rest.first.arrayIndex; | |
auto rt1Child = TreeCoordElement<BasicCoord::FirstElement::childIndex>{rt1}; | |
auto rt2Child = TreeCoordElement<BasicCoord::RestTuple::FirstElement::childIndex>{rt2}; | |
return tupleCat(Tuple{rt1Child}, tupleCat(Tuple{rt2Child}, pop_front(basicCoord.rest))); | |
} | |
} | |
else | |
{ | |
if constexpr(InternalTreeCoord::FirstElement::childIndex != BasicCoord::FirstElement::childIndex) | |
return basicCoord; | |
else | |
{ | |
auto rest = basicCoordToResultCoordImpl<typename InternalTreeCoord::RestTuple>( | |
pop_front(basicCoord), | |
get<BasicCoord::FirstElement::childIndex>(tree.childs)); | |
return tupleCat(Tuple{basicCoord.first}, rest); | |
} | |
} | |
} | |
}; | |
template<typename TreeCoord, std::size_t Amount> | |
using MoveRTDownFixed = MoveRTDown<TreeCoord, boost::mp11::mp_size_t<Amount>>; | |
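// A minimal sketch: moving a factor of 8 from the run time count at the tree root down towards the leaves, which | |
// creates a blocked (AoSoA-like) layout when passed to the tree Mapping below: | |
//   auto op = llama::mapping::tree::functor::MoveRTDownFixed<llama::mapping::tree::TreeCoord<>, 8>{}; | |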
} // namespace llama::mapping::tree::functor | |
// == | |
// == ./mapping/tree/Functors.hpp == | |
// ============================================================================ | |
// #include "TreeFromDimensions.hpp" // amalgamate: file already expanded | |
// ============================================================================ | |
// == ./mapping/tree/toString.hpp == | |
// == | |
// Copyright 2018 Alexander Matthes | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// #pragma once | |
// #include "TreeFromDimensions.hpp" // amalgamate: file already expanded | |
// #include <boost/core/demangle.hpp> // amalgamate: file already included | |
// #include <string> // amalgamate: file already included | |
#include <typeinfo> | |
namespace llama::mapping::tree | |
{ | |
template<typename T> | |
auto toString(T) -> std::string | |
{ | |
return "Unknown"; | |
} | |
// handles array indices | |
template<std::size_t I> | |
inline auto toString(RecordCoord<I>) -> std::string | |
{ | |
return ""; | |
} | |
inline auto toString(NoName) -> std::string | |
{ | |
return ""; | |
} | |
template<typename... Elements> | |
auto toString(Tuple<Elements...> tree) -> std::string | |
{ | |
if constexpr(sizeof...(Elements) > 1) | |
return toString(tree.first) + " , " + toString(tree.rest); | |
else | |
return toString(tree.first); | |
} | |
namespace internal | |
{ | |
inline void replace_all(std::string& str, const std::string& search, const std::string& replace) | |
{ | |
std::string::size_type i = 0; | |
while((i = str.find(search, i)) != std::string::npos) | |
{ | |
str.replace(i, search.length(), replace); | |
i += replace.length(); | |
} | |
} | |
template<typename NodeOrLeaf> | |
auto countAndIdentToString(const NodeOrLeaf& nodeOrLeaf) -> std::string | |
{ | |
auto r = std::to_string(nodeOrLeaf.count); | |
if constexpr(std::is_same_v<std::decay_t<decltype(nodeOrLeaf.count)>, std::size_t>) | |
r += "R"; // runtime | |
else | |
r += "C"; // compile time | |
r += std::string{" * "} + toString(typename NodeOrLeaf::Identifier{}); | |
return r; | |
} | |
} // namespace internal | |
template<typename Identifier, typename Type, typename CountType> | |
auto toString(const Node<Identifier, Type, CountType>& node) -> std::string | |
{ | |
return internal::countAndIdentToString(node) + "[ " + toString(node.childs) + " ]"; | |
} | |
template<typename Identifier, typename Type, typename CountType> | |
auto toString(const Leaf<Identifier, Type, CountType>& leaf) -> std::string | |
{ | |
auto raw = boost::core::demangle(typeid(Type).name()); | |
#ifdef _MSC_VER | |
internal::replace_all(raw, " __cdecl(void)", ""); | |
#endif | |
#ifdef __GNUG__ | |
internal::replace_all(raw, " ()", ""); | |
#endif | |
return internal::countAndIdentToString(leaf) + "(" + raw + ")"; | |
} | |
} // namespace llama::mapping::tree | |
// == | |
// == ./mapping/tree/toString.hpp == | |
// ============================================================================ | |
// #include <type_traits> // amalgamate: file already included | |
namespace llama::mapping::tree | |
{ | |
namespace internal | |
{ | |
template<typename Tree, typename TreeOperationList> | |
struct MergeFunctors | |
{ | |
}; | |
template<typename Tree, typename... Operations> | |
struct MergeFunctors<Tree, Tuple<Operations...>> | |
{ | |
boost::mp11::mp_first<Tuple<Operations...>> operation = {}; | |
using ResultTree = decltype(operation.basicToResult(Tree())); | |
ResultTree treeAfterOp; | |
MergeFunctors<ResultTree, boost::mp11::mp_drop_c<Tuple<Operations...>, 1>> next = {}; | |
MergeFunctors() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
MergeFunctors(const Tree& tree, const Tuple<Operations...>& treeOperationList) | |
: operation(treeOperationList.first) | |
, treeAfterOp(operation.basicToResult(tree)) | |
, next(treeAfterOp, pop_front(treeOperationList)) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto basicToResult(const Tree& tree) const | |
{ | |
if constexpr(sizeof...(Operations) > 1) | |
return next.basicToResult(treeAfterOp); | |
else if constexpr(sizeof...(Operations) == 1) | |
return operation.basicToResult(tree); | |
else | |
return tree; | |
} | |
template<typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const TreeCoord& basicCoord, const Tree& tree) const | |
{ | |
if constexpr(sizeof...(Operations) >= 1) | |
return next.basicCoordToResultCoord( | |
operation.basicCoordToResultCoord(basicCoord, tree), | |
treeAfterOp); | |
else | |
return basicCoord; | |
} | |
template<typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const TreeCoord& resultCoord, const Tree& tree) const | |
{ | |
if constexpr(sizeof...(Operations) >= 1) | |
return next.resultCoordToBasicCoord( | |
operation.resultCoordToBasicCoord(resultCoord, tree), | |
operation.basicToResult(tree)); | |
else | |
return resultCoord; | |
} | |
}; | |
template<typename Tree> | |
struct MergeFunctors<Tree, Tuple<>> | |
{ | |
MergeFunctors() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
MergeFunctors(const Tree&, const Tuple<>&) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto basicToResult(const Tree& tree) const | |
{ | |
return tree; | |
} | |
template<typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(TreeCoord const& basicCoord, Tree const& /*tree*/) | |
const -> TreeCoord | |
{ | |
return basicCoord; | |
} | |
template<typename TreeCoord> | |
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(TreeCoord const& resultCoord, Tree const& /*tree*/) | |
const -> TreeCoord | |
{ | |
return resultCoord; | |
} | |
}; | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Node<Identifier, Type, CountType>& node) -> std::size_t; | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Leaf<Identifier, Type, CountType>& leaf) -> std::size_t; | |
template<typename... Children, std::size_t... Is, typename Count> | |
LLAMA_FN_HOST_ACC_INLINE auto getChildrenBlobSize( | |
const Tuple<Children...>& childs, | |
std::index_sequence<Is...> /*ii*/, | |
const Count& count) -> std::size_t | |
{ | |
return count * (getTreeBlobSize(get<Is>(childs)) + ...); | |
} | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Node<Identifier, Type, CountType>& node) -> std::size_t | |
{ | |
constexpr std::size_t childCount = boost::mp11::mp_size<std::decay_t<decltype(node.childs)>>::value; | |
return getChildrenBlobSize(node.childs, std::make_index_sequence<childCount>{}, LLAMA_COPY(node.count)); | |
} | |
template<typename Identifier, typename Type, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Leaf<Identifier, Type, CountType>& leaf) -> std::size_t | |
{ | |
return leaf.count * sizeof(Type); | |
} | |
template<typename Childs, typename CountType> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Childs& childs, const CountType& count) -> std::size_t | |
{ | |
return getTreeBlobSize(Node<NoName, Childs, CountType>{count, childs}); | |
} | |
template<std::size_t MaxPos, typename Identifier, typename Type, typename CountType, std::size_t... Is> | |
LLAMA_FN_HOST_ACC_INLINE auto sumChildrenSmallerThan( | |
const Node<Identifier, Type, CountType>& node, | |
std::index_sequence<Is...>) -> std::size_t | |
{ | |
return ((getTreeBlobSize(get<Is>(node.childs)) * (Is < MaxPos)) + ...); | |
} | |
template<typename Tree, typename... Coords> | |
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobByte(const Tree& tree, const Tuple<Coords...>& treeCoord) | |
-> std::size_t | |
{ | |
const auto firstArrayIndex = treeCoord.first.arrayIndex; | |
if constexpr(sizeof...(Coords) > 1) | |
{ | |
constexpr auto firstChildIndex = decltype(treeCoord.first.childIndex)::value; | |
return getTreeBlobSize(tree.childs, firstArrayIndex) | |
+ sumChildrenSmallerThan<firstChildIndex>( | |
tree, | |
std::make_index_sequence<std::tuple_size_v<typename Tree::ChildrenTuple>>{}) | |
+ getTreeBlobByte(get<firstChildIndex>(tree.childs), treeCoord.rest); | |
} | |
else | |
return sizeof(typename Tree::Type) * firstArrayIndex; | |
} | |
} // namespace internal | |
/// An experimental attempt to provide a general purpose description of a mapping. \ref Array and record | |
/// dimensions are represented by a compile time tree data structure. This tree is mapped into memory by means of a | |
/// breadth-first tree traversal. By specifying additional tree operations, the tree can be modified at compile | |
/// time before being mapped to memory. | |
template<typename TArrayExtents, typename TRecordDim, typename TreeOperationList> | |
struct Mapping : private TArrayExtents | |
{ | |
using ArrayExtents = TArrayExtents; | |
using ArrayIndex = typename ArrayExtents::Index; | |
using RecordDim = TRecordDim; | |
using BasicTree = TreeFromDimensions<ArrayExtents, RecordDim>; | |
// TODO(bgruber): support more than one blob | |
static constexpr std::size_t blobCount = 1; | |
using MergedFunctors = internal::MergeFunctors<BasicTree, TreeOperationList>; | |
BasicTree basicTree; | |
MergedFunctors mergedFunctors; | |
using ResultTree = decltype(mergedFunctors.basicToResult(basicTree)); | |
ResultTree resultTree; | |
Mapping() = default; | |
LLAMA_FN_HOST_ACC_INLINE | |
Mapping(ArrayExtents extents, TreeOperationList treeOperationList, RecordDim = {}) | |
: ArrayExtents(extents) | |
, basicTree(createTree<RecordDim>(extents.toArray())) | |
, mergedFunctors(basicTree, treeOperationList) | |
, resultTree(mergedFunctors.basicToResult(basicTree)) | |
{ | |
} | |
LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents | |
{ | |
return ArrayExtents{*this}; | |
} | |
LLAMA_FN_HOST_ACC_INLINE | |
auto blobSize(std::size_t const) const -> std::size_t | |
{ | |
return internal::getTreeBlobSize(resultTree); | |
} | |
template<std::size_t... RecordCoords> | |
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const | |
-> NrAndOffset | |
{ | |
auto const basicTreeCoord = createTreeCoord<RecordCoord<RecordCoords...>>(ai); | |
auto const resultTreeCoord = mergedFunctors.basicCoordToResultCoord(basicTreeCoord, basicTree); | |
const auto offset = internal::getTreeBlobByte(resultTree, resultTreeCoord); | |
return {0, offset}; | |
} | |
}; | |
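// A minimal sketch, reusing Vec and Extents from the AoS sketch further above: the LeafOnlyRT functor moves all | |
// run time counts to the leaves, yielding an SoA-like layout: | |
//   auto ops = llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT{}}; | |
//   auto view = llama::allocView(llama::mapping::tree::Mapping<Extents, Vec, decltype(ops)>{Extents{1024}, ops}); | |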
} // namespace llama::mapping::tree | |
// == | |
// == ./mapping/tree/Mapping.hpp == | |
// ============================================================================ | |
#ifdef __NVCC__ | |
# pragma pop | |
#endif | |
// == | |
// == ./llama.hpp == | |
// ============================================================================ |