Gist bradphelan/726908c726ff4c134791c908210064b9 — last active August 4, 2023 09:15.
Save this file to your computer or open it in GitHub Desktop.
Note: this file contains bidirectional Unicode text that may be interpreted or compiled
differently than what appears below. To review it, open the file in an editor that
reveals hidden Unicode characters.
#pragma once | |
#include <type_traits> | |
#include <iterator> | |
#include <iostream> | |
#include <fstream> | |
#include <mutex> | |
#include <stack> | |
#include <queue> | |
#include <vector> | |
#include <algorithm> | |
#include <memory> | |
#include <atomic> | |
#include <thread> | |
#include <future> | |
#include <functional> | |
#include <unordered_map> | |
#include <unordered_set> | |
#include <sstream> | |
#include <list> | |
#include <numeric> | |
#include <random> | |
#include <iomanip> | |
#include <cassert> | |
#include <cmath> | |
#include <array> | |
#include <string> | |
#include <variant> | |
#include <optional> | |
#include <cstdlib> | |
#include <cstdio> | |
#include <string> | |
#define TF_OS_LINUX 0 | |
#define TF_OS_DRAGONFLY 0 | |
#define TF_OS_FREEBSD 0 | |
#define TF_OS_NETBSD 0 | |
#define TF_OS_OPENBSD 0 | |
#define TF_OS_DARWIN 0 | |
#define TF_OS_WINDOWS 0 | |
#define TF_OS_CNK 0 | |
#define TF_OS_HURD 0 | |
#define TF_OS_SOLARIS 0 | |
#define TF_OS_UNIX 0 | |
#ifdef _WIN32 | |
#undef TF_OS_WINDOWS | |
#define TF_OS_WINDOWS 1 | |
#endif | |
#ifdef __CYGWIN__ | |
#undef TF_OS_WINDOWS | |
#define TF_OS_WINDOWS 1 | |
#endif | |
#if (defined __APPLE__ && defined __MACH__) | |
#undef TF_OS_DARWIN | |
#define TF_OS_DARWIN 1 | |
#endif | |
// in some ppc64 linux installations, only the second condition is met | |
#if (defined __linux) | |
#undef TF_OS_LINUX | |
#define TF_OS_LINUX 1 | |
#elif (defined __linux__) | |
#undef TF_OS_LINUX | |
#define TF_OS_LINUX 1 | |
#else | |
#endif | |
#if (defined __DragonFly__) | |
#undef TF_OS_DRAGONFLY | |
#define TF_OS_DRAGONFLY 1 | |
#endif | |
#if (defined __FreeBSD__) | |
#undef TF_OS_FREEBSD | |
#define TF_OS_FREEBSD 1 | |
#endif | |
#if (defined __NetBSD__) | |
#undef TF_OS_NETBSD | |
#define TF_OS_NETBSD 1 | |
#endif | |
#if (defined __OpenBSD__) | |
#undef TF_OS_OPENBSD | |
#define TF_OS_OPENBSD 1 | |
#endif | |
#if (defined __bgq__) | |
#undef TF_OS_CNK | |
#define TF_OS_CNK 1 | |
#endif | |
#if (defined __GNU__) | |
#undef TF_OS_HURD | |
#define TF_OS_HURD 1 | |
#endif | |
#if (defined __sun) | |
#undef TF_OS_SOLARIS | |
#define TF_OS_SOLARIS 1 | |
#endif | |
#if (1 != \ | |
TF_OS_LINUX + TF_OS_DRAGONFLY + TF_OS_FREEBSD + TF_OS_NETBSD + \ | |
TF_OS_OPENBSD + TF_OS_DARWIN + TF_OS_WINDOWS + TF_OS_HURD + \ | |
TF_OS_SOLARIS) | |
#define TF_OS_UNKNOWN 1 | |
#endif | |
#if TF_OS_LINUX || TF_OS_DRAGONFLY || TF_OS_FREEBSD || TF_OS_NETBSD || \ | |
TF_OS_OPENBSD || TF_OS_DARWIN || TF_OS_HURD || TF_OS_SOLARIS | |
#undef TF_OS_UNIX | |
#define TF_OS_UNIX 1 | |
#endif | |
//----------------------------------------------------------------------------- | |
// Cache line alignment | |
//----------------------------------------------------------------------------- | |
#if defined(__i386__) || defined(__x86_64__) | |
#define TF_CACHELINE_SIZE 64 | |
#elif defined(__powerpc64__) | |
// TODO | |
// This is the L1 D-cache line size of our Power7 machines. | |
// Need to check if this is appropriate for other PowerPC64 systems. | |
#define TF_CACHELINE_SIZE 128 | |
#elif defined(__arm__) | |
// Cache line sizes for ARM: These values are not strictly correct since | |
// cache line sizes depend on implementations, not architectures. | |
// There are even implementations with cache line sizes configurable | |
// at boot time. | |
#if defined(__ARM_ARCH_5T__) | |
#define TF_CACHELINE_SIZE 32 | |
#elif defined(__ARM_ARCH_7A__) | |
#define TF_CACHELINE_SIZE 64 | |
#endif | |
#endif | |
#ifndef TF_CACHELINE_SIZE | |
// A reasonable default guess. Note that overestimates tend to waste more | |
// space, while underestimates tend to waste more time. | |
#define TF_CACHELINE_SIZE 64 | |
#endif | |
//----------------------------------------------------------------------------- | |
// pause | |
//----------------------------------------------------------------------------- | |
//#if __has_include (<immintrin.h>) | |
// #define TF_HAS_MM_PAUSE 1 | |
// #include <immintrin.h> | |
//#endif | |
namespace tf { | |
// Struct: CachelineAligned | |
// Due to prefetch, we typically do 2x cacheline for the alignment. | |
template <typename T> | |
struct CachelineAligned { | |
alignas (2*TF_CACHELINE_SIZE) T data; | |
}; | |
// Function: get_env | |
inline std::string get_env(const std::string& str) { | |
#ifdef _MSC_VER | |
char *ptr = nullptr; | |
size_t len = 0; | |
if(_dupenv_s(&ptr, &len, str.c_str()) == 0 && ptr != nullptr) { | |
std::string res(ptr, len); | |
std::free(ptr); | |
return res; | |
} | |
return ""; | |
#else | |
auto ptr = std::getenv(str.c_str()); | |
return ptr ? ptr : ""; | |
#endif | |
} | |
// Function: has_env | |
inline bool has_env(const std::string& str) { | |
#ifdef _MSC_VER | |
char *ptr = nullptr; | |
size_t len = 0; | |
if(_dupenv_s(&ptr, &len, str.c_str()) == 0 && ptr != nullptr) { | |
std::string res(ptr, len); | |
std::free(ptr); | |
return true; | |
} | |
return false; | |
#else | |
auto ptr = std::getenv(str.c_str()); | |
return ptr ? true : false; | |
#endif | |
} | |
// Procedure: relax_cpu | |
//inline void relax_cpu() { | |
//#ifdef TF_HAS_MM_PAUSE | |
// _mm_pause(); | |
//#endif | |
//} | |
} // end of namespace tf ----------------------------------------------------- | |
namespace tf { | |
//----------------------------------------------------------------------------- | |
// Traits | |
//----------------------------------------------------------------------------- | |
//// Struct: dependent_false | |
//template <typename... T> | |
//struct dependent_false { | |
// static constexpr bool value = false; | |
//}; | |
// | |
//template <typename... T> | |
//constexpr auto dependent_false_v = dependent_false<T...>::value; | |
template<typename> inline constexpr bool dependent_false_v = false; | |
// ---------------------------------------------------------------------------- | |
// is_pod | |
//----------------------------------------------------------------------------- | |
template <typename T> | |
struct is_pod { | |
static const bool value = std::is_trivial_v<T> && | |
std::is_standard_layout_v<T>; | |
}; | |
template <typename T> | |
constexpr bool is_pod_v = is_pod<T>::value; | |
//----------------------------------------------------------------------------- | |
// NoInit | |
//----------------------------------------------------------------------------- | |
template <typename T> | |
struct NoInit { | |
//static_assert(is_pod_v<T>, "NoInit only supports POD type"); | |
// constructor without initialization | |
NoInit () noexcept {} | |
// implicit conversion T -> NoInit<T> | |
constexpr NoInit (T value) noexcept : v{value} {} | |
// implicit conversion NoInit<T> -> T | |
constexpr operator T () const noexcept { return v; } | |
T v; | |
}; | |
//----------------------------------------------------------------------------- | |
// Move-On-Copy | |
//----------------------------------------------------------------------------- | |
// Struct: MoveOnCopyWrapper | |
template <typename T> | |
struct MoC { | |
MoC(T&& rhs) : object(std::move(rhs)) {} | |
MoC(const MoC& other) : object(std::move(other.object)) {} | |
T& get() { return object; } | |
mutable T object; | |
}; | |
template <typename T> | |
auto make_moc(T&& m) { | |
return MoC<T>(std::forward<T>(m)); | |
} | |
//----------------------------------------------------------------------------- | |
// Visitors. | |
//----------------------------------------------------------------------------- | |
//// Overloadded. | |
//template <typename... Ts> | |
//struct Visitors : Ts... { | |
// using Ts::operator()... ; | |
//}; | |
// | |
//template <typename... Ts> | |
//Visitors(Ts...) -> Visitors<Ts...>; | |
// ---------------------------------------------------------------------------- | |
// std::variant | |
// ---------------------------------------------------------------------------- | |
template <typename T, typename> | |
struct get_index; | |
template <size_t I, typename... Ts> | |
struct get_index_impl {}; | |
template <size_t I, typename T, typename... Ts> | |
struct get_index_impl<I, T, T, Ts...> : std::integral_constant<size_t, I>{}; | |
template <size_t I, typename T, typename U, typename... Ts> | |
struct get_index_impl<I, T, U, Ts...> : get_index_impl<I+1, T, Ts...>{}; | |
template <typename T, typename... Ts> | |
struct get_index<T, std::variant<Ts...>> : get_index_impl<0, T, Ts...>{}; | |
template <typename T, typename... Ts> | |
constexpr auto get_index_v = get_index<T, Ts...>::value; | |
// ---------------------------------------------------------------------------- | |
// unwrap_reference | |
// ---------------------------------------------------------------------------- | |
template <class T> | |
struct unwrap_reference { using type = T; }; | |
template <class U> | |
struct unwrap_reference<std::reference_wrapper<U>> { using type = U&; }; | |
template<class T> | |
using unwrap_reference_t = typename unwrap_reference<T>::type; | |
template< class T > | |
struct unwrap_ref_decay : unwrap_reference<std::decay_t<T>> {}; | |
template<class T> | |
using unwrap_ref_decay_t = typename unwrap_ref_decay<T>::type; | |
// ---------------------------------------------------------------------------- | |
// stateful iterators | |
// ---------------------------------------------------------------------------- | |
// STL-styled iterator | |
template <typename B, typename E> | |
struct stateful_iterator { | |
using TB = std::decay_t<unwrap_ref_decay_t<B>>; | |
using TE = std::decay_t<unwrap_ref_decay_t<E>>; | |
static_assert(std::is_same_v<TB, TE>, "decayed iterator types must match"); | |
using type = TB; | |
}; | |
template <typename B, typename E> | |
using stateful_iterator_t = typename stateful_iterator<B, E>::type; | |
// raw integral index | |
template <typename B, typename E, typename S> | |
struct stateful_index { | |
using TB = std::decay_t<unwrap_ref_decay_t<B>>; | |
using TE = std::decay_t<unwrap_ref_decay_t<E>>; | |
using TS = std::decay_t<unwrap_ref_decay_t<S>>; | |
static_assert( | |
std::is_integral_v<TB>, "decayed beg index must be an integral type" | |
); | |
static_assert( | |
std::is_integral_v<TE>, "decayed end index must be an integral type" | |
); | |
static_assert( | |
std::is_integral_v<TS>, "decayed step must be an integral type" | |
); | |
static_assert( | |
std::is_same_v<TB, TE> && std::is_same_v<TE, TS>, | |
"decayed index and step types must match" | |
); | |
using type = TB; | |
}; | |
template <typename B, typename E, typename S> | |
using stateful_index_t = typename stateful_index<B, E, S>::type; | |
// ---------------------------------------------------------------------------- | |
// visit a tuple with a functor at runtime | |
// ---------------------------------------------------------------------------- | |
template <typename Func, typename Tuple, size_t N = 0> | |
void visit_tuple(Func func, Tuple& tup, size_t idx) { | |
if (N == idx) { | |
std::invoke(func, std::get<N>(tup)); | |
return; | |
} | |
if constexpr (N + 1 < std::tuple_size_v<Tuple>) { | |
return visit_tuple<Func, Tuple, N + 1>(func, tup, idx); | |
} | |
} | |
// ---------------------------------------------------------------------------- | |
// unroll loop | |
// ---------------------------------------------------------------------------- | |
// Template unrolled looping construct. | |
template<auto beg, auto end, auto step, bool valid = (beg < end)> | |
struct Unroll { | |
template<typename F> | |
static void eval(F f) { | |
f(beg); | |
Unroll<beg + step, end, step>::eval(f); | |
} | |
}; | |
template<auto beg, auto end, auto step> | |
struct Unroll<beg, end, step, false> { | |
template<typename F> | |
static void eval(F) { } | |
}; | |
template<auto beg, auto end, auto step, typename F> | |
void unroll(F f) { | |
Unroll<beg, end, step>::eval(f); | |
} | |
// ---------------------------------------------------------------------------- | |
// make types of variant unique | |
// ---------------------------------------------------------------------------- | |
template <typename T, typename... Ts> | |
struct filter_duplicates { using type = T; }; | |
template <template <typename...> class C, typename... Ts, typename U, typename... Us> | |
struct filter_duplicates<C<Ts...>, U, Us...> | |
: std::conditional_t<(std::is_same_v<U, Ts> || ...) | |
, filter_duplicates<C<Ts...>, Us...> | |
, filter_duplicates<C<Ts..., U>, Us...>> {}; | |
template <typename T> | |
struct unique_variant; | |
template <typename... Ts> | |
struct unique_variant<std::variant<Ts...>> : filter_duplicates<std::variant<>, Ts...> {}; | |
template <typename T> | |
using unique_variant_t = typename unique_variant<T>::type; | |
// ---------------------------------------------------------------------------- | |
// check if it is default compare | |
// ---------------------------------------------------------------------------- | |
template <typename T> struct is_std_compare : std::false_type { }; | |
template <typename T> struct is_std_compare<std::less<T>> : std::true_type { }; | |
template <typename T> struct is_std_compare<std::greater<T>> : std::true_type { }; | |
template <typename T> | |
constexpr static bool is_std_compare_v = is_std_compare<T>::value; | |
// ---------------------------------------------------------------------------- | |
// check if all types are the same | |
// ---------------------------------------------------------------------------- | |
template<bool...> | |
struct bool_pack; | |
template<bool... bs> | |
using all_true = std::is_same<bool_pack<bs..., true>, bool_pack<true, bs...>>; | |
template <typename T, typename... Ts> | |
using all_same = all_true<std::is_same_v<T, Ts>...>; | |
template <typename T, typename... Ts> | |
constexpr bool all_same_v = all_same<T, Ts...>::value; | |
} // end of namespace tf. ---------------------------------------------------- | |
#include <cstddef> | |
#include <type_traits> | |
namespace tf { | |
template <typename T> | |
constexpr std::enable_if_t<std::is_integral<std::decay_t<T>>::value, bool> | |
is_range_invalid(T beg, T end, T step) { | |
return ((step == 0 && beg != end) || | |
(beg < end && step <= 0) || | |
(beg > end && step >= 0)); | |
} | |
template <typename T> | |
constexpr std::enable_if_t<std::is_integral<std::decay_t<T>>::value, size_t> | |
distance(T beg, T end, T step) { | |
return (end - beg + step + (step > 0 ? -1 : 1)) / step; | |
} | |
} // end of namespace tf ----------------------------------------------------- | |
// 2020/03/13 - modified by Tsung-Wei Huang | |
// - fixed bug in aligning memory | |
// | |
// 2020/02/02 - modified by Tsung-Wei Huang | |
// - new implementation motivated by Hoard | |
// | |
// 2019/07/10 - modified by Tsung-Wei Huang | |
// - replace raw pointer with smart pointer | |
// | |
// 2019/06/13 - created by Tsung-Wei Huang | |
// - implemented an object pool class | |
#include <thread> | |
#include <atomic> | |
#include <mutex> | |
#include <vector> | |
#include <cassert> | |
#include <cstddef> | |
namespace tf { | |
#define TF_ENABLE_POOLABLE_ON_THIS \ | |
template <typename T, size_t S> friend class ObjectPool; \ | |
void* _object_pool_block | |
// Class: ObjectPool | |
// | |
// The class implements an efficient thread-safe object pool motivated | |
// by the Hoard memory allocator algorithm. | |
// Different from the normal memory allocator, object pool allocates | |
// only one object at a time. | |
// | |
// Internall, we use the following variables to maintain blocks and heaps: | |
// X: size in byte of a item slot | |
// M: number of items per block | |
// F: emptiness threshold | |
// B: number of bins per local heap (bin[B-1] is the full list) | |
// W: number of items per bin | |
// K: shrinkness constant | |
// | |
// Example scenario 1: | |
// M = 30 | |
// F = 4 | |
// W = (30+4-1)/4 = 8 | |
// | |
// b0: 0, 1, 2, 3, 4, 5, 6, 7 | |
// b1: 8, 9, 10, 11, 12, 13, 14, 15 | |
// b2: 16, 17, 18, 19, 20, 21, 22, 23 | |
// b3: 24, 25, 26, 27, 28, 29 | |
// b4: 30 (anything equal to M) | |
// | |
// Example scenario 2: | |
// M = 32 | |
// F = 4 | |
// W = (32+4-1)/4 = 8 | |
// b0: 0, 1, 2, 3, 4, 5, 6, 7 | |
// b1: 8, 9, 10, 11, 12, 13, 14, 15 | |
// b2: 16, 17, 18, 19, 20, 21, 22, 23 | |
// b3: 24, 25, 26, 27, 28, 29, 30, 31 | |
// b4: 32 (anything equal to M) | |
// | |
template <typename T, size_t S = 65536> | |
class ObjectPool { | |
// the data column must be sufficient to hold the pointer in freelist | |
constexpr static size_t X = (std::max)(sizeof(T*), sizeof(T)); | |
//constexpr static size_t X = sizeof(long double) + std::max(sizeof(T*), sizeof(T)); | |
//constexpr static size_t M = (S - offsetof(Block, data)) / X; | |
constexpr static size_t M = S / X; | |
constexpr static size_t F = 4; | |
constexpr static size_t B = F + 1; | |
constexpr static size_t W = (M + F - 1) / F; | |
constexpr static size_t K = 4; | |
static_assert( | |
S && (!(S & (S-1))), "block size S must be a power of two" | |
); | |
static_assert( | |
M >= 128, "block size S must be larger enough to pool at least 128 objects" | |
); | |
struct Blocklist { | |
Blocklist* prev; | |
Blocklist* next; | |
}; | |
struct GlobalHeap { | |
std::mutex mutex; | |
Blocklist list; | |
}; | |
struct LocalHeap { | |
std::mutex mutex; | |
Blocklist lists[B]; | |
size_t u {0}; | |
size_t a {0}; | |
}; | |
struct Block { | |
std::atomic<LocalHeap*> heap; | |
Blocklist list_node; | |
size_t i; | |
size_t u; | |
T* top; | |
// long double padding; | |
char data[S]; | |
}; | |
public: | |
/** | |
@brief constructs an object pool from a number of anticipated threads | |
*/ | |
explicit ObjectPool(unsigned = std::thread::hardware_concurrency()); | |
/** | |
@brief destructs the object pool | |
*/ | |
~ObjectPool(); | |
/** | |
@brief acquires a pointer to a object constructed from a given argument list | |
*/ | |
template <typename... ArgsT> | |
T* animate(ArgsT&&... args); | |
/** | |
@brief recycles a object pointed by @c ptr and destroys it | |
*/ | |
void recycle(T* ptr); | |
size_t num_bins_per_local_heap() const; | |
size_t num_objects_per_bin() const; | |
size_t num_objects_per_block() const; | |
size_t num_available_objects() const; | |
size_t num_allocated_objects() const; | |
size_t capacity() const; | |
size_t num_local_heaps() const; | |
size_t num_global_heaps() const; | |
size_t num_heaps() const; | |
float emptiness_threshold() const; | |
private: | |
const size_t _lheap_mask; | |
GlobalHeap _gheap; | |
std::vector<LocalHeap> _lheaps; | |
LocalHeap& _this_heap(); | |
constexpr unsigned _next_pow2(unsigned n) const; | |
template <class P, class Q> | |
constexpr size_t _offset_in_class(const Q P::*member) const; | |
template <class P, class Q> | |
constexpr P* _parent_class_of(Q*, const Q P::*member); | |
template <class P, class Q> | |
constexpr P* _parent_class_of(const Q*, const Q P::*member) const; | |
constexpr Block* _block_of(Blocklist*); | |
constexpr Block* _block_of(const Blocklist*) const; | |
size_t _bin(size_t) const; | |
T* _allocate(Block*); | |
void _deallocate(Block*, T*); | |
void _blocklist_init_head(Blocklist*); | |
void _blocklist_add_impl(Blocklist*, Blocklist*, Blocklist*); | |
void _blocklist_push_front(Blocklist*, Blocklist*); | |
void _blocklist_push_back(Blocklist*, Blocklist*); | |
void _blocklist_del_impl(Blocklist*, Blocklist*); | |
void _blocklist_del(Blocklist*); | |
void _blocklist_replace(Blocklist*, Blocklist*); | |
void _blocklist_move_front(Blocklist*, Blocklist*); | |
void _blocklist_move_back(Blocklist*, Blocklist*); | |
bool _blocklist_is_first(const Blocklist*, const Blocklist*); | |
bool _blocklist_is_last(const Blocklist*, const Blocklist*); | |
bool _blocklist_is_empty(const Blocklist*); | |
bool _blocklist_is_singular(const Blocklist*); | |
template <typename C> | |
void _for_each_block_safe(Blocklist*, C&&); | |
template <typename C> | |
void _for_each_block(Blocklist*, C&&); | |
}; | |
// ---------------------------------------------------------------------------- | |
// ObjectPool definition | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename T, size_t S> | |
ObjectPool<T, S>::ObjectPool(unsigned t) : | |
//_heap_mask {(_next_pow2(t) << 1) - 1u}, | |
//_heap_mask { _next_pow2(t<<1) - 1u }, | |
//_heap_mask {(t << 1) - 1}, | |
_lheap_mask { _next_pow2((t+1) << 1) - 1 }, | |
_lheaps { _lheap_mask + 1 } { | |
_blocklist_init_head(&_gheap.list); | |
for(auto& h : _lheaps) { | |
for(size_t i=0; i<B; ++i) { | |
_blocklist_init_head(&h.lists[i]); | |
} | |
} | |
} | |
// Destructor | |
template <typename T, size_t S> | |
ObjectPool<T, S>::~ObjectPool() { | |
// clear local heaps | |
for(auto& h : _lheaps) { | |
for(size_t i=0; i<B; ++i) { | |
_for_each_block_safe(&h.lists[i], [] (Block* b) { | |
//std::free(b); | |
delete b; | |
}); | |
} | |
} | |
// clear global heap | |
_for_each_block_safe(&_gheap.list, [] (Block* b) { | |
//std::free(b); | |
delete b; | |
}); | |
} | |
// Function: num_bins_per_local_heap | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_bins_per_local_heap() const { | |
return B; | |
} | |
// Function: num_objects_per_bin | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_objects_per_bin() const { | |
return W; | |
} | |
// Function: num_objects_per_block | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_objects_per_block() const { | |
return M; | |
} | |
// Function: emptiness_threshold | |
template <typename T, size_t S> | |
float ObjectPool<T, S>::emptiness_threshold() const { | |
return 1.0f/F; | |
} | |
// Function: num_global_heaps | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_global_heaps() const { | |
return 1; | |
} | |
// Function: num_lheaps | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_local_heaps() const { | |
return _lheaps.size(); | |
} | |
// Function: num_heaps | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_heaps() const { | |
return _lheaps.size() + 1; | |
} | |
// Function: capacity | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::capacity() const { | |
size_t n = 0; | |
// global heap | |
for(auto p=_gheap.list.next; p!=&_gheap.list; p=p->next) { | |
n += M; | |
}; | |
// local heap | |
for(auto& h : _lheaps) { | |
n += h.a; | |
} | |
return n; | |
} | |
// Function: num_available_objects | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_available_objects() const { | |
size_t n = 0; | |
// global heap | |
for(auto p=_gheap.list.next; p!=&_gheap.list; p=p->next) { | |
n += (M - _block_of(p)->u); | |
}; | |
// local heap | |
for(auto& h : _lheaps) { | |
n += (h.a - h.u); | |
} | |
return n; | |
} | |
// Function: num_allocated_objects | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::num_allocated_objects() const { | |
size_t n = 0; | |
// global heap | |
for(auto p=_gheap.list.next; p!=&_gheap.list; p=p->next) { | |
n += _block_of(p)->u; | |
}; | |
// local heap | |
for(auto& h : _lheaps) { | |
n += h.u; | |
} | |
return n; | |
} | |
// Function: _bin | |
template <typename T, size_t S> | |
size_t ObjectPool<T, S>::_bin(size_t u) const { | |
return u == M ? F : u/W; | |
} | |
// Function: _offset_in_class | |
template <typename T, size_t S> | |
template <class P, class Q> | |
constexpr size_t ObjectPool<T, S>::_offset_in_class( | |
const Q P::*member) const { | |
return (size_t) &( reinterpret_cast<P*>(0)->*member); | |
} | |
// C macro: parent_class_of(list_pointer, Block, list) | |
// C++: parent_class_of(list_pointer, &Block::list) | |
template <typename T, size_t S> | |
template <class P, class Q> | |
constexpr P* ObjectPool<T, S>::_parent_class_of( | |
Q* ptr, const Q P::*member | |
) { | |
return (P*)( (char*)ptr - _offset_in_class(member)); | |
} | |
// Function: _parent_class_of | |
template <typename T, size_t S> | |
template <class P, class Q> | |
constexpr P* ObjectPool<T, S>::_parent_class_of( | |
const Q* ptr, const Q P::*member | |
) const { | |
return (P*)( (char*)ptr - _offset_in_class(member)); | |
} | |
// Function: _block_of | |
template <typename T, size_t S> | |
constexpr typename ObjectPool<T, S>::Block* | |
ObjectPool<T, S>::_block_of(Blocklist* list) { | |
return _parent_class_of(list, &Block::list_node); | |
} | |
// Function: _block_of | |
template <typename T, size_t S> | |
constexpr typename ObjectPool<T, S>::Block* | |
ObjectPool<T, S>::_block_of(const Blocklist* list) const { | |
return _parent_class_of(list, &Block::list_node); | |
} | |
// Procedure: initialize a list head | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_init_head(Blocklist *list) { | |
list->next = list; | |
list->prev = list; | |
} | |
// Procedure: _blocklist_add_impl | |
// Insert a new entry between two known consecutive entries. | |
// | |
// This is only for internal list manipulation where we know | |
// the prev/next entries already! | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_add_impl( | |
Blocklist *curr, Blocklist *prev, Blocklist *next | |
) { | |
next->prev = curr; | |
curr->next = next; | |
curr->prev = prev; | |
prev->next = curr; | |
} | |
// list_push_front - add a new entry | |
// @curr: curr entry to be added | |
// @head: list head to add it after | |
// | |
// Insert a new entry after the specified head. | |
// This is good for implementing stacks. | |
// | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_push_front( | |
Blocklist *curr, Blocklist *head | |
) { | |
_blocklist_add_impl(curr, head, head->next); | |
} | |
// list_add_tail - add a new entry | |
// @curr: curr entry to be added | |
// @head: list head to add it before | |
// | |
// Insert a new entry before the specified head. | |
// This is useful for implementing queues. | |
// | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_push_back( | |
Blocklist *curr, Blocklist *head | |
) { | |
_blocklist_add_impl(curr, head->prev, head); | |
} | |
// Delete a list entry by making the prev/next entries | |
// point to each other. | |
// | |
// This is only for internal list manipulation where we know | |
// the prev/next entries already! | |
// | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_del_impl( | |
Blocklist * prev, Blocklist * next | |
) { | |
next->prev = prev; | |
prev->next = next; | |
} | |
// _blocklist_del - deletes entry from list. | |
// @entry: the element to delete from the list. | |
// Note: list_empty() on entry does not return true after this, the entry is | |
// in an undefined state. | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_del(Blocklist *entry) { | |
_blocklist_del_impl(entry->prev, entry->next); | |
entry->next = nullptr; | |
entry->prev = nullptr; | |
} | |
// list_replace - replace old entry by new one | |
// @old : the element to be replaced | |
// @curr : the new element to insert | |
// | |
// If @old was empty, it will be overwritten. | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_replace( | |
Blocklist *old, Blocklist *curr | |
) { | |
curr->next = old->next; | |
curr->next->prev = curr; | |
curr->prev = old->prev; | |
curr->prev->next = curr; | |
} | |
// list_move - delete from one list and add as another's head | |
// @list: the entry to move | |
// @head: the head that will precede our entry | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_move_front( | |
Blocklist *list, Blocklist *head | |
) { | |
_blocklist_del_impl(list->prev, list->next); | |
_blocklist_push_front(list, head); | |
} | |
// list_move_tail - delete from one list and add as another's tail | |
// @list: the entry to move | |
// @head: the head that will follow our entry | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_blocklist_move_back( | |
Blocklist *list, Blocklist *head | |
) { | |
_blocklist_del_impl(list->prev, list->next); | |
_blocklist_push_back(list, head); | |
} | |
// list_is_first - tests whether @list is the last entry in list @head | |
// @list: the entry to test | |
// @head: the head of the list | |
template <typename T, size_t S> | |
bool ObjectPool<T, S>::_blocklist_is_first( | |
const Blocklist *list, const Blocklist *head | |
) { | |
return list->prev == head; | |
} | |
// list_is_last - tests whether @list is the last entry in list @head | |
// @list: the entry to test | |
// @head: the head of the list | |
template <typename T, size_t S> | |
bool ObjectPool<T, S>::_blocklist_is_last( | |
const Blocklist *list, const Blocklist *head | |
) { | |
return list->next == head; | |
} | |
// list_empty - tests whether a list is empty | |
// @head: the list to test. | |
template <typename T, size_t S> | |
bool ObjectPool<T, S>::_blocklist_is_empty(const Blocklist *head) { | |
return head->next == head; | |
} | |
// list_is_singular - tests whether a list has just one entry. | |
// @head: the list to test. | |
template <typename T, size_t S> | |
bool ObjectPool<T, S>::_blocklist_is_singular( | |
const Blocklist *head | |
) { | |
return !_blocklist_is_empty(head) && (head->next == head->prev); | |
} | |
// Procedure: _for_each_block | |
template <typename T, size_t S> | |
template <typename C> | |
void ObjectPool<T, S>::_for_each_block(Blocklist* head, C&& c) { | |
Blocklist* p; | |
for(p=head->next; p!=head; p=p->next) { | |
c(_block_of(p)); | |
} | |
} | |
// Procedure: _for_each_block_safe | |
// Iterate each item of a list - safe to free | |
template <typename T, size_t S> | |
template <typename C> | |
void ObjectPool<T, S>::_for_each_block_safe(Blocklist* head, C&& c) { | |
Blocklist* p; | |
Blocklist* t; | |
for(p=head->next, t=p->next; p!=head; p=t, t=p->next) { | |
c(_block_of(p)); | |
} | |
} | |
// Function: _allocate | |
// allocate a spot from the block | |
template <typename T, size_t S> | |
T* ObjectPool<T, S>::_allocate(Block* s) { | |
if(s->top == nullptr) { | |
return reinterpret_cast<T*>(s->data + s->i++ * X); | |
} | |
else { | |
T* retval = s->top; | |
s->top = *(reinterpret_cast<T**>(s->top)); | |
return retval; | |
} | |
} | |
// Procedure: _deallocate | |
template <typename T, size_t S> | |
void ObjectPool<T, S>::_deallocate(Block* s, T* ptr) { | |
*(reinterpret_cast<T**>(ptr)) = s->top; | |
s->top = ptr; | |
} | |
// Function: allocate | |
template <typename T, size_t S> | |
template <typename... ArgsT> | |
T* ObjectPool<T, S>::animate(ArgsT&&... args) { | |
//std::cout << "construct a new item\n"; | |
// my logically mapped heap | |
LocalHeap& h = _this_heap(); | |
Block* s {nullptr}; | |
h.mutex.lock(); | |
// scan the list of superblocks from the most full to the least full | |
int f = static_cast<int>(F-1); | |
for(; f>=0; f--) { | |
if(!_blocklist_is_empty(&h.lists[f])) { | |
s = _block_of(h.lists[f].next); | |
break; | |
} | |
} | |
// no superblock found | |
if(f == -1) { | |
// check heap 0 for a superblock | |
_gheap.mutex.lock(); | |
if(!_blocklist_is_empty(&_gheap.list)) { | |
s = _block_of(_gheap.list.next); | |
//printf("get a superblock from global heap %lu\n", s->u); | |
assert(s->u < M && s->heap == nullptr); | |
f = static_cast<int>(_bin(s->u + 1)); | |
_blocklist_move_front(&s->list_node, &h.lists[f]); | |
s->heap = &h; // must be within the global heap lock | |
_gheap.mutex.unlock(); | |
h.u = h.u + s->u; | |
h.a = h.a + M; | |
} | |
// create a new block | |
else { | |
//printf("create a new superblock\n"); | |
_gheap.mutex.unlock(); | |
f = 0; | |
//s = static_cast<Block*>(std::malloc(sizeof(Block))); | |
s = new Block(); | |
if(s == nullptr) { | |
throw std::bad_alloc(); | |
} | |
s->heap = &h; | |
s->i = 0; | |
s->u = 0; | |
s->top = nullptr; | |
_blocklist_push_front(&s->list_node, &h.lists[f]); | |
h.a = h.a + M; | |
} | |
} | |
// the superblock must have at least one space | |
//assert(s->u < M); | |
//printf("%lu %lu %lu\n", h.u, h.a, s->u); | |
//assert(h.u < h.a); | |
h.u = h.u + 1; | |
s->u = s->u + 1; | |
// take one item from the superblock | |
T* mem = _allocate(s); | |
int b = static_cast<int>(_bin(s->u)); | |
if(b != f) { | |
//printf("move superblock from list[%d] to list[%d]\n", f, b); | |
_blocklist_move_front(&s->list_node, &h.lists[b]); | |
} | |
//std::cout << "s.i " << s->i << '\n' | |
// << "s.u " << s->u << '\n' | |
// << "h.u " << h.u << '\n' | |
// << "h.a " << h.a << '\n'; | |
h.mutex.unlock(); | |
//printf("allocate %p (s=%p)\n", mem, s); | |
new (mem) T(std::forward<ArgsT>(args)...); | |
mem->_object_pool_block = s; | |
return mem; | |
} | |
// Function: recycle
// Destroys the object and returns its slot to the owning superblock,
// possibly migrating a mostly-empty superblock back to the global heap.
template <typename T, size_t S>
void ObjectPool<T, S>::recycle(T* mem) {
  //Block* s = *reinterpret_cast<Block**>(
  //  reinterpret_cast<char*>(mem) - sizeof(Block**)
  //);
  //Block* s= *(reinterpret_cast<Block**>(mem) - O); // (mem) - 1
  // the owning superblock was recorded at allocation time (see animate)
  Block* s = static_cast<Block*>(mem->_object_pool_block);
  mem->~T();
  //printf("deallocate %p (s=%p) M=%lu W=%lu X=%lu\n", mem, s, M, W, X);
  // here we need a loop because when we lock the heap,
  // other threads may have removed the superblock to another heap
  bool sync = false;
  do {
    LocalHeap* h = s->heap.load(std::memory_order_relaxed);
    // the block is in global heap
    if(h == nullptr) {
      std::lock_guard<std::mutex> glock(_gheap.mutex);
      // recheck under the lock that the block still belongs to the global heap
      if(s->heap == h) {
        sync = true;
        _deallocate(s, mem);
        s->u = s->u - 1;
      }
    }
    else {
      std::lock_guard<std::mutex> llock(h->mutex);
      // recheck under the lock that the block still belongs to heap h
      if(s->heap == h) {
        sync = true;
        // deallocate the item from the superblock
        size_t f = _bin(s->u);
        _deallocate(s, mem);
        s->u = s->u - 1;
        h->u = h->u - 1;
        size_t b = _bin(s->u);
        if(b != f) {
          //printf("move superblock from list[%d] to list[%d]\n", f, b);
          // usage count crossed a bin boundary; rebin the superblock
          _blocklist_move_front(&s->list_node, &h->lists[b]);
        }
        // transfer a mostly-empty superblock to global heap when the local
        // heap is sufficiently under-utilized (emptiness thresholds K and F)
        if((h->u + K*M < h->a) && (h->u < ((F-1) * h->a / F))) {
          for(size_t i=0; i<F; i++) {
            if(!_blocklist_is_empty(&h->lists[i])) {
              Block* x = _block_of(h->lists[i].next);
              //printf("transfer a block (x.u=%lu/x.i=%lu) to the global heap\n", x->u, x->i);
              assert(h->u > x->u && h->a > M);
              h->u = h->u - x->u;
              h->a = h->a - M;
              // detach from the local heap before taking the global lock
              x->heap = nullptr;
              std::lock_guard<std::mutex> glock(_gheap.mutex);
              _blocklist_move_front(&x->list_node, &_gheap.list);
              break;
            }
          }
        }
      }
    }
  } while(!sync);
  //std::cout << "s.i " << s->i << '\n'
  //          << "s.u " << s->u << '\n';
}
// Function: _this_heap | |
template <typename T, size_t S> | |
typename ObjectPool<T, S>::LocalHeap& | |
ObjectPool<T, S>::_this_heap() { | |
// here we don't use thread local since object pool might be | |
// created and destroyed multiple times | |
//thread_local auto hv = std::hash<std::thread::id>()(std::this_thread::get_id()); | |
//return _lheaps[hv & _lheap_mask]; | |
return _lheaps[ | |
std::hash<std::thread::id>()(std::this_thread::get_id()) & _lheap_mask | |
]; | |
} | |
// Function: _next_pow2 | |
template <typename T, size_t S> | |
constexpr unsigned ObjectPool<T, S>::_next_pow2(unsigned n) const { | |
if(n == 0) return 1; | |
n--; | |
n |= n >> 1; | |
n |= n >> 2; | |
n |= n >> 4; | |
n |= n >> 8; | |
n |= n >> 16; | |
n++; | |
return n; | |
} | |
} // end namespace tf -------------------------------------------------------- | |
#include <atomic> | |
namespace tf { | |
// Rounds the given 64-bit unsigned integer up to the nearest power of two
// that is >= x (x == 0 yields 1).
template <typename T, std::enable_if_t<
  (std::is_unsigned_v<std::decay_t<T>> && sizeof(T) == 8) , void
>* = nullptr>
constexpr T next_pow2(T x) {
  if(x == 0) {
    return 1;
  }
  --x;
  // propagate the most significant set bit into all lower positions
  for(unsigned shift = 1; shift < 64; shift <<= 1) {
    x |= (x >> shift);
  }
  return x + 1;
}
// Rounds the given 32-bit unsigned integer up to the nearest power of two
// that is >= x (x == 0 yields 1).
template <typename T, std::enable_if_t<
  (std::is_unsigned_v<std::decay_t<T>> && sizeof(T) == 4), void
>* = nullptr>
constexpr T next_pow2(T x) {
  if(x == 0) {
    return 1;
  }
  --x;
  // propagate the most significant set bit into all lower positions
  for(unsigned shift = 1; shift < 32; shift <<= 1) {
    x |= (x >> shift);
  }
  return x + 1;
}
// Checks whether the given integer is a power of two.
template <typename T, std::enable_if_t<
  std::is_integral_v<std::decay_t<T>>, void>* = nullptr
>
constexpr bool is_pow2(const T& x) {
  // a power of two has exactly one bit set
  return (x != 0) && ((x & (x - 1)) == 0);
}
//// finds the ceil of x divided by b | |
//template <typename T, std::enable_if_t< | |
// std::is_integral_v<std::decay_t<T>>, void>* = nullptr | |
//> | |
//constexpr T ceil(const T& x, const T& y) { | |
// //return (x + y - 1) / y; | |
// return (x-1) / y + 1; | |
//} | |
/**
@brief returns floor(log2(n)); assumes n > 0
*/
template<typename T>
constexpr int log2(T n) {
  int result = 0;
  // count how many times n can be halved before reaching 1
  for(; n > 1; n >>= 1) {
    ++result;
  }
  return result;
}
/**
@brief finds the iterator whose dereferenced value is the median of the
three given iterators, using the given comparator
*/
template <typename RandItr, typename C>
RandItr median_of_three(RandItr l, RandItr m, RandItr r, C cmp) {
  if(cmp(*l, *m)) {
    if(cmp(*m, *r)) {
      return m;
    }
    return cmp(*l, *r) ? r : l;
  }
  if(cmp(*r, *m)) {
    return m;
  }
  return cmp(*r, *l) ? r : l;
}
/**
@brief finds the pseudo median of a range of items using nine evenly
spread samples
*/
template <typename RandItr, typename C>
RandItr pseudo_median_of_nine(RandItr beg, RandItr end, C cmp) {
  const size_t len = std::distance(beg, end);
  const size_t step = len >> 3;
  // median of the first, middle, and last thirds, then the median of those
  RandItr m1 = median_of_three(beg, beg + step, beg + (step * 2), cmp);
  RandItr m2 = median_of_three(
    beg + (step * 3), beg + (step * 4), beg + (step * 5), cmp
  );
  RandItr m3 = median_of_three(beg + (step * 6), beg + (step * 7), end - 1, cmp);
  return median_of_three(m1, m2, m3, cmp);
}
/**
@brief orders two dereferenced iterators so the smaller value (under cmp)
comes first
*/
template<typename Iter, typename Compare>
void sort2(Iter x, Iter y, Compare cmp) {
  if(cmp(*y, *x)) {
    std::iter_swap(x, y);
  }
}
/**
@brief sorts three dereferenced iterators into ascending order under the
given comparison function (a three-element sorting network)
*/
template<typename Iter, typename Compare>
void sort3(Iter a, Iter b, Iter c, Compare comp) {
  if(comp(*b, *a)) std::iter_swap(a, b);
  if(comp(*c, *b)) std::iter_swap(b, c);
  if(comp(*b, *a)) std::iter_swap(a, b);
}
/**
@brief generates a program-wide unique id of the given integral type
(thread-safe); ids start at 0 and increase by one per call
*/
template <typename T, std::enable_if_t<std::is_integral_v<T>, void>* = nullptr>
T unique_id() {
  // one shared counter per instantiated integral type T
  static std::atomic<T> next_id{0};
  return next_id.fetch_add(1, std::memory_order_relaxed);
}
/**
@brief atomically raises v to max_v if max_v is larger (lock-free CAS loop)
*/
template <typename T>
inline void atomic_max(std::atomic<T>& v, const T& max_v) noexcept {
  T observed = v.load(std::memory_order_relaxed);
  // retry while the stored value is still smaller than max_v;
  // compare_exchange_weak refreshes 'observed' on failure
  while(observed < max_v) {
    if(v.compare_exchange_weak(observed, max_v, std::memory_order_relaxed,
                               std::memory_order_relaxed)) {
      break;
    }
  }
}
/**
@brief atomically lowers v to min_v if min_v is smaller (lock-free CAS loop)
*/
template <typename T>
inline void atomic_min(std::atomic<T>& v, const T& min_v) noexcept {
  T observed = v.load(std::memory_order_relaxed);
  // retry while the stored value is still larger than min_v;
  // compare_exchange_weak refreshes 'observed' on failure
  while(observed > min_v) {
    if(v.compare_exchange_weak(observed, min_v, std::memory_order_relaxed,
                               std::memory_order_relaxed)) {
      break;
    }
  }
}
} // end of namespace tf ----------------------------------------------------- | |
// small vector modified from llvm | |
#include <algorithm> | |
#include <cassert> | |
#include <cstddef> | |
#include <cstdlib> | |
#include <cstring> | |
#include <initializer_list> | |
#include <iterator> | |
#include <memory> | |
#if defined(__GNUC__) | |
#define TF_LIKELY(x) (__builtin_expect((x), 1)) | |
#define TF_UNLIKELY(x) (__builtin_expect((x), 0)) | |
#else | |
#define TF_LIKELY(x) (x) | |
#define TF_UNLIKELY(x) (x) | |
#endif | |
/** | |
@file small_vector.hpp | |
@brief small vector include file | |
*/ | |
namespace tf { namespace detail { | |
/**
@private
@brief NextCapacity - returns the next power of two (in 64 bits) that is
strictly greater than A; returns zero on overflow.
This function assumes A to be positive.
*/
inline uint64_t NextCapacity(uint64_t A) {
  // smear the top set bit into every lower position ...
  for(unsigned shift = 1; shift < 64; shift <<= 1) {
    A |= (A >> shift);
  }
  // ... so A+1 is the next power of two (wraps to 0 when the top bit is set)
  return A + 1;
}
}} // end of namespace tf::detail -------------------------------------------- | |
namespace tf { | |
/**
@private
@brief trait that is true when T is POD-like (standard layout and trivial),
enabling the memcpy-based SmallVector specialization
*/
template <typename T>
struct IsPod : std::bool_constant<std::is_standard_layout_v<T> &&
                                  std::is_trivial_v<T>> {};
/**
@private
@brief Type-erased base of SmallVector: holds the begin/end/capacity
pointers and the byte-level growth routine shared by all POD instantiations.
*/
class SmallVectorBase {
protected:
  // [BeginX, EndX) is the in-use region; [BeginX, CapacityX) is the
  // allocated region. All three may point into the inline buffer that the
  // derived class lays out directly after this object.
  void *BeginX, *EndX, *CapacityX;
protected:
  SmallVectorBase(void *FirstEl, size_t Size)
    : BeginX(FirstEl), EndX(FirstEl), CapacityX((char*)FirstEl+Size) {}
  /// This is an implementation of the grow() method which only works
  /// on POD-like data types and is out of line to reduce code duplication.
  /// FirstEl is the address of the inline buffer; TSize is sizeof(T).
  void grow_pod(void *FirstEl, size_t MinSizeInBytes, size_t TSize){
    size_t CurSizeBytes = size_in_bytes();
    size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
    if (NewCapacityInBytes < MinSizeInBytes) {
      NewCapacityInBytes = MinSizeInBytes;
    }
    void *NewElts;
    if (BeginX == FirstEl) {
      // growing out of the inline buffer: switch to heap storage
      NewElts = std::malloc(NewCapacityInBytes);
      // Copy the elements over. No need to run dtors on PODs.
      memcpy(NewElts, this->BeginX, CurSizeBytes);
    } else {
      // If this wasn't grown from the inline copy, grow the allocated space.
      NewElts = realloc(this->BeginX, NewCapacityInBytes);
    }
    // NOTE(review): malloc/realloc results are used unchecked; on allocation
    // failure this dereferences nullptr (the original assert is disabled).
    //assert(NewElts && "Out of memory");
    this->EndX = (char*)NewElts+CurSizeBytes;
    this->BeginX = NewElts;
    this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
  }
public:
  /// This returns size()*sizeof(T).
  size_t size_in_bytes() const {
    return size_t((char*)EndX - (char*)BeginX);
  }
  /// capacity_in_bytes - This returns capacity()*sizeof(T).
  size_t capacity_in_bytes() const {
    return size_t((char*)CapacityX - (char*)BeginX);
  }
  bool empty() const { return BeginX == EndX; }
};
/** | |
@private | |
*/ | |
template <typename T, unsigned N> struct SmallVectorStorage; | |
/**
@private
@brief SmallVector machinery independent of whether T is POD-like:
the inline first element, iterator accessors, and size/capacity queries.
*/
template <typename T, typename = void>
class SmallVectorTemplateCommon : public SmallVectorBase {
private:
  template <typename, unsigned> friend struct SmallVectorStorage;
  // Raw, suitably aligned storage for one X; never implicitly constructed.
  template <typename X>
  struct AlignedUnionType {
    alignas(X) std::byte buff[std::max(sizeof(std::byte), sizeof(X))];
  };
  // Allocate raw space for N elements of type T. If T has a ctor or dtor, we
  // don't want it to be automatically run, so we need to represent the space as
  // something else. Use an array of char of sufficient alignment.
  // deprecated in c++23
  //typedef typename std::aligned_union<1, T>::type U;
  typedef AlignedUnionType<T> U;
  U FirstEl;
  // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
protected:
  SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(&FirstEl, Size) {}
  // Forward to the byte-level grower, supplying the inline-buffer address.
  void grow_pod(size_t MinSizeInBytes, size_t TSize) {
    SmallVectorBase::grow_pod(&FirstEl, MinSizeInBytes, TSize);
  }
  /// Return true if this is a smallvector which has not had dynamic
  /// memory allocated for it.
  bool isSmall() const {
    return BeginX == static_cast<const void*>(&FirstEl);
  }
  /// Put this vector in a state of being small.
  void resetToSmall() {
    BeginX = EndX = CapacityX = &FirstEl;
  }
  void setEnd(T *P) { this->EndX = P; }
public:
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef T value_type;
  typedef T *iterator;
  typedef const T *const_iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  typedef T &reference;
  typedef const T &const_reference;
  typedef T *pointer;
  typedef const T *const_pointer;
  // forward iterator creation methods.
  inline iterator begin() { return (iterator)this->BeginX; }
  inline const_iterator begin() const { return (const_iterator)this->BeginX; }
  inline iterator end() { return (iterator)this->EndX; }
  inline const_iterator end() const { return (const_iterator)this->EndX; }
protected:
  iterator capacity_ptr() { return (iterator)this->CapacityX; }
  const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;}
public:
  // reverse iterator creation methods.
  reverse_iterator rbegin() { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
  reverse_iterator rend() { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
  inline size_type size() const { return end()-begin(); }
  inline size_type max_size() const { return size_type(-1) / sizeof(T); }
  /// Return the total number of elements in the currently allocated buffer.
  size_t capacity() const { return capacity_ptr() - begin(); }
  /// Return a pointer to the vector's buffer, even if empty().
  pointer data() { return pointer(begin()); }
  /// Return a pointer to the vector's buffer, even if empty().
  const_pointer data() const { return const_pointer(begin()); }
  // Unchecked element access (the bounds assert is disabled).
  inline reference operator[](size_type idx) {
    //assert(idx < size());
    return begin()[idx];
  }
  inline const_reference operator[](size_type idx) const {
    //assert(idx < size());
    return begin()[idx];
  }
  reference front() {
    //assert(!empty());
    return begin()[0];
  }
  const_reference front() const {
    //assert(!empty());
    return begin()[0];
  }
  reference back() {
    //assert(!empty());
    return end()[-1];
  }
  const_reference back() const {
    //assert(!empty());
    return end()[-1];
  }
};
/**
@private
@brief SmallVector element operations for non-POD T: construction,
destruction, and moves must invoke real constructors and destructors.
*/
template <typename T, bool isPodLike>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
protected:
  SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
  // Destroy the elements of [S, E) in reverse order.
  static void destroy_range(T *S, T *E) {
    while (S != E) {
      --E;
      E->~T();
    }
  }
  /// Move the range [I, E) into the uninitialized memory starting with "Dest",
  /// constructing elements as needed.
  template<typename It1, typename It2>
  static void uninitialized_move(It1 I, It1 E, It2 Dest) {
    std::uninitialized_copy(std::make_move_iterator(I),
                            std::make_move_iterator(E), Dest);
  }
  /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
  /// constructing elements as needed.
  template<typename It1, typename It2>
  static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
    std::uninitialized_copy(I, E, Dest);
  }
  /// Grow the allocated memory (without initializing new elements), doubling
  /// the size of the allocated memory. Guarantees space for at least one more
  /// element, or MinSize more elements if specified.
  void grow(size_t MinSize = 0);
public:
  void push_back(const T &Elt) {
    if (TF_UNLIKELY(this->EndX >= this->CapacityX))
      this->grow();
    ::new ((void*) this->end()) T(Elt);
    this->setEnd(this->end()+1);
  }
  void push_back(T &&Elt) {
    if (TF_UNLIKELY(this->EndX >= this->CapacityX))
      this->grow();
    ::new ((void*) this->end()) T(::std::move(Elt));
    this->setEnd(this->end()+1);
  }
  void pop_back() {
    this->setEnd(this->end()-1);
    this->end()->~T();
  }
};
/** | |
@private | |
*/ | |
template <typename T, bool isPodLike> | |
void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) { | |
size_t CurCapacity = this->capacity(); | |
size_t CurSize = this->size(); | |
// Always grow, even from zero. | |
size_t NewCapacity = size_t(tf::detail::NextCapacity(CurCapacity+2)); | |
if (NewCapacity < MinSize) | |
NewCapacity = MinSize; | |
T *NewElts = static_cast<T*>(std::malloc(NewCapacity*sizeof(T))); | |
// Move the elements over. | |
this->uninitialized_move(this->begin(), this->end(), NewElts); | |
// Destroy the original elements. | |
destroy_range(this->begin(), this->end()); | |
// If this wasn't grown from the inline copy, deallocate the old space. | |
if (!this->isSmall()) | |
std::free(this->begin()); | |
this->setEnd(NewElts+CurSize); | |
this->BeginX = NewElts; | |
this->CapacityX = this->begin()+NewCapacity; | |
} | |
/**
@private
@brief SmallVector element operations specialized for POD-like T:
destruction is a no-op and copies/moves collapse to memcpy.
*/
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
protected:
  SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
  // No need to do a destroy loop for POD's.
  static void destroy_range(T *, T *) {}
  /// Move the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_move(It1 I, It1 E, It2 Dest) {
    // Just do a copy.
    uninitialized_copy(I, E, Dest);
  }
  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
    // Arbitrary iterator types; just use the basic implementation.
    std::uninitialized_copy(I, E, Dest);
  }
  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  /// Overload selected when both ranges are pointers to the same type.
  template <typename T1, typename T2>
  static void uninitialized_copy(
    T1 *I, T1 *E, T2 *Dest,
    typename std::enable_if<std::is_same<typename std::remove_const<T1>::type,
                                         T2>::value>::type * = nullptr) {
    // Use memcpy for PODs iterated by pointers (which includes SmallVector
    // iterators): std::uninitialized_copy optimizes to memmove, but we can
    // use memcpy here. Note that I and E are iterators and thus might be
    // invalid for memcpy if they are equal.
    if (I != E)
      memcpy(Dest, I, (E - I) * sizeof(T));
  }
  /// Double the size of the allocated memory, guaranteeing space for at
  /// least one more element or MinSize if specified.
  void grow(size_t MinSize = 0) {
    this->grow_pod(MinSize*sizeof(T), sizeof(T));
  }
public:
  void push_back(const T &Elt) {
    if (TF_UNLIKELY(this->EndX >= this->CapacityX))
      this->grow();
    memcpy(this->end(), &Elt, sizeof(T));
    this->setEnd(this->end()+1);
  }
  // No destructor call needed for PODs; just shrink the end pointer.
  void pop_back() {
    this->setEnd(this->end()-1);
  }
};
/**
@private
@brief Size-independent SmallVector interface: all the mutating container
operations (resize, append, insert, erase, assignment) that do not depend
on the number of inline elements N.
*/
template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T, IsPod<T>::value> {
  typedef SmallVectorTemplateBase<T, IsPod<T>::value> SuperClass;
  SmallVectorImpl(const SmallVectorImpl&) = delete;
public:
  typedef typename SuperClass::iterator iterator;
  typedef typename SuperClass::const_iterator const_iterator;
  typedef typename SuperClass::size_type size_type;
protected:
  // Default ctor - Initialize to empty.
  explicit SmallVectorImpl(unsigned N)
    : SmallVectorTemplateBase<T, IsPod<T>::value>(N*sizeof(T)) {
  }
public:
  ~SmallVectorImpl() {
    // Destroy the constructed elements in the vector.
    this->destroy_range(this->begin(), this->end());
    // If this wasn't grown from the inline copy, deallocate the old space.
    if (!this->isSmall())
      std::free(this->begin());
  }
  // Destroys all elements but keeps the current capacity.
  void clear() {
    this->destroy_range(this->begin(), this->end());
    this->EndX = this->BeginX;
  }
  // Resize to N elements, value-initializing any newly created ones.
  void resize(size_type N) {
    if (N < this->size()) {
      this->destroy_range(this->begin()+N, this->end());
      this->setEnd(this->begin()+N);
    } else if (N > this->size()) {
      if (this->capacity() < N)
        this->grow(N);
      for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
        new (&*I) T();
      this->setEnd(this->begin()+N);
    }
  }
  // Resize to N elements, copy-constructing new ones from NV.
  void resize(size_type N, const T &NV) {
    if (N < this->size()) {
      this->destroy_range(this->begin()+N, this->end());
      this->setEnd(this->begin()+N);
    } else if (N > this->size()) {
      if (this->capacity() < N)
        this->grow(N);
      std::uninitialized_fill(this->end(), this->begin()+N, NV);
      this->setEnd(this->begin()+N);
    }
  }
  void reserve(size_type N) {
    if (this->capacity() < N)
      this->grow(N);
  }
  // Removes and returns the last element by move.
  T pop_back_val() {
    T Result = ::std::move(this->back());
    this->pop_back();
    return Result;
  }
  void swap(SmallVectorImpl &RHS);
  /// Add the specified range to the end of the SmallVector.
  template<typename in_iter>
  void append(in_iter in_start, in_iter in_end) {
    size_type NumInputs = std::distance(in_start, in_end);
    // Grow allocated space if needed.
    if (NumInputs > size_type(this->capacity_ptr()-this->end()))
      this->grow(this->size()+NumInputs);
    // Copy the new elements over.
    this->uninitialized_copy(in_start, in_end, this->end());
    this->setEnd(this->end() + NumInputs);
  }
  /// Add NumInputs copies of Elt to the end of the SmallVector.
  void append(size_type NumInputs, const T &Elt) {
    // Grow allocated space if needed.
    if (NumInputs > size_type(this->capacity_ptr()-this->end()))
      this->grow(this->size()+NumInputs);
    // Copy the new elements over.
    std::uninitialized_fill_n(this->end(), NumInputs, Elt);
    this->setEnd(this->end() + NumInputs);
  }
  void append(std::initializer_list<T> IL) {
    append(IL.begin(), IL.end());
  }
  // Replace the contents with NumElts copies of Elt.
  void assign(size_type NumElts, const T &Elt) {
    clear();
    if (this->capacity() < NumElts)
      this->grow(NumElts);
    this->setEnd(this->begin()+NumElts);
    std::uninitialized_fill(this->begin(), this->end(), Elt);
  }
  void assign(std::initializer_list<T> IL) {
    clear();
    append(IL);
  }
  // Erase the element at CI, shifting the tail down by one.
  iterator erase(const_iterator CI) {
    // Just cast away constness because this is a non-const member function.
    iterator I = const_cast<iterator>(CI);
    //assert(I >= this->begin() && "Iterator to erase is out of bounds.");
    //assert(I < this->end() && "Erasing at past-the-end iterator.");
    iterator N = I;
    // Shift all elts down one.
    std::move(I+1, this->end(), I);
    // Drop the last elt.
    this->pop_back();
    return(N);
  }
  // Erase the range [CS, CE), shifting the tail down.
  iterator erase(const_iterator CS, const_iterator CE) {
    // Just cast away constness because this is a non-const member function.
    iterator S = const_cast<iterator>(CS);
    iterator E = const_cast<iterator>(CE);
    //assert(S >= this->begin() && "Range to erase is out of bounds.");
    //assert(S <= E && "Trying to erase invalid range.");
    //assert(E <= this->end() && "Trying to erase past the end.");
    iterator N = S;
    // Shift all elts down.
    iterator I = std::move(E, this->end(), S);
    // Drop the last elts.
    this->destroy_range(I, this->end());
    this->setEnd(I);
    return(N);
  }
  iterator insert(iterator I, T &&Elt) {
    if (I == this->end()) {  // Important special case for empty vector.
      this->push_back(::std::move(Elt));
      return this->end()-1;
    }
    //assert(I >= this->begin() && "Insertion iterator is out of bounds.");
    //assert(I <= this->end() && "Inserting past the end of the vector.");
    if (this->EndX >= this->CapacityX) {
      // remember the insertion offset: grow() invalidates I
      size_t EltNo = I-this->begin();
      this->grow();
      I = this->begin()+EltNo;
    }
    ::new ((void*) this->end()) T(::std::move(this->back()));
    // Push everything else over.
    std::move_backward(I, this->end()-1, this->end());
    this->setEnd(this->end()+1);
    // If we just moved the element we're inserting, be sure to update
    // the reference.
    T *EltPtr = &Elt;
    if (I <= EltPtr && EltPtr < this->EndX)
      ++EltPtr;
    *I = ::std::move(*EltPtr);
    return I;
  }
  iterator insert(iterator I, const T &Elt) {
    if (I == this->end()) {  // Important special case for empty vector.
      this->push_back(Elt);
      return this->end()-1;
    }
    //assert(I >= this->begin() && "Insertion iterator is out of bounds.");
    //assert(I <= this->end() && "Inserting past the end of the vector.");
    if (this->EndX >= this->CapacityX) {
      // remember the insertion offset: grow() invalidates I
      size_t EltNo = I-this->begin();
      this->grow();
      I = this->begin()+EltNo;
    }
    ::new ((void*) this->end()) T(std::move(this->back()));
    // Push everything else over.
    std::move_backward(I, this->end()-1, this->end());
    this->setEnd(this->end()+1);
    // If we just moved the element we're inserting, be sure to update
    // the reference.
    const T *EltPtr = &Elt;
    if (I <= EltPtr && EltPtr < this->EndX)
      ++EltPtr;
    *I = *EltPtr;
    return I;
  }
  iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();
    if (I == this->end()) {  // Important special case for empty vector.
      append(NumToInsert, Elt);
      return this->begin()+InsertElt;
    }
    //assert(I >= this->begin() && "Insertion iterator is out of bounds.");
    //assert(I <= this->end() && "Inserting past the end of the vector.");
    // Ensure there is enough space.
    reserve(this->size() + NumToInsert);
    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;
    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion. Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));
      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);
      std::fill_n(I, NumToInsert, Elt);
      return I;
    }
    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.
    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->setEnd(this->end() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
    // Replace the overwritten part.
    std::fill_n(I, NumOverwritten, Elt);
    // Insert the non-overwritten middle part.
    std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt);
    return I;
  }
  template<typename ItTy>
  iterator insert(iterator I, ItTy From, ItTy To) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();
    if (I == this->end()) {  // Important special case for empty vector.
      append(From, To);
      return this->begin()+InsertElt;
    }
    //assert(I >= this->begin() && "Insertion iterator is out of bounds.");
    //assert(I <= this->end() && "Inserting past the end of the vector.");
    size_t NumToInsert = std::distance(From, To);
    // Ensure there is enough space.
    reserve(this->size() + NumToInsert);
    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;
    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion. Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));
      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);
      std::copy(From, To, I);
      return I;
    }
    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.
    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->setEnd(this->end() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
    // Replace the overwritten part.
    for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
      *J = *From;
      ++J; ++From;
    }
    // Insert the non-overwritten middle part.
    this->uninitialized_copy(From, To, OldEnd);
    return I;
  }
  void insert(iterator I, std::initializer_list<T> IL) {
    insert(I, IL.begin(), IL.end());
  }
  // Construct a new element in place at the end.
  template <typename... ArgTypes> void emplace_back(ArgTypes &&... Args) {
    if (TF_UNLIKELY(this->EndX >= this->CapacityX))
      this->grow();
    ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
    this->setEnd(this->end() + 1);
  }
  SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
  SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
  bool operator==(const SmallVectorImpl &RHS) const {
    if (this->size() != RHS.size()) return false;
    return std::equal(this->begin(), this->end(), RHS.begin());
  }
  bool operator!=(const SmallVectorImpl &RHS) const {
    return !(*this == RHS);
  }
  bool operator<(const SmallVectorImpl &RHS) const {
    return std::lexicographical_compare(this->begin(), this->end(),
                                        RHS.begin(), RHS.end());
  }
  /// Set the array size to \p N, which the current array must have enough
  /// capacity for.
  ///
  /// This does not construct or destroy any elements in the vector.
  ///
  /// Clients can use this in conjunction with capacity() to write past the end
  /// of the buffer when they know that more elements are available, and only
  /// update the size later. This avoids the cost of value initializing elements
  /// which will only be overwritten.
  void set_size(size_type N) {
    //assert(N <= this->capacity());
    this->setEnd(this->begin() + N);
  }
};
// Swaps the contents of two SmallVectorImpls. O(1) pointer swap when both
// are heap-allocated; otherwise falls back to element-wise swapping so that
// inline buffers are handled correctly.
template <typename T>
void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
  if (this == &RHS) return;
  // We can only avoid copying elements if neither vector is small.
  if (!this->isSmall() && !RHS.isSmall()) {
    std::swap(this->BeginX, RHS.BeginX);
    std::swap(this->EndX, RHS.EndX);
    std::swap(this->CapacityX, RHS.CapacityX);
    return;
  }
  // Make sure each side can hold the other's elements.
  if (RHS.size() > this->capacity())
    this->grow(RHS.size());
  if (this->size() > RHS.capacity())
    RHS.grow(this->size());
  // Swap the shared elements.
  size_t NumShared = this->size();
  if (NumShared > RHS.size()) NumShared = RHS.size();
  for (size_type i = 0; i != NumShared; ++i)
    std::swap((*this)[i], RHS[i]);
  // Copy over the extra elts.
  if (this->size() > RHS.size()) {
    size_t EltDiff = this->size() - RHS.size();
    this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
    RHS.setEnd(RHS.end()+EltDiff);
    this->destroy_range(this->begin()+NumShared, this->end());
    this->setEnd(this->begin()+NumShared);
  } else if (RHS.size() > this->size()) {
    size_t EltDiff = RHS.size() - this->size();
    this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
    this->setEnd(this->end() + EltDiff);
    this->destroy_range(RHS.begin()+NumShared, RHS.end());
    RHS.setEnd(RHS.begin()+NumShared);
  }
}
// Copy assignment: reuses already-constructed elements where possible and
// only grows (destroying current elements first) when capacity is too small.
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::
operator=(const SmallVectorImpl<T> &RHS) {
  // Avoid self-assignment.
  if (this == &RHS) return *this;
  // If we already have sufficient space, assign the common elements, then
  // destroy any excess.
  size_t RHSSize = RHS.size();
  size_t CurSize = this->size();
  if (CurSize >= RHSSize) {
    // Assign common elements.
    iterator NewEnd;
    if (RHSSize)
      NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
    else
      NewEnd = this->begin();
    // Destroy excess elements.
    this->destroy_range(NewEnd, this->end());
    // Trim.
    this->setEnd(NewEnd);
    return *this;
  }
  // If we have to grow to have enough elements, destroy the current elements.
  // This allows us to avoid copying them during the grow.
  // FIXME: don't do this if they're efficiently moveable.
  if (this->capacity() < RHSSize) {
    // Destroy current elements.
    this->destroy_range(this->begin(), this->end());
    this->setEnd(this->begin());
    CurSize = 0;
    this->grow(RHSSize);
  } else if (CurSize) {
    // Otherwise, use assignment for the already-constructed elements.
    std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
  }
  // Copy construct the new elements in place.
  this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
                           this->begin()+CurSize);
  // Set end.
  this->setEnd(this->begin()+RHSSize);
  return *this;
}
// Move-assigns the contents of RHS into this vector.
//
// If RHS owns a heap buffer, that buffer is stolen outright (O(1)) and RHS
// is reset to its empty small state. Otherwise elements are moved
// individually, mirroring the copy-assignment strategy, and RHS is cleared.
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
  // Avoid self-assignment.
  if (this == &RHS) return *this;
  // If the RHS isn't small, clear this vector and then steal its buffer.
  if (!RHS.isSmall()) {
    this->destroy_range(this->begin(), this->end());
    if (!this->isSmall()) std::free(this->begin());
    this->BeginX = RHS.BeginX;
    this->EndX = RHS.EndX;
    this->CapacityX = RHS.CapacityX;
    RHS.resetToSmall();
    return *this;
  }
  // If we already have sufficient space, assign the common elements, then
  // destroy any excess.
  size_t RHSSize = RHS.size();
  size_t CurSize = this->size();
  if (CurSize >= RHSSize) {
    // Assign common elements.
    iterator NewEnd = this->begin();
    if (RHSSize)
      NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
    // Destroy excess elements and trim the bounds.
    this->destroy_range(NewEnd, this->end());
    this->setEnd(NewEnd);
    // Clear the RHS.
    RHS.clear();
    return *this;
  }
  // If we have to grow to have enough elements, destroy the current elements.
  // This allows us to avoid copying them during the grow.
  // FIXME: this may not actually make any sense if we can efficiently move
  // elements.
  if (this->capacity() < RHSSize) {
    // Destroy current elements.
    this->destroy_range(this->begin(), this->end());
    this->setEnd(this->begin());
    CurSize = 0;
    this->grow(RHSSize);
  } else if (CurSize) {
    // Otherwise, use assignment for the already-constructed elements.
    std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
  }
  // Move-construct the new elements in place.
  this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
                           this->begin()+CurSize);
  // Set end.
  this->setEnd(this->begin()+RHSSize);
  RHS.clear();
  return *this;
}
/**
@private
Inline element storage for SmallVector. Only N - 1 slots are declared here;
presumably the first inline element lives in the base class so that the
small-mode begin() pointer is well defined — TODO confirm against
SmallVectorTemplateCommon (not visible in this chunk).
*/
template <typename T, unsigned N>
struct SmallVectorStorage {
  /**
  @private
  Raw (suitably sized/aligned) storage; elements are constructed lazily.
  */
  typename SmallVectorTemplateCommon<T>::U InlineElts[N - 1];
};
/**
@private
N == 1: no extra slots are needed beyond the base class.
*/
template <typename T> struct SmallVectorStorage<T, 1> {};
/**
@private
N == 0: no inline storage at all.
*/
template <typename T> struct SmallVectorStorage<T, 0> {};
/**
@brief class to define a vector optimized for small array
@tparam T data type
@tparam N threshold of the number of elements in the initial storage
The class defines a C++ STL-styled vector (a variable-sized array)
optimized for the case when the array is small.
It contains some number of elements in-place,
which allows it to avoid heap allocation when the actual number of
elements is below that threshold. This allows normal @em small cases to be
fast without losing generality for large inputs.
All the methods in [std::vector](https://en.cppreference.com/w/cpp/container/vector)
can apply to this class.
The class is stripped from the LLVM codebase.
*/
template <typename T, unsigned N = 2>
class SmallVector : public SmallVectorImpl<T> {
  /// Inline space for elements which aren't stored in the base class.
  SmallVectorStorage<T, N> Storage;
public:
  /**
  @brief constructs an empty vector
  */
  SmallVector() : SmallVectorImpl<T>(N) {
  }
  /**
  @brief constructs a vector with @c Size copies of elements with value @c value
  */
  explicit SmallVector(size_t Size, const T &Value = T())
    : SmallVectorImpl<T>(N) {
    this->assign(Size, Value);
  }
  /**
  @brief constructs a vector with the contents of the range
  <tt>[S, E)</tt>
  */
  template<typename ItTy>
  SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
    this->append(S, E);
  }
  //template <typename RangeTy>
  //explicit SmallVector(const tf::iterator_range<RangeTy> &R)
  //  : SmallVectorImpl<T>(N) {
  //  this->append(R.begin(), R.end());
  //}
  /**
  @brief constructs a vector with the contents of the initializer list @c IL
  */
  SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
    this->assign(IL);
  }
  /**
  @brief constructs the vector with the copy of the contents of @c RHS
  */
  SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
    // Delegate to the base-class copy assignment; an empty RHS needs no work.
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(RHS);
  }
  /**
  @brief constructs the vector with the contents of @c RHS using move semantics
  */
  SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }
  /**
  @brief replaces the contents with a copy of the contents of @c RHS
  */
  const SmallVector &operator=(const SmallVector &RHS) {
    SmallVectorImpl<T>::operator=(RHS);
    return *this;
  }
  /**
  @brief replaces the contents with the contents of @c RHS using move semantics
  */
  const SmallVector &operator=(SmallVector &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }
  /**
  @brief constructs a vector with the contents of @c RHS using move semantics
  */
  SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
    // Accepts any SmallVectorImpl (i.e., a SmallVector of a different N).
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }
  /**
  @brief replaces the contents with the contents of @c RHS using move semantics
  */
  const SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }
  /**
  @brief replaces the contents with the copy of the contents of an initializer list @c IL
  */
  const SmallVector &operator=(std::initializer_list<T> IL) {
    this->assign(IL);
    return *this;
  }
};
template<typename T, unsigned N> | |
static inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { | |
return X.capacity_in_bytes(); | |
} | |
} // end tf namespace --------------------------------------------------------- | |
namespace std {
/// Implement std::swap in terms of SmallVector swap.
/// NOTE(review): these are *overloads* added to namespace std, which the
/// standard does not sanction (only specializations of std templates are
/// allowed). The pattern is inherited from LLVM and works on the major
/// toolchains; confirm before relying on it in strict-conformance builds.
template<typename T>
inline void
swap(tf::SmallVectorImpl<T> &LHS, tf::SmallVectorImpl<T> &RHS) {
  LHS.swap(RHS);
}
/// Implement std::swap in terms of SmallVector swap.
template<typename T, unsigned N>
inline void
swap(tf::SmallVector<T, N> &LHS, tf::SmallVector<T, N> &RHS) {
  LHS.swap(RHS);
}
} // end of namespace std ----------------------------------------------------
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cmath>
#include <deque>
#include <forward_list>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <memory>
#include <numeric>
#include <optional>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>
namespace tf { | |
// ----------------------------------------------------------------------------
// Supported C++ STL type
// ----------------------------------------------------------------------------

// Trait: true iff T is a specialization of std::basic_string.
template <typename>
struct is_std_basic_string : std::false_type {};

template <typename... Ts>
struct is_std_basic_string<std::basic_string<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_basic_string_v = is_std_basic_string<T>::value;

// Trait: true iff T is a specialization of std::array.
template <typename>
struct is_std_array : std::false_type {};

template <typename U, size_t M>
struct is_std_array<std::array<U, M>> : std::true_type {};

template <typename T>
constexpr bool is_std_array_v = is_std_array<T>::value;

// Trait: true iff T is a specialization of std::vector.
template <typename>
struct is_std_vector : std::false_type {};

template <typename... Ts>
struct is_std_vector<std::vector<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_vector_v = is_std_vector<T>::value;

// Trait: true iff T is a specialization of std::deque.
template <typename>
struct is_std_deque : std::false_type {};

template <typename... Ts>
struct is_std_deque<std::deque<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_deque_v = is_std_deque<T>::value;
// Trait: true iff T is a specialization of std::list.
template <typename>
struct is_std_list : std::false_type {};

template <typename... Ts>
struct is_std_list<std::list<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_list_v = is_std_list<T>::value;

// Trait: true iff T is a specialization of std::forward_list.
template <typename>
struct is_std_forward_list : std::false_type {};

template <typename... Ts>
struct is_std_forward_list<std::forward_list<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_forward_list_v = is_std_forward_list<T>::value;

// Trait: true iff T is a specialization of std::map.
template <typename>
struct is_std_map : std::false_type {};

template <typename... Ts>
struct is_std_map<std::map<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_map_v = is_std_map<T>::value;

// Trait: true iff T is a specialization of std::unordered_map.
template <typename>
struct is_std_unordered_map : std::false_type {};

template <typename... Ts>
struct is_std_unordered_map<std::unordered_map<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_unordered_map_v = is_std_unordered_map<T>::value;

// Trait: true iff T is a specialization of std::set.
template <typename>
struct is_std_set : std::false_type {};

template <typename... Ts>
struct is_std_set<std::set<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_set_v = is_std_set<T>::value;

// Trait: true iff T is a specialization of std::unordered_set.
template <typename>
struct is_std_unordered_set : std::false_type {};

template <typename... Ts>
struct is_std_unordered_set<std::unordered_set<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_unordered_set_v = is_std_unordered_set<T>::value;
// Trait: true iff T is a specialization of std::variant.
template <typename>
struct is_std_variant : std::false_type {};

template <typename... Ts>
struct is_std_variant<std::variant<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_variant_v = is_std_variant<T>::value;

// Trait: true iff T is a specialization of std::optional.
template <typename>
struct is_std_optional : std::false_type {};

template <typename... Ts>
struct is_std_optional<std::optional<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_optional_v = is_std_optional<T>::value;

// Trait: true iff T is a specialization of std::unique_ptr.
template <typename>
struct is_std_unique_ptr : std::false_type {};

template <typename... Ts>
struct is_std_unique_ptr<std::unique_ptr<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_unique_ptr_v = is_std_unique_ptr<T>::value;

// Trait: true iff T is a specialization of std::shared_ptr.
template <typename>
struct is_std_shared_ptr : std::false_type {};

template <typename... Ts>
struct is_std_shared_ptr<std::shared_ptr<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_shared_ptr_v = is_std_shared_ptr<T>::value;

// Trait: true iff T is a specialization of std::chrono::duration.
template <typename>
struct is_std_duration : std::false_type {};

template <typename... Ts>
struct is_std_duration<std::chrono::duration<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_duration_v = is_std_duration<T>::value;

// Trait: true iff T is a specialization of std::chrono::time_point.
template <typename>
struct is_std_time_point : std::false_type {};

template <typename... Ts>
struct is_std_time_point<std::chrono::time_point<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_time_point_v = is_std_time_point<T>::value;

// Trait: true iff T is a specialization of std::tuple.
template <typename>
struct is_std_tuple : std::false_type {};

template <typename... Ts>
struct is_std_tuple<std::tuple<Ts...>> : std::true_type {};

template <typename T>
constexpr bool is_std_tuple_v = is_std_tuple<T>::value;
//-----------------------------------------------------------------------------
// Type extraction.
//-----------------------------------------------------------------------------
// ExtractType: forward declaration
// Yields the idx-th template argument type of a class template instantiation
// C<T0, T1, ...> (used below to pick a std::variant alternative by index).
template <size_t, typename>
struct ExtractType;
// ExtractType_t: alias interface
template <size_t idx, typename C>
using ExtractType_t = typename ExtractType<idx, C>::type;
// ExtractType: base — index 0 selects the first template argument.
template <template <typename...> typename C, typename T, typename... RestT>
struct ExtractType <0, C<T, RestT...>> {
  using type = T;
};
// ExtractType: base — a non-template type extracts to itself.
template <typename T>
struct ExtractType <0, T> {
  using type = T;
};
// ExtractType: recursive definition — drop the head, decrement the index.
template <size_t idx, template <typename...> typename C, typename T, typename... RestT>
struct ExtractType <idx, C<T, RestT...>> : ExtractType<idx-1, C<RestT...>> {
};
// ----------------------------------------------------------------------------
// Size Wrapper
// ----------------------------------------------------------------------------
// Struct: SizeTag
// Class that wraps a given size item which can be customized.
// When constructed from an lvalue it holds a reference (so a Deserializer can
// load through it into the caller's variable); from an rvalue it stores a
// decayed copy.
template <typename T>
class SizeTag {
  public:
  // Reference-preserving storage type: T itself for lvalue references,
  // otherwise the decayed value type.
  using type = std::conditional_t<std::is_lvalue_reference_v<T>, T, std::decay_t<T>>;
  SizeTag(T&& item) : _item(std::forward<T>(item)) {}
  SizeTag& operator = (const SizeTag&) = delete;
  inline const T& get() const {return _item;}
  // Archive hooks: serialize/deserialize the wrapped size value.
  template <typename ArchiverT>
  auto save(ArchiverT & ar) const { return ar(_item); }
  template <typename ArchiverT>
  auto load(ArchiverT & ar) { return ar(_item); }
  private:
  type _item;
};
// Function: make_size_tag | |
template <typename T> | |
SizeTag<T> make_size_tag(T&& t) { | |
return { std::forward<T>(t) }; | |
} | |
// ----------------------------------------------------------------------------
// Map Item Wrapper
// ----------------------------------------------------------------------------
// Class: MapItem
// Bundles a key/value pair for archiving. Like SizeTag, lvalue arguments are
// captured by reference (so load() writes through to the originals) and
// rvalues are stored by value.
template <typename KeyT, typename ValueT>
class MapItem {
  public:
  using KeyType = std::conditional_t <std::is_lvalue_reference_v<KeyT>, KeyT, std::decay_t<KeyT>>;
  using ValueType = std::conditional_t <std::is_lvalue_reference_v<ValueT>, ValueT, std::decay_t<ValueT>>;
  MapItem(KeyT&& k, ValueT&& v) : _key(std::forward<KeyT>(k)), _value(std::forward<ValueT>(v)) {}
  MapItem& operator = (const MapItem&) = delete;
  inline const KeyT& key() const { return _key; }
  inline const ValueT& value() const { return _value; }
  // Archive hooks: serialize/deserialize key then value, in that order.
  template <typename ArchiverT>
  auto save(ArchiverT & ar) const { return ar(_key, _value); }
  template <typename ArchiverT>
  auto load(ArchiverT & ar) { return ar(_key, _value); }
  private:
  KeyType _key;
  ValueType _value;
};
// Function: make_kv_pair | |
template <typename KeyT, typename ValueT> | |
MapItem<KeyT, ValueT> make_kv_pair(KeyT&& k, ValueT&& v) { | |
return { std::forward<KeyT>(k), std::forward<ValueT>(v) }; | |
} | |
// ----------------------------------------------------------------------------
// Serializer Definition
// ----------------------------------------------------------------------------
// True if T can be archived without a user-provided save() method:
// arithmetic types, enums, and the STL containers/vocabulary types detected
// by the traits above. Anything else falls through to the custom-save
// overload of Serializer::_save.
template <typename T>
constexpr auto is_default_serializable_v = (
  std::is_arithmetic_v<T> ||
  std::is_enum_v<T> ||
  is_std_basic_string_v<T> ||
  is_std_vector_v<T> ||
  is_std_deque_v<T> ||
  is_std_list_v<T> ||
  is_std_forward_list_v<T> ||
  is_std_map_v<T> ||
  is_std_unordered_map_v<T> ||
  is_std_set_v<T> ||
  is_std_unordered_set_v<T> ||
  is_std_duration_v<T> ||
  is_std_time_point_v<T> ||
  is_std_variant_v<T> ||
  is_std_optional_v<T> ||
  is_std_tuple_v<T> ||
  is_std_array_v<T>
);
// Class: Serializer
// Binary serializer. operator() archives each item into the wrapped Stream
// (via Stream::write) and returns the number of bytes written as SizeType.
// Dispatch is by enable_if on std::decay_t<T>: one _save overload per
// supported category, plus a fallback that calls the item's own save()
// member for non-default-serializable types. The out-of-line definitions
// below must match these declarations exactly.
template <typename Stream, typename SizeType = std::streamsize>
class Serializer {
  public:
  Serializer(Stream& stream);
  template <typename... T>
  SizeType operator()(T&&... items);
  private:
  // The underlying output stream (held by reference; must outlive *this).
  Stream& _stream;
  // Fallback: types without default serialization use their save() member.
  template <typename T,
    std::enable_if_t<!is_default_serializable_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<std::is_arithmetic_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_basic_string_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_vector_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<
      is_std_deque_v<std::decay_t<T>> ||
      is_std_list_v<std::decay_t<T>>,
      void
    >* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<
      is_std_forward_list_v<std::decay_t<T>>,
      void
    >* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<
      is_std_map_v<std::decay_t<T>> ||
      is_std_unordered_map_v<std::decay_t<T>>,
      void
    >* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<
      is_std_set_v<std::decay_t<T>> ||
      is_std_unordered_set_v<std::decay_t<T>>,
      void
    >* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<std::is_enum_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_duration_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_time_point_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_optional_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_variant_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_tuple_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
  template <typename T,
    std::enable_if_t<is_std_array_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _save(T&&);
};
// Constructor
// Binds the serializer to an output stream; the stream must outlive it.
template <typename Stream, typename SizeType>
Serializer<Stream, SizeType>::Serializer(Stream& stream) : _stream(stream) {
}
// Operator () | |
template <typename Stream, typename SizeType> | |
template <typename... T> | |
SizeType Serializer<Stream, SizeType>::operator() (T&&... items) { | |
return (_save(std::forward<T>(items)) + ...); | |
} | |
// arithmetic data type
// Writes the raw object representation (sizeof(t) bytes) to the stream.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<std::is_arithmetic_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  _stream.write(reinterpret_cast<const char*>(std::addressof(t)), sizeof(t));
  return sizeof(t);
}
// std::basic_string
// Writes the character count (via SizeTag) followed by the raw character
// payload in one contiguous block.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_basic_string_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  using U = std::decay_t<T>;
  auto sz = _save(make_size_tag(t.size()));
  _stream.write(
    reinterpret_cast<const char*>(t.data()),
    t.size()*sizeof(typename U::value_type)
  );
  return sz + t.size()*sizeof(typename U::value_type);
}
// std::vector | |
template <typename Stream, typename SizeType> | |
template <typename T, | |
std::enable_if_t<is_std_vector_v<std::decay_t<T>>, void>* | |
> | |
SizeType Serializer<Stream, SizeType>::_save(T&& t) { | |
using U = std::decay_t<T>; | |
auto sz = _save(make_size_tag(t.size())); | |
if constexpr (std::is_arithmetic_v<typename U::value_type>) { | |
_stream.write( | |
reinterpret_cast<const char*>(t.data()), | |
t.size() * sizeof(typename U::value_type) | |
); | |
sz += t.size() * sizeof(typename U::value_type); | |
} else { | |
for(auto&& item : t) { | |
sz += _save(item); | |
} | |
} | |
return sz; | |
} | |
// std::list and std::deque
// Writes the element count, then each element in iteration order.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_deque_v<std::decay_t<T>> ||
                   is_std_list_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  auto sz = _save(make_size_tag(t.size()));
  for(auto&& item : t) {
    sz += _save(item);
  }
  return sz;
}
// std::forward_list
// forward_list has no size(); the count is computed with an O(n) distance
// pass before the elements are written.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_forward_list_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  auto sz = _save(make_size_tag(std::distance(t.begin(), t.end())));
  for(auto&& item : t) {
    sz += _save(item);
  }
  return sz;
}
// std::map and std::unordered_map
// Writes the entry count, then each key/value pair (key first) via MapItem.
template <typename Stream, typename SizeType>
template <typename T, std::enable_if_t<
  is_std_map_v<std::decay_t<T>> ||
  is_std_unordered_map_v<std::decay_t<T>>,
  void
>*>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  auto sz = _save(make_size_tag(t.size()));
  for(auto&& [k, v] : t) {
    sz += _save(make_kv_pair(k, v));
  }
  return sz;
}
// std::set and std::unordered_set
// Writes the element count, then each element in iteration order.
template <typename Stream, typename SizeType>
template <typename T, std::enable_if_t<
  is_std_set_v<std::decay_t<T>> ||
  is_std_unordered_set_v<std::decay_t<T>>,
  void
>*>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  auto sz = _save(make_size_tag(t.size()));
  for(auto&& item : t) {
    sz += _save(item);
  }
  return sz;
}
// enum data type
// Saved as the enum's underlying integer type.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<std::is_enum_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  using U = std::decay_t<T>;
  return _save(static_cast<std::underlying_type_t<U>>(t));
}
// duration data type
// Saved as the tick count (the duration's representation type).
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_duration_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  return _save(t.count());
}
// time point data type
// Saved as the duration since the clock's epoch.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_time_point_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  return _save(t.time_since_epoch());
}
// optional data type
// Writes a has-value flag, then the contained value only when engaged.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_optional_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  if(bool flag = t.has_value(); flag) {
    return _save(flag) + _save(*t);
  }
  else {
    return _save(flag);
  }
}
// variant type
// Writes the active alternative's index, then its value via std::visit.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_variant_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  return _save(t.index()) +
         std::visit([&] (auto&& arg){ return _save(arg);}, t);
}
// tuple type
// Saves each member in order; the "+ ... + 0" binary fold also accepts an
// empty tuple.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_tuple_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  return std::apply(
    [&] (auto&&... args) {
      return (_save(std::forward<decltype(args)>(args)) + ... + 0);
    },
    std::forward<T>(t)
  );
}
// array
// The size is a compile-time constant, so no count is written. Arithmetic
// element types go out as one raw block; others element by element.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_array_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  using U = std::decay_t<T>;
  static_assert(std::tuple_size<U>::value > 0, "Array size can't be zero");
  SizeType sz;
  if constexpr(std::is_arithmetic_v<typename U::value_type>) {
    _stream.write(reinterpret_cast<const char*>(t.data()), sizeof(t));
    sz = sizeof(t);
  }
  else {
    sz = 0;
    for(auto&& item : t) {
      sz += _save(item);
    }
  }
  return sz;
}
// custom save method
// Fallback for non-default-serializable types: delegates to the object's
// own save(Archiver&) member, which is expected to return the byte count.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<!is_default_serializable_v<std::decay_t<T>>, void>*
>
SizeType Serializer<Stream, SizeType>::_save(T&& t) {
  return t.save(*this);
}
// ----------------------------------------------------------------------------
// DeSerializer Definition
// ----------------------------------------------------------------------------
// True if T can be loaded without a user-provided load() method; mirrors
// is_default_serializable_v so every default-saved type can be read back.
template <typename T>
constexpr auto is_default_deserializable_v =
  std::is_arithmetic_v<T> ||
  std::is_enum_v<T> ||
  is_std_basic_string_v<T> ||
  is_std_vector_v<T> ||
  is_std_deque_v<T> ||
  is_std_list_v<T> ||
  is_std_forward_list_v<T> ||
  is_std_map_v<T> ||
  is_std_unordered_map_v<T> ||
  is_std_set_v<T> ||
  is_std_unordered_set_v<T> ||
  is_std_duration_v<T> ||
  is_std_time_point_v<T> ||
  is_std_variant_v<T> ||
  is_std_optional_v<T> ||
  is_std_tuple_v<T> ||
  is_std_array_v<T>;
// Class: Deserializer
// Binary deserializer: the inverse of Serializer. operator() reads each item
// from the wrapped Stream (via Stream::read) and returns the number of bytes
// consumed as SizeType. Dispatch mirrors Serializer::_save; _variant_helper
// walks variant alternatives at compile time to reactivate the stored index.
// The out-of-line definitions below must match these declarations exactly.
template <typename Stream, typename SizeType = std::streamsize>
class Deserializer {
  public:
  Deserializer(Stream& stream);
  template <typename... T>
  SizeType operator()(T&&... items);
  private:
  // The underlying input stream (held by reference; must outlive *this).
  Stream& _stream;
  // Function: _variant_helper (recursion terminator — index exhausted)
  template <
    size_t I = 0, typename... ArgsT,
    std::enable_if_t<I==sizeof...(ArgsT)>* = nullptr
  >
  SizeType _variant_helper(size_t, std::variant<ArgsT...>&);
  // Function: _variant_helper (recursive case)
  template <
    size_t I = 0, typename... ArgsT,
    std::enable_if_t<I<sizeof...(ArgsT)>* = nullptr
  >
  SizeType _variant_helper(size_t, std::variant<ArgsT...>&);
  template <typename T,
    std::enable_if_t<std::is_arithmetic_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_basic_string_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_vector_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<
      is_std_deque_v<std::decay_t<T>> ||
      is_std_list_v<std::decay_t<T>> ||
      is_std_forward_list_v<std::decay_t<T>>,
      void
    >* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_map_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_unordered_map_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_set_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_unordered_set_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<std::is_enum_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_duration_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_time_point_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_optional_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_variant_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_tuple_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  template <typename T,
    std::enable_if_t<is_std_array_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
  // Fallback: types without default deserialization use their load() member.
  template <typename T,
    std::enable_if_t<!is_default_deserializable_v<std::decay_t<T>>, void>* = nullptr
  >
  SizeType _load(T&&);
};
// Constructor
// Binds the deserializer to an input stream; the stream must outlive it.
template <typename Stream, typename SizeType>
Deserializer<Stream, SizeType>::Deserializer(Stream& stream) : _stream(stream) {
}
// Operator () | |
template <typename Stream, typename SizeType> | |
template <typename... T> | |
SizeType Deserializer<Stream, SizeType>::operator() (T&&... items) { | |
return (_load(std::forward<T>(items)) + ...); | |
} | |
// Function: _variant_helper
// Recursion terminator: the compile-time index walked past the last
// alternative, so there is nothing to load.
template <typename Stream, typename SizeType>
template <size_t I, typename... ArgsT, std::enable_if_t<I==sizeof...(ArgsT)>*>
SizeType Deserializer<Stream, SizeType>::_variant_helper(size_t, std::variant<ArgsT...>&) {
  return 0;
}
// Function: _variant_helper
// Walks the alternatives at compile time until the runtime counter i reaches
// zero, then activates that alternative (default-constructing it if it is
// not already active) and loads its value from the stream.
template <typename Stream, typename SizeType>
template <size_t I, typename... ArgsT, std::enable_if_t<I<sizeof...(ArgsT)>*>
SizeType Deserializer<Stream, SizeType>::_variant_helper(size_t i, std::variant<ArgsT...>& v) {
  if(i == 0) {
    using type = ExtractType_t<I, std::variant<ArgsT...>>;
    if(v.index() != I) {
      static_assert(
        std::is_default_constructible<type>::value,
        "Failed to archive variant (type should be default constructible T())"
      );
      v = type();
    }
    return _load(*std::get_if<type>(&v));
  }
  return _variant_helper<I+1, ArgsT...>(i-1, v);
}
// arithmetic data type
// Reads sizeof(t) raw bytes directly into the referenced object.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<std::is_arithmetic_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  _stream.read(reinterpret_cast<char*>(std::addressof(t)), sizeof(t));
  return sizeof(t);
}
// std::basic_string
// Reads the character count, resizes the string, then reads the payload
// straight into its buffer (basic_string::data() is writable since C++17).
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_basic_string_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_chars;
  auto sz = _load(make_size_tag(num_chars));
  t.resize(num_chars);
  _stream.read(reinterpret_cast<char*>(t.data()), num_chars*sizeof(typename U::value_type));
  return sz + num_chars*sizeof(typename U::value_type);
}
// std::vector
// Loads the element count, then the elements: arithmetic element types are
// read in a single bulk read into the contiguous buffer; other types are
// loaded element-by-element through _load. Returns total bytes consumed.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_vector_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  if constexpr(std::is_arithmetic_v<typename U::value_type>) {
    // fast path: contiguous storage allows one raw read
    t.resize(num_data);
    _stream.read(reinterpret_cast<char*>(t.data()), num_data * sizeof(typename U::value_type));
    sz += num_data * sizeof(typename U::value_type);
  }
  else {
    t.resize(num_data);
    for(auto && v : t) {
      sz += _load(v);
    }
  }
  return sz;
}
// std::list, std::deque, and std::forward_list
// Loads the element count, resizes the container (all three containers,
// including forward_list, provide resize), and loads each element in order.
// These containers are not contiguous, so no bulk-read fast path exists.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_deque_v<std::decay_t<T>> ||
                   is_std_list_v<std::decay_t<T>> ||
                   is_std_forward_list_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  t.resize(num_data);
  for(auto && v : t) {
    sz += _load(v);
  }
  return sz;
}
// std::map
// Clears the map, then loads num_data key/value pairs. Each pair is loaded
// into local k/v buffers and moved into the map with emplace_hint; the hint
// (previous insertion position) is cheap when keys arrive in sorted order.
// k and v are re-populated by _load after each move.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_map_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  t.clear();
  auto hint = t.begin();
  typename U::key_type k;
  typename U::mapped_type v;
  for(size_t i=0; i<num_data; ++i) {
    sz += _load(make_kv_pair(k, v));
    hint = t.emplace_hint(hint, std::move(k), std::move(v));
  }
  return sz;
}
// std::unordered_map
// Clears the map and reserves the final bucket count up front to avoid
// rehashing, then loads and moves each key/value pair in. The local k/v
// buffers are re-populated by _load after each move.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_unordered_map_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  t.clear();
  t.reserve(num_data);
  typename U::key_type k;
  typename U::mapped_type v;
  for(size_t i=0; i<num_data; ++i) {
    sz += _load(make_kv_pair(k, v));
    t.emplace(std::move(k), std::move(v));
  }
  return sz;
}
// std::set
// Clears the set, then loads num_data keys one at a time into a local buffer
// and moves each into the set via emplace_hint (cheap for sorted input).
// k is re-populated by _load after each move.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_set_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  t.clear();
  auto hint = t.begin();
  typename U::key_type k;
  for(size_t i=0; i<num_data; ++i) {
    sz += _load(k);
    hint = t.emplace_hint(hint, std::move(k));
  }
  return sz;
}
// std::unordered_set
// Clears the set and reserves capacity up front to avoid rehashing, then
// loads each key into a local buffer and moves it in. k is re-populated by
// _load after each move.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_unordered_set_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::size_type num_data;
  auto sz = _load(make_size_tag(num_data));
  t.clear();
  t.reserve(num_data);
  typename U::key_type k;
  for(size_t i=0; i<num_data; ++i) {
    sz += _load(k);
    t.emplace(std::move(k));
  }
  return sz;
}
// enum data type
// Loads the enum's underlying integer representation and casts it back to
// the enum type. Returns the bytes consumed by the underlying value.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<std::is_enum_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  std::underlying_type_t<U> k;
  auto sz = _load(k);
  t = static_cast<U>(k);
  return sz;
}
// duration data type
// Loads the duration's tick count (rep) and reconstructs the duration from
// it. Returns the bytes consumed by the tick count.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_duration_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::rep count;
  auto s = _load(count);
  t = U{count};
  return s;
}
// time point data type
// Loads the elapsed duration (time since the clock's epoch, as serialized by
// the matching save path) and reconstructs the time point from it.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_time_point_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  typename U::duration elapsed;
  auto s = _load(elapsed);
  t = U{elapsed};
  return s;
}
// optional data type
// Loads a bool presence flag first; if set, ensures the optional is engaged
// (default-constructing the value if needed) and loads into it, otherwise
// resets the optional to empty.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_optional_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  bool has_value;
  auto s = _load(has_value);
  if(has_value) {
    // engage the optional so there is a live object to load into
    if(!t) {
      t = typename U::value_type();
    }
    s += _load(*t);
  }
  else {
    t.reset();
  }
  return s;
}
// variant type
// Loads the serialized alternative index, then dispatches to _variant_helper,
// which activates and loads the matching alternative.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_variant_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  std::decay_t<decltype(t.index())> idx;
  auto s = _load(idx);
  return s + _variant_helper(idx, t);
}
// tuple type
// Loads every tuple element in declaration order via std::apply; the
// "+ ... + 0" fold keeps the expression valid for empty tuples.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_tuple_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  return std::apply(
    [&] (auto&&... args) {
      return (_load(std::forward<decltype(args)>(args)) + ... + 0);
    },
    std::forward<T>(t)
  );
}
// std::array
// Loads a fixed-size array: no size tag is needed since the size is part of
// the type. Arithmetic element types are read in one raw bulk read; other
// types are loaded element-by-element.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<is_std_array_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  using U = std::decay_t<T>;
  static_assert(std::tuple_size<U>::value > 0, "Array size can't be zero");
  SizeType sz;
  if constexpr(std::is_arithmetic_v<typename U::value_type>) {
    // fast path: read the whole contiguous array at once
    _stream.read(reinterpret_cast<char*>(t.data()), sizeof(t));
    sz = sizeof(t);
  }
  else {
    sz = 0;
    for(auto && v : t) {
      sz += _load(v);
    }
  }
  return sz;
}
// custom load method
// Fallback for types that are not default-deserializable: delegates to the
// type's own T::load(Archive&) member, which returns the bytes consumed.
template <typename Stream, typename SizeType>
template <typename T,
  std::enable_if_t<!is_default_deserializable_v<std::decay_t<T>>, void>*
>
SizeType Deserializer<Stream, SizeType>::_load(T&& t) {
  return t.load(*this);
}
} // end of namespace tf -----------------------------------------------------
#include <iostream> | |
#include <sstream> | |
#include <exception> | |
#include <iostream> | |
#include <string> | |
namespace tf {

// Procedure: ostreamize
//
// Streams every token, in order, into the given output stream using a C++17
// left fold over operator<< (the file already relies on fold expressions
// elsewhere). This replaces the former recursive overload pair with a single
// variadic function and, as a backward-compatible generalization, also
// accepts an empty token pack (a no-op), which previously failed to compile.
//
// @param os     output stream to write to
// @param tokens zero or more values streamable via operator<<
template <typename... T>
void ostreamize(std::ostream& os, T&&... tokens) {
  (os << ... << std::forward<T>(tokens));
}

// Function: stringify
//
// Builds a std::string by streaming all arguments into an ostringstream.
//
// @param args zero or more values streamable via operator<<
// @return concatenation of the arguments' stream representations
//         (empty string for an empty argument list)
template <typename... ArgsT>
std::string stringify(ArgsT&&... args) {
  std::ostringstream oss;
  ostreamize(oss, std::forward<ArgsT>(args)...);
  return oss.str();
}

}  // end of namespace tf -----------------------------------------------------
namespace tf {

// Procedure: throw_re
// Composes a "[file:line] <args...>" message and throws it wrapped in a
// std::runtime_error. Typically invoked through the TF_THROW macro, which
// supplies __FILE__ and __LINE__.
template <typename... ArgsT>
void throw_re(const char* fname, const size_t line, ArgsT&&... args) {
  std::ostringstream msg;
  msg << "[" << fname << ":" << line << "] ";
  (msg << ... << std::forward<ArgsT>(args));
  throw std::runtime_error(msg.str());
}

}  // ------------------------------------------------------------------------
#define TF_THROW(...) tf::throw_re(__FILE__, __LINE__, __VA_ARGS__); | |
namespace tf {

// ----------------------------------------------------------------------------
// taskflow
// ----------------------------------------------------------------------------
// Forward declarations of the core task-graph types so later headers can
// refer to them by pointer/reference without needing full definitions.

class AsyncTopology;
class Node;
class Graph;
class FlowBuilder;
class Semaphore;
class Subflow;
class Runtime;
class Task;
class TaskView;
class Taskflow;
class Topology;
class TopologyBase;
class Executor;
class Worker;
class WorkerView;
class ObserverInterface;
class ChromeTracingObserver;
class TFProfObserver;
class TFProfManager;

template <typename T>
class Future;

template <typename...Fs>
class Pipeline;

// ----------------------------------------------------------------------------
// cudaFlow
// ----------------------------------------------------------------------------

class cudaFlowNode;
class cudaFlowGraph;
class cudaTask;
class cudaFlow;
class cudaFlowCapturer;
class cudaFlowOptimizerBase;
class cudaFlowLinearOptimizer;
class cudaFlowSequentialOptimizer;
class cudaFlowRoundRobinOptimizer;

// ----------------------------------------------------------------------------
// syclFlow
// ----------------------------------------------------------------------------

class syclNode;
class syclGraph;
class syclTask;
class syclFlow;

}  // end of namespace tf -----------------------------------------------------
#include <vector> | |
#include <mutex> | |
/** | |
@file semaphore.hpp | |
@brief semaphore include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Semaphore | |
// ---------------------------------------------------------------------------- | |
/** | |
@class Semaphore | |
@brief class to create a semaphore object for building a concurrency constraint
A semaphore creates a constraint that limits the maximum concurrency, | |
i.e., the number of workers, in a set of tasks. | |
You can let a task acquire/release one or multiple semaphores before/after | |
executing its work. | |
A task can acquire and release a semaphore, | |
or just acquire or just release it. | |
A tf::Semaphore object starts with an initial count. | |
As long as that count is above 0, tasks can acquire the semaphore and do | |
their work. | |
If the count is 0 or less, a task trying to acquire the semaphore will not run | |
but goes to a waiting list of that semaphore. | |
When the semaphore is released by another task, | |
it reschedules all tasks on that waiting list. | |
@code{.cpp} | |
tf::Executor executor(8); // create an executor of 8 workers | |
tf::Taskflow taskflow; | |
tf::Semaphore semaphore(1); // create a semaphore with initial count 1 | |
std::vector<tf::Task> tasks { | |
taskflow.emplace([](){ std::cout << "A" << std::endl; }), | |
taskflow.emplace([](){ std::cout << "B" << std::endl; }), | |
taskflow.emplace([](){ std::cout << "C" << std::endl; }), | |
taskflow.emplace([](){ std::cout << "D" << std::endl; }), | |
taskflow.emplace([](){ std::cout << "E" << std::endl; }) | |
}; | |
for(auto & task : tasks) { // each task acquires and release the semaphore | |
task.acquire(semaphore); | |
task.release(semaphore); | |
} | |
executor.run(taskflow).wait(); | |
@endcode | |
The above example creates five tasks with no dependencies between them. | |
Under normal circumstances, the five tasks would be executed concurrently. | |
However, this example has a semaphore with initial count 1, | |
and all tasks need to acquire that semaphore before running and release that | |
semaphore after they are done. | |
This arrangement limits the number of concurrently running tasks to only one. | |
*/ | |
class Semaphore {

  friend class Node;

  public:

    /**
    @brief constructs a semaphore with the given counter

    A semaphore creates a constraint that limits the maximum concurrency,
    i.e., the number of workers, in a set of tasks.

    @code{.cpp}
    tf::Semaphore semaphore(4);  // concurrency constraint of 4 workers
    @endcode
    */
    explicit Semaphore(size_t max_workers);

    /**
    @brief queries the counter value (not thread-safe during the run)
    */
    size_t count() const;

  private:

    std::mutex _mtx;              // protects _counter and _waiters
    size_t _counter;              // remaining concurrency slots
    std::vector<Node*> _waiters;  // nodes blocked waiting for a slot

    // acquires a slot, or registers the node as a waiter when none is free
    bool _try_acquire_or_wait(Node*);

    // releases a slot and returns the nodes that were waiting on it
    std::vector<Node*> _release();
};
// Constructor: initializes the concurrency budget to max_workers slots.
inline Semaphore::Semaphore(size_t max_workers) :
  _counter(max_workers) {
}
// Procedure: _try_acquire_or_wait
// Atomically takes one concurrency slot if available; otherwise enqueues the
// node on the waiting list so a later _release can reschedule it.
// Returns true on acquisition, false when the node had to wait.
inline bool Semaphore::_try_acquire_or_wait(Node* me) {
  std::lock_guard<std::mutex> lock(_mtx);
  if(_counter == 0) {
    // no slot free: park the node until someone releases
    _waiters.push_back(me);
    return false;
  }
  --_counter;
  return true;
}
inline std::vector<Node*> Semaphore::_release() { | |
std::lock_guard<std::mutex> lock(_mtx); | |
++_counter; | |
std::vector<Node*> r{std::move(_waiters)}; | |
return r; | |
} | |
// Function: count
// Unsynchronized snapshot of the remaining slots; only meaningful when no
// run is in flight (as the declaration's doc comment warns).
inline size_t Semaphore::count() const {
  return _counter;
}
} // end of namespace tf. --------------------------------------------------- | |
#define TF_ENABLE_PROFILER "TF_ENABLE_PROFILER" | |
namespace tf { | |
} // end of namespace tf ----------------------------------------------------- | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// class: TopologyBase | |
// Base of all topology types; holds the shared cancellation flag that its
// friends (Executor, Node, Future) read and write during a run.
class TopologyBase {

  friend class Executor;
  friend class Node;

  template <typename T>
  friend class Future;

  protected:

    std::atomic<bool> _is_cancelled { false };
};
// ---------------------------------------------------------------------------- | |
// class: Topology | |
class Topology : public TopologyBase {

  friend class Executor;
  friend class Runtime;

  public:

    template <typename P, typename C>
    Topology(Taskflow&, P&&, C&&);

  private:

    Taskflow& _taskflow;                    // taskflow this topology belongs to
    std::promise<void> _promise;            // presumably fulfilled when the run
                                            // finishes — set by Executor (not visible here)
    SmallVector<Node*> _sources;            // NOTE(review): looks like the entry
                                            // nodes of the run — confirm against Executor
    std::function<bool()> _pred;            // run-again predicate (see constructor)
    std::function<void()> _call;            // callback stored at construction
    std::atomic<size_t> _join_counter {0};  // counter used by the scheduler
                                            // (semantics not visible here)
};
// Constructor
// Captures the owning taskflow plus the predicate and callback; _sources and
// _join_counter start from their member defaults (empty / zero).
template <typename P, typename C>
Topology::Topology(Taskflow& tf, P&& p, C&& c):
  _taskflow(tf),
  _pred {std::forward<P>(p)},
  _call {std::forward<C>(c)} {
}
} // end of namespace tf. ---------------------------------------------------- | |
#if defined(_MSC_VER) | |
#define TF_FORCE_INLINE __forceinline | |
#elif defined(__GNUC__) && __GNUC__ > 3 | |
#define TF_FORCE_INLINE __attribute__((__always_inline__)) inline | |
#else | |
#define TF_FORCE_INLINE inline | |
#endif | |
#if defined(_MSC_VER) | |
#define TF_NO_INLINE __declspec(noinline) | |
#elif defined(__GNUC__) && __GNUC__ > 3 | |
#define TF_NO_INLINE __attribute__((__noinline__)) | |
#else | |
#define TF_NO_INLINE | |
#endif | |
/** | |
@file tsq.hpp | |
@brief task queue include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Task Types | |
// ---------------------------------------------------------------------------- | |
/** | |
@enum TaskPriority | |
@brief enumeration of all task priority values | |
A priority is an enumerated value of type @c unsigned. | |
Currently, %Taskflow defines three priority levels, | |
@c HIGH, @c NORMAL, and @c LOW, starting from 0, 1, to 2. | |
That is, the lower the value, the higher the priority. | |
*/ | |
// values are compared numerically: the lower the value, the higher the priority
enum class TaskPriority : unsigned {
  /** @brief value of the highest priority (i.e., 0) */
  HIGH = 0,
  /** @brief value of the normal priority (i.e., 1) */
  NORMAL = 1,
  /** @brief value of the lowest priority (i.e., 2) */
  LOW = 2,
  /** @brief conventional value for iterating priority values (one past LOW) */
  MAX = 3
};
// ---------------------------------------------------------------------------- | |
// Task Queue | |
// ---------------------------------------------------------------------------- | |
/** | |
@class: TaskQueue | |
@tparam T data type (must be a pointer type) | |
@tparam TF_MAX_PRIORITY maximum level of the priority | |
@brief class to create a lock-free unbounded single-producer multiple-consumer queue | |
This class implements the work-stealing queue described in the paper, | |
<a href="https://www.di.ens.fr/~zappa/readings/ppopp13.pdf">Correct and Efficient Work-Stealing for Weak Memory Models</a>, | |
and extends it to include priority. | |
Only the queue owner can perform pop and push operations, | |
while others can steal data from the queue simultaneously. | |
Priority starts from zero (highest priority) to the template value | |
`TF_MAX_PRIORITY-1` (lowest priority). | |
All operations are associated with priority values to indicate | |
the corresponding queues to which an operation is applied. | |
The default template value, `TF_MAX_PRIORITY`, is `TaskPriority::MAX` | |
which applies only three priority levels to the task queue. | |
@code{.cpp} | |
auto [A, B, C, D, E] = taskflow.emplace( | |
[] () { }, | |
[&] () { | |
std::cout << "Task B: " << counter++ << '\n'; // 0 | |
}, | |
[&] () { | |
std::cout << "Task C: " << counter++ << '\n'; // 2 | |
}, | |
[&] () { | |
std::cout << "Task D: " << counter++ << '\n'; // 1 | |
}, | |
[] () { } | |
); | |
A.precede(B, C, D); | |
E.succeed(B, C, D); | |
B.priority(tf::TaskPriority::HIGH); | |
C.priority(tf::TaskPriority::LOW); | |
D.priority(tf::TaskPriority::NORMAL); | |
executor.run(taskflow).wait(); | |
@endcode | |
In the above example, we have a task graph of five tasks, | |
@c A, @c B, @c C, @c D, and @c E, in which @c B, @c C, and @c D | |
can run in simultaneously when @c A finishes. | |
Since we only use one worker thread in the executor,
we can deterministically run @c B first, then @c D, and @c C | |
in order of their priority values. | |
The output is as follows: | |
@code{.shell-session} | |
Task B: 0 | |
Task D: 1 | |
Task C: 2 | |
@endcode | |
*/ | |
template <typename T, unsigned TF_MAX_PRIORITY = static_cast<unsigned>(TaskPriority::MAX)>
class TaskQueue {

  static_assert(TF_MAX_PRIORITY > 0, "TF_MAX_PRIORITY must be at least one");
  static_assert(std::is_pointer_v<T>, "T must be a pointer type");

  // Ring-buffer storage of one work-stealing deque (Chase-Lev style).
  // Indices grow monotonically; (i & M) maps them into the buffer.
  struct Array {

    int64_t C;          // capacity (kept a power of two by ctor/resize)
    int64_t M;          // index mask, C-1
    std::atomic<T>* S;  // the slots

    explicit Array(int64_t c) :
      C {c},
      M {c-1},
      S {new std::atomic<T>[static_cast<size_t>(C)]} {
    }

    ~Array() {
      delete [] S;
    }

    int64_t capacity() const noexcept {
      return C;
    }

    void push(int64_t i, T o) noexcept {
      S[i & M].store(o, std::memory_order_relaxed);
    }

    T pop(int64_t i) noexcept {
      return S[i & M].load(std::memory_order_relaxed);
    }

    // Allocates a buffer of twice the capacity and copies the live range
    // [t, b) into it; the caller retires this array (see resize_array).
    Array* resize(int64_t b, int64_t t) {
      Array* ptr = new Array {2*C};
      for(int64_t i=t; i!=b; ++i) {
        ptr->push(i, pop(i));
      }
      return ptr;
    }

  };

  // One independent deque per priority level; top/bottom are cache-line
  // aligned to keep owner and thieves off each other's lines.
  // Doubling the alignment by 2 seems to generate the most
  // decent performance.
  CachelineAligned<std::atomic<int64_t>> _top[TF_MAX_PRIORITY];
  CachelineAligned<std::atomic<int64_t>> _bottom[TF_MAX_PRIORITY];
  std::atomic<Array*> _array[TF_MAX_PRIORITY];
  std::vector<Array*> _garbage[TF_MAX_PRIORITY];  // retired buffers, freed in dtor

  //std::atomic<T> _cache {nullptr};

  public:

    /**
    @brief constructs the queue with a given capacity

    @param capacity the capacity of the queue (must be power of 2)
    */
    explicit TaskQueue(int64_t capacity = 512);

    /**
    @brief destructs the queue
    */
    ~TaskQueue();

    /**
    @brief queries if the queue is empty at the time of this call
    */
    bool empty() const noexcept;

    /**
    @brief queries if the queue is empty at a specific priority value
    */
    bool empty(unsigned priority) const noexcept;

    /**
    @brief queries the number of items at the time of this call
    */
    size_t size() const noexcept;

    /**
    @brief queries the number of items with the given priority
           at the time of this call
    */
    size_t size(unsigned priority) const noexcept;

    /**
    @brief queries the capacity of the queue
    */
    int64_t capacity() const noexcept;

    /**
    @brief queries the capacity of the queue at a specific priority value
    */
    int64_t capacity(unsigned priority) const noexcept;

    /**
    @brief inserts an item to the queue

    @param item the item to push to the queue
    @param priority priority value of the item to push (default = 0)

    Only the owner thread can insert an item to the queue.
    The operation can trigger the queue to resize its capacity
    if more space is required.
    */
    TF_FORCE_INLINE void push(T item, unsigned priority);

    /**
    @brief pops out an item from the queue

    Only the owner thread can pop out an item from the queue.
    The return can be a @c nullptr if this operation failed (empty queue).
    */
    T pop();

    /**
    @brief pops out an item with a specific priority value from the queue

    @param priority priority of the item to pop

    Only the owner thread can pop out an item from the queue.
    The return can be a @c nullptr if this operation failed (empty queue).
    */
    TF_FORCE_INLINE T pop(unsigned priority);

    /**
    @brief steals an item from the queue

    Any threads can try to steal an item from the queue.
    The return can be a @c nullptr if this operation failed (not necessarily empty).
    */
    T steal();

    /**
    @brief steals an item with a specific priority value from the queue

    @param priority priority of the item to steal

    Any threads can try to steal an item from the queue.
    The return can be a @c nullptr if this operation failed (not necessarily empty).
    */
    T steal(unsigned priority);

  private:

    // grows the deque of priority p and retires the old array into _garbage
    TF_NO_INLINE Array* resize_array(Array* a, unsigned p, std::int64_t b, std::int64_t t);
};
// Constructor
// Initializes one empty deque (top = bottom = 0) per priority level.
template <typename T, unsigned TF_MAX_PRIORITY>
TaskQueue<T, TF_MAX_PRIORITY>::TaskQueue(int64_t c) {
  // the capacity must be a nonzero power of two so (i & M) can replace modulo
  assert(c && (!(c & (c-1))));
  unroll<0, TF_MAX_PRIORITY, 1>([&](auto p){
    _top[p].data.store(0, std::memory_order_relaxed);
    _bottom[p].data.store(0, std::memory_order_relaxed);
    _array[p].store(new Array{c}, std::memory_order_relaxed);
    _garbage[p].reserve(32);
  });
}
// Destructor
// Frees the retired (resized-away) arrays first, then the live array of
// each priority level.
template <typename T, unsigned TF_MAX_PRIORITY>
TaskQueue<T, TF_MAX_PRIORITY>::~TaskQueue() {
  unroll<0, TF_MAX_PRIORITY, 1>([&](auto p){
    for(auto a : _garbage[p]) {
      delete a;
    }
    delete _array[p].load();
  });
}
// Function: empty | |
template <typename T, unsigned TF_MAX_PRIORITY> | |
bool TaskQueue<T, TF_MAX_PRIORITY>::empty() const noexcept { | |
for(unsigned i=0; i<TF_MAX_PRIORITY; i++) { | |
if(!empty(i)) { | |
return false; | |
} | |
} | |
return true; | |
} | |
// Function: empty
// A bucket is empty when bottom has not advanced past top. Both loads are
// relaxed, so the result is only a snapshot under concurrency.
template <typename T, unsigned TF_MAX_PRIORITY>
bool TaskQueue<T, TF_MAX_PRIORITY>::empty(unsigned p) const noexcept {
  int64_t b = _bottom[p].data.load(std::memory_order_relaxed);
  int64_t t = _top[p].data.load(std::memory_order_relaxed);
  return (b <= t);
}
// Function: size
// Sums the sizes of all priority buckets via the compile-time unroll helper.
// The i==0 step assigns (rather than accumulates) into s, so the
// uninitialized declaration is never read.
template <typename T, unsigned TF_MAX_PRIORITY>
size_t TaskQueue<T, TF_MAX_PRIORITY>::size() const noexcept {
  size_t s;
  unroll<0, TF_MAX_PRIORITY, 1>([&](auto i) { s = i ? size(i) + s : size(i); });
  return s;
}
// Function: size
// Snapshot of the bucket size (bottom - top), clamped to zero because pop
// may transiently leave bottom below top.
template <typename T, unsigned TF_MAX_PRIORITY>
size_t TaskQueue<T, TF_MAX_PRIORITY>::size(unsigned p) const noexcept {
  int64_t b = _bottom[p].data.load(std::memory_order_relaxed);
  int64_t t = _top[p].data.load(std::memory_order_relaxed);
  return static_cast<size_t>(b >= t ? b - t : 0);
}
// Function: push
// Owner-only operation: appends an item at the bottom of the deque for the
// given priority, growing the ring buffer first when it is full.
template <typename T, unsigned TF_MAX_PRIORITY>
TF_FORCE_INLINE void TaskQueue<T, TF_MAX_PRIORITY>::push(T o, unsigned p) {
  // only the owner writes _bottom, so a relaxed load sees the latest value;
  // top is acquired to observe concurrent steals
  int64_t b = _bottom[p].data.load(std::memory_order_relaxed);
  int64_t t = _top[p].data.load(std::memory_order_acquire);
  Array* a = _array[p].load(std::memory_order_relaxed);

  // queue is full
  if(a->capacity() - 1 < (b - t)) {
    a = resize_array(a, p, b, t);
  }

  a->push(b, o);
  // release fence: make the stored item visible to thieves before the new
  // bottom index is published
  std::atomic_thread_fence(std::memory_order_release);
  _bottom[p].data.store(b + 1, std::memory_order_relaxed);
}
// Function: pop | |
template <typename T, unsigned TF_MAX_PRIORITY> | |
T TaskQueue<T, TF_MAX_PRIORITY>::pop() { | |
for(unsigned i=0; i<TF_MAX_PRIORITY; i++) { | |
if(auto t = pop(i); t) { | |
return t; | |
} | |
} | |
return nullptr; | |
} | |
// Function: pop
// Owner-only operation: takes one item from the bottom of the deque at
// priority p, racing against thieves only when a single item remains.
template <typename T, unsigned TF_MAX_PRIORITY>
TF_FORCE_INLINE T TaskQueue<T, TF_MAX_PRIORITY>::pop(unsigned p) {
  // tentatively reserve the bottom slot by decrementing bottom
  int64_t b = _bottom[p].data.load(std::memory_order_relaxed) - 1;
  Array* a = _array[p].load(std::memory_order_relaxed);
  _bottom[p].data.store(b, std::memory_order_relaxed);
  // order the bottom update before reading top (owner/thief handshake)
  std::atomic_thread_fence(std::memory_order_seq_cst);
  int64_t t = _top[p].data.load(std::memory_order_relaxed);

  T item {nullptr};

  if(t <= b) {
    item = a->pop(b);
    if(t == b) {
      // exactly one item left: race concurrent thieves via CAS on top;
      // losing the CAS means the last item just got stolen
      if(!_top[p].data.compare_exchange_strong(t, t+1,
                                               std::memory_order_seq_cst,
                                               std::memory_order_relaxed)) {
        item = nullptr;
      }
      _bottom[p].data.store(b + 1, std::memory_order_relaxed);
    }
  }
  else {
    // the deque was empty: undo the tentative reservation
    _bottom[p].data.store(b + 1, std::memory_order_relaxed);
  }

  return item;
}
// Function: steal | |
template <typename T, unsigned TF_MAX_PRIORITY> | |
T TaskQueue<T, TF_MAX_PRIORITY>::steal() { | |
for(unsigned i=0; i<TF_MAX_PRIORITY; i++) { | |
if(auto t = steal(i); t) { | |
return t; | |
} | |
} | |
return nullptr; | |
} | |
// Function: steal
// Thief-side operation: reads an item at top and claims it by CAS-advancing
// top. A failed CAS means another thief (or the owner's pop) won the race, in
// which case nullptr is returned even though the deque may be non-empty.
template <typename T, unsigned TF_MAX_PRIORITY>
T TaskQueue<T, TF_MAX_PRIORITY>::steal(unsigned p) {
  int64_t t = _top[p].data.load(std::memory_order_acquire);
  // order the top read before the bottom read (mirrors the fence in pop)
  std::atomic_thread_fence(std::memory_order_seq_cst);
  int64_t b = _bottom[p].data.load(std::memory_order_acquire);

  T item {nullptr};

  if(t < b) {
    Array* a = _array[p].load(std::memory_order_consume);
    // read the candidate before the CAS; it only becomes ours if the CAS wins
    item = a->pop(t);
    if(!_top[p].data.compare_exchange_strong(t, t+1,
                                             std::memory_order_seq_cst,
                                             std::memory_order_relaxed)) {
      return nullptr;
    }
  }

  return item;
}
// Function: capacity | |
template <typename T, unsigned TF_MAX_PRIORITY> | |
int64_t TaskQueue<T, TF_MAX_PRIORITY>::capacity() const noexcept { | |
size_t s; | |
unroll<0, TF_MAX_PRIORITY, 1>([&](auto i) { | |
s = i ? capacity(i) + s : capacity(i); | |
}); | |
return s; | |
} | |
// Function: capacity
// Capacity of the current ring buffer of the bucket at priority p.
template <typename T, unsigned TF_MAX_PRIORITY>
int64_t TaskQueue<T, TF_MAX_PRIORITY>::capacity(unsigned p) const noexcept {
  return _array[p].load(std::memory_order_relaxed)->capacity();
}
// Function: resize_array
// Doubles the ring buffer of priority p by copying the live range [t, b) into
// a new array. The old array cannot be freed immediately — concurrent thieves
// may still be reading it — so it is retired into _garbage and freed in the
// destructor.
template <typename T, unsigned TF_MAX_PRIORITY>
TF_NO_INLINE typename TaskQueue<T, TF_MAX_PRIORITY>::Array*
TaskQueue<T, TF_MAX_PRIORITY>::resize_array(Array* a, unsigned p, std::int64_t b, std::int64_t t) {
  Array* tmp = a->resize(b, t);
  _garbage[p].push_back(a);
  std::swap(a, tmp);
  _array[p].store(a, std::memory_order_release);
  // Note: the original paper using relaxed causes t-san to complain
  //_array.store(a, std::memory_order_relaxed);
  return a;
}
} // end of namespace tf ----------------------------------------------------- | |
/** | |
@file graph.hpp | |
@brief graph include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Class: Graph | |
// ---------------------------------------------------------------------------- | |
/** | |
@class Graph | |
@brief class to create a graph object | |
A graph is the ultimate storage for a task dependency graph and is the main | |
gateway to interact with an executor. | |
A graph manages a set of nodes in a global object pool that animates and | |
recycles node objects efficiently without going through repetitive and | |
expensive memory allocations and deallocations. | |
This class is mainly used for creating an opaque graph object in a custom | |
class to interact with the executor through taskflow composition. | |
A graph object is move-only. | |
*/ | |
class Graph {

  friend class Node;
  friend class FlowBuilder;
  friend class Subflow;
  friend class Taskflow;
  friend class Executor;

  public:

    /**
    @brief constructs a graph object
    */
    Graph() = default;

    /**
    @brief disabled copy constructor
    */
    Graph(const Graph&) = delete;

    /**
    @brief constructs a graph using move semantics
    */
    Graph(Graph&&);

    /**
    @brief destructs the graph object
    */
    ~Graph();

    /**
    @brief disabled copy assignment operator
    */
    Graph& operator = (const Graph&) = delete;

    /**
    @brief assigns a graph using move semantics
    */
    Graph& operator = (Graph&&);

    /**
    @brief queries if the graph is empty
    */
    bool empty() const;

    /**
    @brief queries the number of nodes in the graph
    */
    size_t size() const;

    /**
    @brief clears the graph
    */
    void clear();

  private:

    // raw node pointers of this graph; allocation and recycling are handled
    // by the helpers below, whose definitions are not visible here
    std::vector<Node*> _nodes;

    void _clear();           // presumably releases all nodes — definition not in view
    void _clear_detached();  // presumably releases detached nodes only — verify
    void _merge(Graph&&);    // absorbs the nodes of another graph
    void _erase(Node*);      // removes one node from _nodes

    /**
    @private
    */
    template <typename ...ArgsT>
    Node* _emplace_back(ArgsT&&...);
};
// ---------------------------------------------------------------------------- | |
/** | |
@class Runtime | |
@brief class to include a runtime object in a task | |
A runtime object allows users to interact with the | |
scheduling runtime inside a task, such as scheduling an active task, | |
spawning a subflow, and so on. | |
@code{.cpp} | |
tf::Task A, B, C, D; | |
std::tie(A, B, C, D) = taskflow.emplace( | |
[] () { return 0; }, | |
[&C] (tf::Runtime& rt) { // C must be captured by reference | |
std::cout << "B\n"; | |
rt.schedule(C); | |
}, | |
[] () { std::cout << "C\n"; }, | |
[] () { std::cout << "D\n"; } | |
); | |
A.precede(B, C, D); | |
executor.run(taskflow).wait(); | |
@endcode | |
A runtime object is associated with the worker and the executor | |
that runs the task. | |
*/ | |
class Runtime {

  friend class Executor;
  friend class FlowBuilder;

  public:

  /**
  @brief obtains the running executor

  The running executor of a runtime task is the executor that runs
  the parent taskflow of that runtime task.

  @code{.cpp}
  tf::Executor executor;
  tf::Taskflow taskflow;
  taskflow.emplace([&](tf::Runtime& rt){
    assert(&(rt.executor()) == &executor);
  });
  executor.run(taskflow).wait();
  @endcode
  */
  Executor& executor();

  /**
  @brief schedules an active task immediately to the worker's queue

  @param task the given active task to schedule immediately

  This member function immediately schedules an active task to the
  task queue of the associated worker in the runtime task.
  An active task is a task in a running taskflow.
  The task may or may not be running, and scheduling that task
  will immediately put the task into the task queue of the worker
  that is running the runtime task.
  Consider the following example:

  @code{.cpp}
  tf::Task A, B, C, D;
  std::tie(A, B, C, D) = taskflow.emplace(
    [] () { return 0; },
    [&C] (tf::Runtime& rt) {  // C must be captured by reference
      std::cout << "B\n";
      rt.schedule(C);
    },
    [] () { std::cout << "C\n"; },
    [] () { std::cout << "D\n"; }
  );
  A.precede(B, C, D);
  executor.run(taskflow).wait();
  @endcode

  The executor will first run the condition task @c A which returns @c 0
  to inform the scheduler to go to the runtime task @c B.
  During the execution of @c B, it directly schedules task @c C without
  going through the normal taskflow graph scheduling process.
  At this moment, task @c C is active because its parent taskflow is running.
  When the taskflow finishes, we will see both @c B and @c C in the output.
  */
  void schedule(Task task);

  /**
  @brief runs the given callable asynchronously

  @tparam F callable type
  @param f callable object

  The method creates an asynchronous task to launch the given
  function on the given arguments.
  The difference to tf::Executor::async is that the created asynchronous task
  pertains to the runtime.
  When the runtime joins, all asynchronous tasks created from the runtime
  are guaranteed to finish after the join returns.
  For example:

  @code{.cpp}
  std::atomic<int> counter(0);
  taskflow.emplace([&](tf::Runtime& rt){
    auto fu1 = rt.async([&](){ counter++; });
    auto fu2 = rt.async([&](){ counter++; });
    fu1.get();
    fu2.get();
    assert(counter == 2);

    // spawn 100 asynchronous tasks from the worker of the runtime
    for(int i=0; i<100; i++) {
      rt.async([&](){ counter++; });
    }

    // explicitly join the 100 asynchronous tasks
    rt.join();
    assert(counter == 102);
  });
  @endcode

  This method is thread-safe and can be called by multiple workers
  that hold the reference to the runtime.
  For example, the code below spawns 100 tasks from the worker of
  a runtime, and each of the 100 tasks spawns another task
  that will be run by another worker.

  @code{.cpp}
  std::atomic<int> counter(0);
  taskflow.emplace([&](tf::Runtime& rt){
    // worker of the runtime spawns 100 tasks each spawning another task
    // that will be run by another worker
    for(int i=0; i<100; i++) {
      rt.async([&](){
        counter++;
        rt.async([](){ counter++; });
      });
    }

    // explicitly join the 100 asynchronous tasks
    rt.join();
    assert(counter == 200);
  });
  @endcode
  */
  template <typename F>
  auto async(F&& f);

  /**
  @brief similar to tf::Runtime::async but assigns the task a name

  @tparam F callable type
  @param name assigned name to the task
  @param f callable

  @code{.cpp}
  taskflow.emplace([&](tf::Runtime& rt){
    auto future = rt.async("my task", [](){});
    future.get();
  });
  @endcode
  */
  template <typename F>
  auto async(const std::string& name, F&& f);

  /**
  @brief runs the given function asynchronously without returning any future object

  @tparam F callable type
  @param f callable

  This member function is more efficient than tf::Runtime::async
  and is encouraged when there is no data to return.

  @code{.cpp}
  std::atomic<int> counter(0);
  taskflow.emplace([&](tf::Runtime& rt){
    for(int i=0; i<100; i++) {
      rt.silent_async([&](){ counter++; });
    }
    rt.join();
    assert(counter == 100);
  });
  @endcode

  This member function is thread-safe.
  */
  template <typename F>
  void silent_async(F&& f);

  /**
  @brief similar to tf::Runtime::silent_async but assigns the task a name

  @tparam F callable type
  @param name assigned name to the task
  @param f callable

  @code{.cpp}
  taskflow.emplace([&](tf::Runtime& rt){
    rt.silent_async("my task", [](){});
    rt.join();
  });
  @endcode
  */
  template <typename F>
  void silent_async(const std::string& name, F&& f);

  /**
  @brief similar to tf::Runtime::silent_async but the caller must be the worker of the runtime

  @tparam F callable type
  @param name assigned name to the task
  @param f callable

  This method bypasses the caller-worker check performed by the executor
  and thus can only be called by the worker of this runtime.

  @code{.cpp}
  taskflow.emplace([&](tf::Runtime& rt){
    // running by the worker of this runtime
    rt.silent_async_unchecked("my task", [](){});
    rt.join();
  });
  @endcode
  */
  template <typename F>
  void silent_async_unchecked(const std::string& name, F&& f);

  /**
  @brief co-runs the given target and waits until it completes

  A target can be one of the following forms:
    + a dynamic task to spawn a subflow or
    + a composable graph object with `tf::Graph& T::graph()` defined

  @code{.cpp}
  // co-run a subflow and wait until all tasks complete
  taskflow.emplace([](tf::Runtime& rt){
    rt.corun([](tf::Subflow& sf){
      tf::Task A = sf.emplace([](){});
      tf::Task B = sf.emplace([](){});
    });
  });

  // co-run a taskflow and wait until all tasks complete
  tf::Taskflow taskflow1, taskflow2;
  taskflow1.emplace([](){ std::cout << "running taskflow1\n"; });
  taskflow2.emplace([&](tf::Runtime& rt){
    std::cout << "running taskflow2\n";
    rt.corun(taskflow1);
  });
  executor.run(taskflow2).wait();
  @endcode

  Although tf::Runtime::corun blocks until the operation completes,
  the caller thread (worker) is not blocked (e.g., sleeping or holding any lock).
  Instead, the caller thread joins the work-stealing loop of the executor
  and returns when all tasks in the target complete.
  */
  template <typename T>
  void corun(T&& target);

  /**
  @brief keeps running the work-stealing loop until the predicate becomes true

  @tparam P predicate type
  @param predicate a boolean predicate to indicate when to stop the loop

  The method keeps the caller worker running in the work-stealing loop
  until the stop predicate becomes true.
  */
  template <typename P>
  void corun_until(P&& predicate);

  /**
  @brief joins all asynchronous tasks spawned by this runtime

  Immediately joins all asynchronous tasks (tf::Runtime::async,
  tf::Runtime::silent_async).
  Unlike tf::Subflow::join, you can join multiple times from
  a tf::Runtime object.

  @code{.cpp}
  std::atomic<size_t> counter{0};
  taskflow.emplace([&](tf::Runtime& rt){
    // spawn 100 async tasks and join
    for(int i=0; i<100; i++) {
      rt.silent_async([&](){ counter++; });
    }
    rt.join();
    assert(counter == 100);

    // spawn another 100 async tasks and join
    for(int i=0; i<100; i++) {
      rt.silent_async([&](){ counter++; });
    }
    rt.join();
    assert(counter == 200);
  });
  @endcode

  @attention
  Only the worker of this tf::Runtime can issue join.
  */
  inline void join();

  /**
  @brief acquires a reference to the underlying worker
  */
  inline Worker& worker();

  protected:

  /**
  @private
  Constructs a runtime handle bound to an executor, a worker, and the
  parent node that is running the runtime task.
  */
  explicit Runtime(Executor&, Worker&, Node*);

  /**
  @private
  executor that runs the parent taskflow of this runtime task
  */
  Executor& _executor;

  /**
  @private
  worker that is running the runtime task
  */
  Worker& _worker;

  /**
  @private
  parent node associated with this runtime task
  */
  Node* _parent;

  /**
  @private
  */
  template <typename F>
  auto _async(Worker& w, const std::string& name, F&& f);

  /**
  @private
  */
  template <typename F>
  void _silent_async(Worker& w, const std::string& name, F&& f);
};
// constructor: binds the runtime to its executor, its worker, and the
// parent node of the runtime task; all three are stored by reference/pointer
// and must outlive this Runtime object
inline Runtime::Runtime(Executor& e, Worker& w, Node* p) :
  _executor{e},
  _worker  {w},
  _parent  {p}{
}
// Function: executor
// Returns the executor that runs the parent taskflow of this runtime task.
inline Executor& Runtime::executor() {
  return this->_executor;
}
// Function: worker
// Returns the worker that is running this runtime task.
inline Worker& Runtime::worker() {
  return this->_worker;
}
// ----------------------------------------------------------------------------
// Node
// ----------------------------------------------------------------------------

/**
@private
A node in a taskflow graph. A node stores the callable work (as a variant
over all supported task kinds), its graph connectivity (successors and
dependents), scheduling state, and optional semaphores.
*/
class Node {

  friend class Graph;
  friend class Task;
  friend class TaskView;
  friend class Taskflow;
  friend class Executor;
  friend class FlowBuilder;
  friend class Subflow;
  friend class Runtime;

  // lifecycle states of a dependent-async task
  enum class AsyncState : int {
    UNFINISHED = 0,
    LOCKED = 1,
    FINISHED = 2
  };

  TF_ENABLE_POOLABLE_ON_THIS;

  // state bit flags (combined in _state)
  constexpr static int CONDITIONED = 1;  // set when a dependent is a (multi-)condition task
  constexpr static int DETACHED    = 2;  // set for nodes of a detached subflow
  constexpr static int ACQUIRED    = 4;  // semaphore-related flag — semantics defined by the executor
  constexpr static int READY       = 8;  // scheduling flag — semantics defined by the executor

  // a node with no work assigned holds std::monostate
  using Placeholder = std::monostate;

  // static work handle: a plain callable, optionally taking a Runtime&
  struct Static {

    template <typename C>
    Static(C&&);

    std::variant<
      std::function<void()>, std::function<void(Runtime&)>
    > work;
  };

  // dynamic work handle: spawns a subflow graph at runtime
  struct Dynamic {

    template <typename C>
    Dynamic(C&&);

    std::function<void(Subflow&)> work;
    Graph subgraph;
  };

  // condition work handle: returns the index of the successor to run
  struct Condition {

    template <typename C>
    Condition(C&&);

    std::variant<
      std::function<int()>, std::function<int(Runtime&)>
    > work;
  };

  // multi-condition work handle: returns the indices of successors to run
  struct MultiCondition {

    template <typename C>
    MultiCondition(C&&);

    std::variant<
      std::function<SmallVector<int>()>, std::function<SmallVector<int>(Runtime&)>
    > work;
  };

  // module work handle: references an external composable graph
  struct Module {

    template <typename T>
    Module(T&);

    Graph& graph;
  };

  // async work handle
  struct Async {

    template <typename T>
    Async(T&&);

    std::function<void()> work;
  };

  // dependent-async work handle (no future); tracks completion via state
  struct DependentAsync {

    template <typename C>
    DependentAsync(C&&);

    std::function<void()> work;

    std::atomic<AsyncState> state {AsyncState::UNFINISHED};
  };

  using handle_t = std::variant<
    Placeholder,      // placeholder
    Static,           // static tasking
    Dynamic,          // dynamic tasking
    Condition,        // conditional tasking
    MultiCondition,   // multi-conditional tasking
    Module,           // composable tasking
    Async,            // async tasking
    DependentAsync    // dependent async tasking (no future)
  >;

  // semaphores this node acquires before and releases after running
  struct Semaphores {
    SmallVector<Semaphore*> to_acquire;
    SmallVector<Semaphore*> to_release;
  };

  public:

  // variant indices of handle_t, used as task-type tags
  constexpr static auto PLACEHOLDER     = get_index_v<Placeholder, handle_t>;
  constexpr static auto STATIC          = get_index_v<Static, handle_t>;
  constexpr static auto DYNAMIC         = get_index_v<Dynamic, handle_t>;
  constexpr static auto CONDITION       = get_index_v<Condition, handle_t>;
  constexpr static auto MULTI_CONDITION = get_index_v<MultiCondition, handle_t>;
  constexpr static auto MODULE          = get_index_v<Module, handle_t>;
  constexpr static auto ASYNC           = get_index_v<Async, handle_t>;
  constexpr static auto DEPENDENT_ASYNC = get_index_v<DependentAsync, handle_t>;

  Node() = default;

  template <typename... Args>
  Node(const std::string&, unsigned, Topology*, Node*, size_t, Args&&... args);

  ~Node();

  size_t num_successors() const;
  size_t num_dependents() const;
  size_t num_strong_dependents() const;
  size_t num_weak_dependents() const;

  const std::string& name() const;

  private:

  std::string _name;                       // user-assigned task name

  unsigned _priority {0};                  // task priority (smaller = higher)

  Topology* _topology {nullptr};           // topology of the running taskflow
  Node* _parent {nullptr};                 // parent node (e.g., subflow owner)

  void* _data {nullptr};                   // opaque user data (see Task::data)

  SmallVector<Node*> _successors;          // outgoing edges
  SmallVector<Node*> _dependents;          // incoming edges

  std::atomic<int> _state {0};             // bit-or of the state flags above
  std::atomic<size_t> _join_counter {0};   // remaining strong dependencies

  std::unique_ptr<Semaphores> _semaphores; // lazily allocated by Task::acquire/release

  handle_t _handle;                        // the work of this node

  void _precede(Node*);
  void _set_up_join_counter();

  bool _is_cancelled() const;
  bool _is_conditioner() const;
  bool _acquire_all(SmallVector<Node*>&);

  SmallVector<Node*> _release_all();
};
// ----------------------------------------------------------------------------
// Node Object Pool
// ----------------------------------------------------------------------------

/**
@private
Global object pool that recycles Node objects to amortize allocation cost;
`inline` gives a single definition across translation units.
*/
inline ObjectPool<Node> node_pool;
// ---------------------------------------------------------------------------- | |
// Definition for Node::Static | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::Static::Static(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::Dynamic | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::Dynamic::Dynamic(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::Condition | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::Condition::Condition(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::MultiCondition | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::MultiCondition::MultiCondition(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::Module | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename T> | |
inline Node::Module::Module(T& obj) : graph{ obj.graph() } { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::Async | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::Async::Async(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ---------------------------------------------------------------------------- | |
// Definition for Node::DependentAsync | |
// ---------------------------------------------------------------------------- | |
// Constructor | |
template <typename C> | |
Node::DependentAsync::DependentAsync(C&& c) : work {std::forward<C>(c)} { | |
} | |
// ----------------------------------------------------------------------------
// Definition for Node
// ----------------------------------------------------------------------------

// Constructor
//   name         - user-visible task name
//   priority     - task priority (smaller value = higher priority)
//   topology     - topology of the running taskflow (may be nullptr)
//   parent       - parent node (may be nullptr)
//   join_counter - initial number of strong dependencies
//   args...      - forwarded to construct the work handle variant
template <typename... Args>
Node::Node(
  const std::string& name,
  unsigned priority,
  Topology* topology,
  Node* parent,
  size_t join_counter,
  Args&&... args
) :
  _name         {name},
  _priority     {priority},
  _topology     {topology},
  _parent       {parent},
  _join_counter {join_counter},
  _handle       {std::forward<Args>(args)...} {
}
//Node::Node(Args&&... args): _handle{std::forward<Args>(args)...} {
//}

// Destructor
// Recursively nested subflows would otherwise be destroyed through a chain
// of recursive Node destructor calls; to avoid stack overflow we flatten the
// entire subgraph hierarchy into one worklist and recycle nodes iteratively.
inline Node::~Node() {
  // this is to avoid stack overflow
  if(_handle.index() == DYNAMIC) {
    // using std::get_if instead of std::get makes this compatible
    // with older macOS versions
    // the result of std::get_if is guaranteed to be non-null
    // due to the index check above
    auto& subgraph = std::get_if<Dynamic>(&_handle)->subgraph;

    // seed the worklist with the direct children and detach them from
    // the subgraph so their destructors won't recurse
    std::vector<Node*> nodes;
    nodes.reserve(subgraph.size());

    std::move(
      subgraph._nodes.begin(), subgraph._nodes.end(), std::back_inserter(nodes)
    );
    subgraph._nodes.clear();

    // breadth-style expansion: appending to `nodes` while scanning it pulls
    // every transitively nested subflow node into the same flat list
    size_t i = 0;

    while(i < nodes.size()) {

      if(nodes[i]->_handle.index() == DYNAMIC) {
        auto& sbg = std::get_if<Dynamic>(&(nodes[i]->_handle))->subgraph;
        std::move(
          sbg._nodes.begin(), sbg._nodes.end(), std::back_inserter(nodes)
        );
        sbg._nodes.clear();
      }

      ++i;
    }

    //auto& np = Graph::_node_pool();
    // recycle every flattened node; each destructor now sees an empty subgraph
    for(i=0; i<nodes.size(); ++i) {
      node_pool.recycle(nodes[i]);
    }
  }
}
// Procedure: _precede
// Adds a directed edge this -> v by updating both adjacency lists.
inline void Node::_precede(Node* v) {
  v->_dependents.push_back(this);
  _successors.push_back(v);
}
// Function: num_successors
// Number of outgoing edges of this node.
inline size_t Node::num_successors() const {
  return this->_successors.size();
}
// Function: num_dependents
// Number of incoming edges of this node.
inline size_t Node::num_dependents() const {
  return this->_dependents.size();
}
// Function: num_weak_dependents
// Counts dependents that are (multi-)condition tasks; such edges are "weak"
// and do not contribute to the join counter.
inline size_t Node::num_weak_dependents() const {
  size_t count = 0;
  for(Node* dependent : _dependents) {
    if(dependent->_is_conditioner()) {
      ++count;
    }
  }
  return count;
}
// Function: num_strong_dependents
// Counts dependents that are not (multi-)condition tasks; such edges are
// "strong" and contribute to the join counter.
inline size_t Node::num_strong_dependents() const {
  size_t count = 0;
  for(Node* dependent : _dependents) {
    if(!dependent->_is_conditioner()) {
      ++count;
    }
  }
  return count;
}
// Function: name
// User-assigned name of this node.
inline const std::string& Node::name() const {
  return this->_name;
}
// Function: _is_conditioner | |
inline bool Node::_is_conditioner() const { | |
return _handle.index() == Node::CONDITION || | |
_handle.index() == Node::MULTI_CONDITION; | |
} | |
// Function: _is_cancelled | |
inline bool Node::_is_cancelled() const { | |
return _topology && _topology->_is_cancelled.load(std::memory_order_relaxed); | |
} | |
// Procedure: _set_up_join_counter | |
inline void Node::_set_up_join_counter() { | |
size_t c = 0; | |
for(auto p : _dependents) { | |
//if(p->_handle.index() == Node::CONDITION) { | |
if(p->_is_conditioner()) { | |
_state.fetch_or(Node::CONDITIONED, std::memory_order_relaxed); | |
} | |
else { | |
c++; | |
} | |
} | |
_join_counter.store(c, std::memory_order_release); | |
} | |
// Function: _acquire_all
// Tries to acquire every semaphore in _semaphores->to_acquire.
// On success returns true. If any acquisition fails, the node is parked on
// that semaphore's wait list (via _try_acquire_or_wait), all previously
// acquired semaphores are released in reverse order, any nodes woken by
// those releases are appended to `nodes`, and false is returned.
inline bool Node::_acquire_all(SmallVector<Node*>& nodes) {

  auto& to_acquire = _semaphores->to_acquire;

  for(size_t i = 0; i < to_acquire.size(); ++i) {
    if(!to_acquire[i]->_try_acquire_or_wait(this)) {
      // roll back: release semaphores [0, i) in reverse order
      // (j runs 1..i so that i-j walks i-1 down to 0)
      for(size_t j = 1; j <= i; ++j) {
        auto r = to_acquire[i-j]->_release();
        nodes.insert(std::end(nodes), std::begin(r), std::end(r));
      }
      return false;
    }
  }
  return true;
}
// Function: _release_all
// Releases every semaphore in _semaphores->to_release and returns the
// union of nodes woken up by those releases.
inline SmallVector<Node*> Node::_release_all() {

  auto& to_release = _semaphores->to_release;

  SmallVector<Node*> waked;
  for(size_t i = 0; i < to_release.size(); ++i) {
    auto r = to_release[i]->_release();
    waked.insert(waked.end(), r.begin(), r.end());
  }

  return waked;
}
// ---------------------------------------------------------------------------- | |
// Node Deleter | |
// ---------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
struct NodeDeleter { | |
void operator ()(Node* ptr) { | |
node_pool.recycle(ptr); | |
} | |
}; | |
// ----------------------------------------------------------------------------
// Graph definition
// ----------------------------------------------------------------------------

// Destructor: recycles all owned nodes back to the pool.
inline Graph::~Graph() {
  _clear();
}
// Move constructor: steals the node list; `other` is left empty.
inline Graph::Graph(Graph&& other) :
  _nodes {std::move(other._nodes)} {
}
// Move assignment
// Recycles the current nodes and steals `other`'s node list, leaving
// `other` empty. The self-assignment guard prevents a self-move from
// recycling the nodes and then moving from the now-cleared list.
inline Graph& Graph::operator = (Graph&& other) {
  if(this != &other) {
    _clear();
    _nodes = std::move(other._nodes);
  }
  return *this;
}
// Procedure: clear
// Public wrapper: recycles all nodes and empties the graph.
inline void Graph::clear() {
  _clear();
}
// Procedure: clear | |
inline void Graph::_clear() { | |
for(auto node : _nodes) { | |
node_pool.recycle(node); | |
} | |
_nodes.clear(); | |
} | |
// Procedure: _clear_detached
// Removes and recycles only the nodes whose DETACHED state bit is set.
// std::partition moves the kept (non-detached) nodes to the front and
// returns the boundary; everything at or past `mid` is recycled and the
// list is shrunk to the kept prefix. Note that partition does not
// preserve the relative order of the kept nodes.
inline void Graph::_clear_detached() {

  auto mid = std::partition(_nodes.begin(), _nodes.end(), [] (Node* node) {
    return !(node->_state.load(std::memory_order_relaxed) & Node::DETACHED);
  });

  for(auto itr = mid; itr != _nodes.end(); ++itr) {
    node_pool.recycle(*itr);
  }
  _nodes.resize(std::distance(_nodes.begin(), mid));
}
// Procedure: _merge
// Transfers ownership of all nodes in `g` into this graph; `g` ends empty.
inline void Graph::_merge(Graph&& g) {
  _nodes.insert(_nodes.end(), g._nodes.begin(), g._nodes.end());
  g._nodes.clear();
}
// Function: _erase
// Removes `node` from the graph (if present) and recycles it to the pool;
// a node not owned by this graph is left untouched.
inline void Graph::_erase(Node* node) {
  auto itr = std::find(_nodes.begin(), _nodes.end(), node);
  if(itr == _nodes.end()) {
    return;
  }
  _nodes.erase(itr);
  node_pool.recycle(node);
}
// Function: size
// Number of nodes owned by this graph.
inline size_t Graph::size() const {
  return this->_nodes.size();
}
// Function: empty | |
inline bool Graph::empty() const { | |
return _nodes.empty(); | |
} | |
/** | |
@private | |
*/ | |
template <typename ...ArgsT> | |
Node* Graph::_emplace_back(ArgsT&&... args) { | |
_nodes.push_back(node_pool.animate(std::forward<ArgsT>(args)...)); | |
return _nodes.back(); | |
} | |
} // end of namespace tf. --------------------------------------------------- | |
/** | |
@file task.hpp | |
@brief task include file | |
*/ | |
namespace tf { | |
// ----------------------------------------------------------------------------
// Task Types
// ----------------------------------------------------------------------------

/**
@enum TaskType

@brief enumeration of all task types
*/
enum class TaskType : int {
  /** @brief placeholder task type (no work assigned) */
  PLACEHOLDER = 0,
  /** @brief static task type */
  STATIC,
  /** @brief dynamic (subflow) task type */
  DYNAMIC,
  /** @brief condition task type (covers multi-condition tasks as well) */
  CONDITION,
  /** @brief module task type */
  MODULE,
  /** @brief asynchronous task type (covers dependent-async tasks as well) */
  ASYNC,
  /** @brief undefined task type (for internal use only) */
  UNDEFINED
};
/**
@private
@brief array of all task types (used for iterating task types);
UNDEFINED is intentionally excluded since it is internal-only
*/
inline constexpr std::array<TaskType, 6> TASK_TYPES = {
  TaskType::PLACEHOLDER,
  TaskType::STATIC,
  TaskType::DYNAMIC,
  TaskType::CONDITION,
  TaskType::MODULE,
  TaskType::ASYNC,
};
/** | |
@brief convert a task type to a human-readable string | |
The name of each task type is the lowercase string of its characters.
@code{.cpp} | |
TaskType::PLACEHOLDER -> "placeholder" | |
TaskType::STATIC -> "static" | |
TaskType::DYNAMIC -> "subflow" | |
TaskType::CONDITION -> "condition" | |
TaskType::MODULE -> "module" | |
TaskType::ASYNC -> "async" | |
@endcode | |
*/ | |
// Maps a task type to its human-readable name; unknown values
// (including TaskType::UNDEFINED) map to "undefined".
inline const char* to_string(TaskType type) {
  switch(type) {
    case TaskType::PLACEHOLDER: return "placeholder";
    case TaskType::STATIC:      return "static";
    case TaskType::DYNAMIC:     return "subflow";
    case TaskType::CONDITION:   return "condition";
    case TaskType::MODULE:      return "module";
    case TaskType::ASYNC:       return "async";
    default:                    return "undefined";
  }
}
// ----------------------------------------------------------------------------
// Task Traits
// ----------------------------------------------------------------------------

/**
@brief determines if a callable is a dynamic task

A dynamic task is a callable object constructible from std::function<void(Subflow&)>.
The Runtime& exclusion disambiguates generic callables (e.g., generic lambdas)
that would otherwise be invocable with both Subflow& and Runtime&.
*/
template <typename C>
constexpr bool is_dynamic_task_v =
  std::is_invocable_r_v<void, C, Subflow&> &&
  !std::is_invocable_r_v<void, C, Runtime&>;
/**
@brief determines if a callable is a condition task

A condition task is a callable object constructible from std::function<int()>
or std::function<int(tf::Runtime&)>.
Dynamic tasks are excluded so a callable cannot classify as both.
*/
template <typename C>
constexpr bool is_condition_task_v =
  (std::is_invocable_r_v<int, C> || std::is_invocable_r_v<int, C, Runtime&>) &&
  !is_dynamic_task_v<C>;
/**
@brief determines if a callable is a multi-condition task

A multi-condition task is a callable object constructible from
std::function<tf::SmallVector<int>()> or
std::function<tf::SmallVector<int>(tf::Runtime&)>.
Dynamic tasks are excluded so a callable cannot classify as both.
*/
template <typename C>
constexpr bool is_multi_condition_task_v =
  (std::is_invocable_r_v<SmallVector<int>, C> ||
  std::is_invocable_r_v<SmallVector<int>, C, Runtime&>) &&
  !is_dynamic_task_v<C>;
/**
@brief determines if a callable is a static task

A static task is a callable object constructible from std::function<void()>
or std::function<void(tf::Runtime&)>.
All more specific task categories are excluded first, making static the
fallback classification (note int-returning callables are also invocable
with a discarded result, hence the condition-task exclusions).
*/
template <typename C>
constexpr bool is_static_task_v =
  (std::is_invocable_r_v<void, C> || std::is_invocable_r_v<void, C, Runtime&>) &&
  !is_condition_task_v<C> &&
  !is_multi_condition_task_v<C> &&
  !is_dynamic_task_v<C>;
// ----------------------------------------------------------------------------
// Task
// ----------------------------------------------------------------------------

/**
@class Task

@brief class to create a task handle over a node in a taskflow graph

A task is a wrapper over a node in a taskflow graph.
It provides a set of methods for users to access and modify the attributes of
the associated node in the taskflow graph.
A task is a very lightweight object (i.e., only storing a node pointer) that
can be trivially copied around,
and it does not own the lifetime of the associated node.
*/
class Task {

  friend class FlowBuilder;
  friend class Runtime;
  friend class Taskflow;
  friend class TaskView;
  friend class Executor;

  public:

  /**
  @brief constructs an empty task
  */
  Task() = default;

  /**
  @brief constructs the task with the copy of the other task
  */
  Task(const Task& other);

  /**
  @brief replaces the contents with a copy of the other task
  */
  Task& operator = (const Task&);

  /**
  @brief replaces the contents with a null pointer
  */
  Task& operator = (std::nullptr_t);

  /**
  @brief compares if two tasks are associated with the same graph node
  */
  bool operator == (const Task& rhs) const;

  /**
  @brief compares if two tasks are not associated with the same graph node
  */
  bool operator != (const Task& rhs) const;

  /**
  @brief queries the name of the task
  */
  const std::string& name() const;

  /**
  @brief queries the number of successors of the task
  */
  size_t num_successors() const;

  /**
  @brief queries the number of predecessors of the task
  */
  size_t num_dependents() const;

  /**
  @brief queries the number of strong dependents of the task
  */
  size_t num_strong_dependents() const;

  /**
  @brief queries the number of weak dependents of the task
  */
  size_t num_weak_dependents() const;

  /**
  @brief assigns a name to the task

  @param name a @std_string acceptable string

  @return @c *this
  */
  Task& name(const std::string& name);

  /**
  @brief assigns a callable

  @tparam C callable type

  @param callable callable to construct a task

  @return @c *this
  */
  template <typename C>
  Task& work(C&& callable);

  /**
  @brief creates a module task from a taskflow

  @tparam T object type
  @param object a custom object that defines @c T::graph() method

  @return @c *this
  */
  template <typename T>
  Task& composed_of(T& object);

  /**
  @brief adds precedence links from this to other tasks

  @tparam Ts parameter pack

  @param tasks one or multiple tasks

  @return @c *this
  */
  template <typename... Ts>
  Task& precede(Ts&&... tasks);

  /**
  @brief adds precedence links from other tasks to this

  @tparam Ts parameter pack

  @param tasks one or multiple tasks

  @return @c *this
  */
  template <typename... Ts>
  Task& succeed(Ts&&... tasks);

  /**
  @brief makes the task release this semaphore
  */
  Task& release(Semaphore& semaphore);

  /**
  @brief makes the task acquire this semaphore
  */
  Task& acquire(Semaphore& semaphore);

  /**
  @brief assigns pointer to user data

  @param data pointer to user data

  The following example shows how to attach user data to a task and
  run the task iteratively while changing the data value:

  @code{.cpp}
  tf::Executor executor;
  tf::Taskflow taskflow("attach data to a task");

  int data;

  // create a task and attach it the data
  auto A = taskflow.placeholder();
  A.data(&data).work([A](){
    auto d = *static_cast<int*>(A.data());
    std::cout << "data is " << d << std::endl;
  });

  // run the taskflow iteratively with changing data
  for(data = 0; data<10; data++){
    executor.run(taskflow).wait();
  }
  @endcode

  @return @c *this
  */
  Task& data(void* data);

  /**
  @brief assigns a priority value to the task

  A priority value can be one of the following three levels,
  tf::TaskPriority::HIGH (numerically equivalent to 0),
  tf::TaskPriority::NORMAL (numerically equivalent to 1), and
  tf::TaskPriority::LOW (numerically equivalent to 2).
  The smaller the priority value, the higher the priority.
  */
  Task& priority(TaskPriority p);

  /**
  @brief queries the priority value of the task
  */
  TaskPriority priority() const;

  /**
  @brief resets the task handle to null
  */
  void reset();

  /**
  @brief resets the associated work to a placeholder
  */
  void reset_work();

  /**
  @brief queries if the task handle is empty (i.e., does not point to
         any task node)
  */
  bool empty() const;

  /**
  @brief queries if the task has a work assigned
  */
  bool has_work() const;

  /**
  @brief applies a visitor callable to each successor of the task
  */
  template <typename V>
  void for_each_successor(V&& visitor) const;

  /**
  @brief applies a visitor callable to each dependent of the task
  */
  template <typename V>
  void for_each_dependent(V&& visitor) const;

  /**
  @brief obtains a hash value of the underlying node
  */
  size_t hash_value() const;

  /**
  @brief returns the task type
  */
  TaskType type() const;

  /**
  @brief dumps the task through an output stream
  */
  void dump(std::ostream& ostream) const;

  /**
  @brief queries pointer to user data
  */
  void* data() const;

  private:

  Task(Node*);

  // non-owning pointer to the underlying graph node
  Node* _node {nullptr};
};
// Constructor: wraps a (non-owning) node pointer.
inline Task::Task(Node* node) : _node {node} {
}
// Copy constructor: shallow copy of the node pointer (tasks do not own nodes).
inline Task::Task(const Task& rhs) : _node {rhs._node} {
}
// Function: precede
// Adds an edge from this task to every task in the pack
// (fold over the comma operator).
template <typename... Ts>
Task& Task::precede(Ts&&... tasks) {
  (_node->_precede(tasks._node), ...);
  //_precede(std::forward<Ts>(tasks)...);
  return *this;
}
// Function: succeed
// Adds an edge from every task in the pack to this task
// (fold over the comma operator).
template <typename... Ts>
Task& Task::succeed(Ts&&... tasks) {
  (tasks._node->_precede(_node), ...);
  //_succeed(std::forward<Ts>(tasks)...);
  return *this;
}
// Function: composed_of
// Replaces the node's work with a module handle referencing object.graph();
// the object must outlive the task.
template <typename T>
Task& Task::composed_of(T& object) {
  _node->_handle.emplace<Node::Module>(object);
  return *this;
}
// Operator =: rebinds this handle to the other task's node.
inline Task& Task::operator = (const Task& rhs) {
  _node = rhs._node;
  return *this;
}
// Operator =: resets this handle to null.
inline Task& Task::operator = (std::nullptr_t ptr) {
  _node = ptr;
  return *this;
}
// Operator ==: two tasks are equal when they wrap the same graph node.
inline bool Task::operator == (const Task& rhs) const {
  return this->_node == rhs._node;
}
// Operator !=: defined as the negation of operator ==.
inline bool Task::operator != (const Task& rhs) const {
  return !(*this == rhs);
}
// Function: name (setter)
// Assigns a user-visible name to the underlying node.
inline Task& Task::name(const std::string& name) {
  _node->_name = name;
  return *this;
}
// Function: acquire
// Registers a semaphore this task must acquire before it runs; the
// semaphore list is allocated lazily on first use.
inline Task& Task::acquire(Semaphore& s) {
  if(_node->_semaphores == nullptr) {
    _node->_semaphores = std::make_unique<Node::Semaphores>();
  }
  _node->_semaphores->to_acquire.push_back(&s);
  return *this;
}
// Function: release
// Registers a semaphore this task releases after it runs; the
// semaphore list is allocated lazily on first use.
inline Task& Task::release(Semaphore& s) {
  if(_node->_semaphores == nullptr) {
    _node->_semaphores = std::make_unique<Node::Semaphores>();
  }
  _node->_semaphores->to_release.push_back(&s);
  return *this;
}
// Procedure: reset
// Nulls the handle; the underlying node (if any) is untouched.
inline void Task::reset() {
  _node = nullptr;
}
// Procedure: reset_work
// Replaces the node's work with the placeholder (std::monostate) alternative.
inline void Task::reset_work() {
  _node->_handle.emplace<std::monostate>();
}
// Function: name (getter)
inline const std::string& Task::name() const {
  return _node->_name;
}
// Function: num_dependents — forwards to the underlying node.
inline size_t Task::num_dependents() const {
  return _node->num_dependents();
}
// Function: num_strong_dependents — forwards to the underlying node.
inline size_t Task::num_strong_dependents() const {
  return _node->num_strong_dependents();
}
// Function: num_weak_dependents — forwards to the underlying node.
inline size_t Task::num_weak_dependents() const {
  return _node->num_weak_dependents();
}
// Function: num_successors
// Forwards to the node's successor count.
inline size_t Task::num_successors() const {
  return _node->num_successors();
}
// Function: empty
// True when this handle is not bound to any node.
inline bool Task::empty() const {
  return _node == nullptr;
}
// Function: has_work | |
inline bool Task::has_work() const { | |
return _node ? _node->_handle.index() != 0 : false; | |
} | |
// Function: task_type
// Maps the node's variant index to the public TaskType enumeration.
// Note: MULTI_CONDITION is deliberately reported as CONDITION, and
// DEPENDENT_ASYNC as ASYNC — the public API does not distinguish them.
inline TaskType Task::type() const {
  switch(_node->_handle.index()) {
    case Node::PLACEHOLDER:     return TaskType::PLACEHOLDER;
    case Node::STATIC:          return TaskType::STATIC;
    case Node::DYNAMIC:         return TaskType::DYNAMIC;
    case Node::CONDITION:       return TaskType::CONDITION;
    case Node::MULTI_CONDITION: return TaskType::CONDITION;
    case Node::MODULE:          return TaskType::MODULE;
    case Node::ASYNC:           return TaskType::ASYNC;
    case Node::DEPENDENT_ASYNC: return TaskType::ASYNC;
    default:                    return TaskType::UNDEFINED;
  }
}
// Function: for_each_successor | |
template <typename V> | |
void Task::for_each_successor(V&& visitor) const { | |
for(size_t i=0; i<_node->_successors.size(); ++i) { | |
visitor(Task(_node->_successors[i])); | |
} | |
} | |
// Function: for_each_dependent | |
template <typename V> | |
void Task::for_each_dependent(V&& visitor) const { | |
for(size_t i=0; i<_node->_dependents.size(); ++i) { | |
visitor(Task(_node->_dependents[i])); | |
} | |
} | |
// Function: hash_value
// Hashes the node pointer; two handles to the same node hash equally.
inline size_t Task::hash_value() const {
  return std::hash<Node*>{}(_node);
}
// Procedure: dump | |
inline void Task::dump(std::ostream& os) const { | |
os << "task "; | |
if(name().empty()) os << _node; | |
else os << name(); | |
os << " [type=" << to_string(type()) << ']'; | |
} | |
// Function: work
// Assigns a callable to this task. The callable's signature, inspected
// by the is_*_task_v traits, selects the node variant at compile time
// (static, dynamic, condition, or multi-condition task).
template <typename C>
Task& Task::work(C&& c) {
  if constexpr(is_static_task_v<C>) {
    _node->_handle.emplace<Node::Static>(std::forward<C>(c));
  }
  else if constexpr(is_dynamic_task_v<C>) {
    _node->_handle.emplace<Node::Dynamic>(std::forward<C>(c));
  }
  else if constexpr(is_condition_task_v<C>) {
    _node->_handle.emplace<Node::Condition>(std::forward<C>(c));
  }
  else if constexpr(is_multi_condition_task_v<C>) {
    _node->_handle.emplace<Node::MultiCondition>(std::forward<C>(c));
  }
  else {
    // Reject any callable that matches none of the task signatures.
    static_assert(dependent_false_v<C>, "invalid task callable");
  }
  return *this;
}
// Function: data
// Returns the opaque user data pointer attached to the node.
inline void* Task::data() const {
  return _node->_data;
}
// Function: data
// Attaches an opaque user data pointer to the node (not owned).
inline Task& Task::data(void* data) {
  _node->_data = data;
  return *this;
}
// Function: priority
// Stores the priority as its underlying unsigned value on the node.
inline Task& Task::priority(TaskPriority p) {
  _node->_priority = static_cast<unsigned>(p);
  return *this;
}
// Function: priority
// Converts the node's stored unsigned priority back to TaskPriority.
inline TaskPriority Task::priority() const {
  return static_cast<TaskPriority>(_node->_priority);
}
// ---------------------------------------------------------------------------- | |
// global ostream | |
// ---------------------------------------------------------------------------- | |
/**
@brief overload of ostream inserter operator for Task
*/
inline std::ostream& operator << (std::ostream& os, const Task& task) {
  // Delegates to Task::dump, which prints "task <name> [type=...]".
  task.dump(os);
  return os;
}
// ---------------------------------------------------------------------------- | |
// Task View | |
// ---------------------------------------------------------------------------- | |
/**
@class TaskView

@brief class to access task information from the observer interface
*/
class TaskView {

  friend class Executor;

  public:

    /**
    @brief queries the name of the task
    */
    const std::string& name() const;

    /**
    @brief queries the number of successors of the task
    */
    size_t num_successors() const;

    /**
    @brief queries the number of predecessors of the task
    */
    size_t num_dependents() const;

    /**
    @brief queries the number of strong dependents of the task
    */
    size_t num_strong_dependents() const;

    /**
    @brief queries the number of weak dependents of the task
    */
    size_t num_weak_dependents() const;

    /**
    @brief applies a visitor callable to each successor of the task
    */
    template <typename V>
    void for_each_successor(V&& visitor) const;

    /**
    @brief applies a visitor callable to each dependent of the task
    */
    template <typename V>
    void for_each_dependent(V&& visitor) const;

    /**
    @brief queries the task type
    */
    TaskType type() const;

    /**
    @brief obtains a hash value of the underlying node
    */
    size_t hash_value() const;

  private:

    // Constructed only by the executor; holds a non-owning reference,
    // so a view must not outlive the node it observes.
    TaskView(const Node&);
    TaskView(const TaskView&) = default;

    const Node& _node;
};
// Constructor
// Binds the view to the observed node (non-owning reference).
inline TaskView::TaskView(const Node& node) : _node {node} {
}
// Function: name
// Returns the name stored on the observed node.
inline const std::string& TaskView::name() const {
  return _node._name;
}
// Function: num_dependents
// Forwards to the observed node's dependent count.
inline size_t TaskView::num_dependents() const {
  return _node.num_dependents();
}
// Function: num_strong_dependents
// Forwards to the observed node's strong dependent count.
inline size_t TaskView::num_strong_dependents() const {
  return _node.num_strong_dependents();
}
// Function: num_weak_dependents
// Forwards to the observed node's weak dependent count.
inline size_t TaskView::num_weak_dependents() const {
  return _node.num_weak_dependents();
}
// Function: num_successors
// Forwards to the observed node's successor count.
inline size_t TaskView::num_successors() const {
  return _node.num_successors();
}
// Function: type
// Maps the node's variant index to TaskType, mirroring Task::type:
// MULTI_CONDITION reports as CONDITION and DEPENDENT_ASYNC as ASYNC.
inline TaskType TaskView::type() const {
  switch(_node._handle.index()) {
    case Node::PLACEHOLDER:     return TaskType::PLACEHOLDER;
    case Node::STATIC:          return TaskType::STATIC;
    case Node::DYNAMIC:         return TaskType::DYNAMIC;
    case Node::CONDITION:       return TaskType::CONDITION;
    case Node::MULTI_CONDITION: return TaskType::CONDITION;
    case Node::MODULE:          return TaskType::MODULE;
    case Node::ASYNC:           return TaskType::ASYNC;
    case Node::DEPENDENT_ASYNC: return TaskType::ASYNC;
    default:                    return TaskType::UNDEFINED;
  }
}
// Function: hash_value
// Hashes the address of the observed node; consistent with
// std::hash<tf::Task> on a handle to the same node.
inline size_t TaskView::hash_value() const {
  return std::hash<const Node*>{}(&_node);
}
// Function: for_each_successor | |
template <typename V> | |
void TaskView::for_each_successor(V&& visitor) const { | |
for(size_t i=0; i<_node._successors.size(); ++i) { | |
visitor(TaskView(*_node._successors[i])); | |
} | |
} | |
// Function: for_each_dependent | |
template <typename V> | |
void TaskView::for_each_dependent(V&& visitor) const { | |
for(size_t i=0; i<_node._dependents.size(); ++i) { | |
visitor(TaskView(*_node._dependents[i])); | |
} | |
} | |
} // end of namespace tf. --------------------------------------------------- | |
namespace std {

/**
@struct hash

@brief hash specialization for std::hash<tf::Task>
*/
template <>
struct hash<tf::Task> {
  // Delegates to Task::hash_value (hash of the underlying node pointer).
  auto operator() (const tf::Task& task) const noexcept {
    return task.hash_value();
  }
};

/**
@struct hash

@brief hash specialization for std::hash<tf::TaskView>
*/
template <>
struct hash<tf::TaskView> {
  // Delegates to TaskView::hash_value (hash of the viewed node's address).
  auto operator() (const tf::TaskView& task_view) const noexcept {
    return task_view.hash_value();
  }
};

}  // end of namespace std ----------------------------------------------------
// 2019/02/09 - created by Tsung-Wei Huang | |
// - modified the event count from Eigen | |
#include <iostream> | |
#include <vector> | |
#include <cstdlib> | |
#include <cstdio> | |
#include <atomic> | |
#include <memory> | |
#include <deque> | |
#include <mutex> | |
#include <condition_variable> | |
#include <thread> | |
#include <algorithm> | |
#include <numeric> | |
#include <cassert> | |
// This file is part of Eigen, a lightweight C++ template library | |
// for linear algebra. | |
// | |
// Copyright (C) 2016 Dmitry Vyukov <[email protected]> | |
// | |
// This Source Code Form is subject to the terms of the Mozilla | |
// Public License v. 2.0. If a copy of the MPL was not distributed | |
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
namespace tf { | |
// Notifier allows to wait for arbitrary predicates in non-blocking | |
// algorithms. Think of condition variable, but wait predicate does not need to | |
// be protected by a mutex. Usage: | |
// Waiting thread does: | |
// | |
// if (predicate) | |
// return act(); | |
// Notifier::Waiter& w = waiters[my_index]; | |
// ec.prepare_wait(&w); | |
// if (predicate) { | |
// ec.cancel_wait(&w); | |
// return act(); | |
// } | |
// ec.commit_wait(&w); | |
// | |
// Notifying thread does: | |
// | |
// predicate = true; | |
// ec.notify(true); | |
// | |
// notify is cheap if there are no waiting threads. prepare_wait/commit_wait are not
// cheap, but they are executed only if the preceding predicate check has
// failed.
// | |
// Algorithm outline:
// There are two main variables: predicate (managed by user) and _state. | |
// Operation closely resembles Dekker mutual algorithm: | |
// https://en.wikipedia.org/wiki/Dekker%27s_algorithm | |
// Waiting thread sets _state then checks predicate, Notifying thread sets | |
// predicate then checks _state. Due to seq_cst fences in between these | |
// operations it is guaranteed that either the waiter will see the predicate change
// and won't block, or notifying thread will see _state change and will unblock | |
// the waiter, or both. But it can't happen that both threads don't see each | |
// other changes, which would lead to deadlock. | |
class Notifier {

  friend class Executor;

  public:

  // Per-thread wait node. Each worker owns one Waiter for its lifetime;
  // committed waiters are linked through `next` into the lock-free stack
  // encoded in the low bits of _state.
  struct Waiter {
    std::atomic<Waiter*> next;   // next waiter on the committed-wait stack
    std::mutex mu;               // protects `state` for park/unpark handshake
    std::condition_variable cv;  // the waiter blocks on this in _park
    uint64_t epoch;              // _state snapshot taken in prepare_wait
    unsigned state;              // kNotSignaled / kWaiting / kSignaled
    enum {
      kNotSignaled,
      kWaiting,
      kSignaled,
    };
  };

  // N is the number of waiter slots (one per worker). The stack encoding
  // stores waiter indices in kStackBits bits, hence the size assertion.
  explicit Notifier(size_t N) : _waiters{N} {
    assert(_waiters.size() < (1 << kWaiterBits) - 1);
    // Initialize epoch to something close to overflow to test overflow.
    _state = kStackMask | (kEpochMask - kEpochInc * _waiters.size() * 2);
  }

  ~Notifier() {
    // Ensure there are no waiters.
    assert((_state.load() & (kStackMask | kWaiterMask)) == kStackMask);
  }

  // prepare_wait prepares for waiting.
  // After calling this function the thread must re-check the wait predicate
  // and call either cancel_wait or commit_wait passing the same Waiter object.
  void prepare_wait(Waiter* w) {
    w->epoch = _state.fetch_add(kWaiterInc, std::memory_order_relaxed);
    // seq_cst fence pairs with the fence in notify(): either the waiter
    // sees the predicate change, or the notifier sees the waiter count.
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }

  // commit_wait commits waiting: moves this thread from the prewait
  // counter onto the waiter stack and blocks until unparked.
  void commit_wait(Waiter* w) {
    w->state = Waiter::kNotSignaled;
    // Modification epoch of this waiter.
    uint64_t epoch =
        (w->epoch & kEpochMask) +
        (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift);
    uint64_t state = _state.load(std::memory_order_seq_cst);
    for (;;) {
      if (int64_t((state & kEpochMask) - epoch) < 0) {
        // The preceding waiter has not decided on its fate. Wait until it
        // calls either cancel_wait or commit_wait, or is notified.
        std::this_thread::yield();
        state = _state.load(std::memory_order_seq_cst);
        continue;
      }
      // We've already been notified.
      if (int64_t((state & kEpochMask) - epoch) > 0) return;
      // Remove this thread from prewait counter and add it to the waiter list.
      assert((state & kWaiterMask) != 0);
      uint64_t newstate = state - kWaiterInc + kEpochInc;
      //newstate = (newstate & ~kStackMask) | (w - &_waiters[0]);
      newstate = static_cast<uint64_t>((newstate & ~kStackMask) | static_cast<uint64_t>(w - &_waiters[0]));
      if ((state & kStackMask) == kStackMask)
        w->next.store(nullptr, std::memory_order_relaxed);
      else
        w->next.store(&_waiters[state & kStackMask], std::memory_order_relaxed);
      if (_state.compare_exchange_weak(state, newstate,
                                       std::memory_order_release))
        break;
    }
    _park(w);
  }

  // cancel_wait cancels effects of the previous prepare_wait call.
  void cancel_wait(Waiter* w) {
    uint64_t epoch =
        (w->epoch & kEpochMask) +
        (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift);
    uint64_t state = _state.load(std::memory_order_relaxed);
    for (;;) {
      if (int64_t((state & kEpochMask) - epoch) < 0) {
        // The preceding waiter has not decided on its fate. Wait until it
        // calls either cancel_wait or commit_wait, or is notified.
        std::this_thread::yield();
        state = _state.load(std::memory_order_relaxed);
        continue;
      }
      // We've already been notified.
      if (int64_t((state & kEpochMask) - epoch) > 0) return;
      // Remove this thread from prewait counter.
      assert((state & kWaiterMask) != 0);
      if (_state.compare_exchange_weak(state, state - kWaiterInc + kEpochInc,
                                       std::memory_order_relaxed))
        return;
    }
  }

  // notify wakes one or all waiting threads.
  // Must be called after changing the associated wait predicate.
  void notify(bool all) {
    // Pairs with the seq_cst fence in prepare_wait (Dekker-style protocol).
    std::atomic_thread_fence(std::memory_order_seq_cst);
    uint64_t state = _state.load(std::memory_order_acquire);
    for (;;) {
      // Easy case: no waiters.
      if ((state & kStackMask) == kStackMask && (state & kWaiterMask) == 0)
        return;
      uint64_t waiters = (state & kWaiterMask) >> kWaiterShift;
      uint64_t newstate;
      if (all) {
        // Reset prewait counter and empty wait list.
        newstate = (state & kEpochMask) + (kEpochInc * waiters) + kStackMask;
      } else if (waiters) {
        // There is a thread in pre-wait state, unblock it.
        newstate = state + kEpochInc - kWaiterInc;
      } else {
        // Pop a waiter from list and unpark it.
        Waiter* w = &_waiters[state & kStackMask];
        Waiter* wnext = w->next.load(std::memory_order_relaxed);
        uint64_t next = kStackMask;
        //if (wnext != nullptr) next = wnext - &_waiters[0];
        if (wnext != nullptr) next = static_cast<uint64_t>(wnext - &_waiters[0]);
        // Note: we don't add kEpochInc here. ABA problem on the lock-free stack
        // can't happen because a waiter is re-pushed onto the stack only after
        // it was in the pre-wait state which inevitably leads to epoch
        // increment.
        newstate = (state & kEpochMask) + next;
      }
      if (_state.compare_exchange_weak(state, newstate,
                                       std::memory_order_acquire)) {
        if (!all && waiters) return; // unblocked pre-wait thread
        if ((state & kStackMask) == kStackMask) return;
        Waiter* w = &_waiters[state & kStackMask];
        // notify(true) unparks the whole chain; notify(false) detaches one.
        if (!all) w->next.store(nullptr, std::memory_order_relaxed);
        _unpark(w);
        return;
      }
    }
  }

  // notify n workers (all of them when n covers every waiter slot)
  void notify_n(size_t n) {
    if(n >= _waiters.size()) {
      notify(true);
    }
    else {
      for(size_t k=0; k<n; ++k) {
        notify(false);
      }
    }
  }

  // number of waiter slots (== number of workers)
  size_t size() const {
    return _waiters.size();
  }

  private:

  // _state layout:
  // - low kStackBits is a stack of waiters committed wait.
  // - next kWaiterBits is count of waiters in prewait state.
  // - next kEpochBits is modification counter.
  static const uint64_t kStackBits = 16;
  static const uint64_t kStackMask = (1ull << kStackBits) - 1;
  static const uint64_t kWaiterBits = 16;
  static const uint64_t kWaiterShift = 16;
  static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1)
                                      << kWaiterShift;
  static const uint64_t kWaiterInc = 1ull << kWaiterBits;
  static const uint64_t kEpochBits = 32;
  static const uint64_t kEpochShift = 32;
  static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift;
  static const uint64_t kEpochInc = 1ull << kEpochShift;

  std::atomic<uint64_t> _state;
  std::vector<Waiter> _waiters;

  // Blocks the calling thread until its waiter is marked kSignaled.
  void _park(Waiter* w) {
    std::unique_lock<std::mutex> lock(w->mu);
    while (w->state != Waiter::kSignaled) {
      w->state = Waiter::kWaiting;
      w->cv.wait(lock);
    }
  }

  // Signals every waiter on the chain starting at `waiters`, notifying
  // the condition variable only for threads already parked.
  void _unpark(Waiter* waiters) {
    Waiter* next = nullptr;
    for (Waiter* w = waiters; w; w = next) {
      next = w->next.load(std::memory_order_relaxed);
      unsigned state;
      {
        std::unique_lock<std::mutex> lock(w->mu);
        state = w->state;
        w->state = Waiter::kSignaled;
      }
      // Avoid notifying if it wasn't waiting.
      if (state == Waiter::kWaiting) w->cv.notify_one();
    }
  }

};
} // namespace tf ------------------------------------------------------------ | |
/** | |
@file worker.hpp | |
@brief worker include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Class Definition: Worker | |
// ---------------------------------------------------------------------------- | |
/** | |
@class Worker | |
@brief class to create a worker in an executor | |
The class is primarily used by the executor to perform the work-stealing algorithm.
Users can access a worker object and alter its property | |
(e.g., changing the thread affinity in a POSIX-like system) | |
using tf::WorkerInterface. | |
*/ | |
class Worker {

  friend class Executor;
  friend class WorkerView;

  public:

    /**
    @brief queries the worker id associated with its parent executor

    A worker id is an unsigned integer in the range <tt>[0, N)</tt>,
    where @c N is the number of workers spawned at the construction
    time of the executor.
    */
    inline size_t id() const { return _id; }

    /**
    @brief acquires a pointer access to the underlying thread
    */
    inline std::thread* thread() const { return _thread; }

    /**
    @brief queries the size of the queue (i.e., number of enqueued tasks to
           run) associated with the worker
    */
    inline size_t queue_size() const { return _wsq.size(); }

    /**
    @brief queries the current capacity of the queue
    */
    inline size_t queue_capacity() const { return static_cast<size_t>(_wsq.capacity()); }

  private:

    size_t _id;                  // index of this worker within the executor
    size_t _vtm;                 // NOTE(review): presumably the victim index for stealing — confirm in Executor
    Executor* _executor;         // parent executor (non-owning)
    std::thread* _thread;        // underlying thread (non-owning)
    Notifier::Waiter* _waiter;   // this worker's wait node in the executor's Notifier
    std::default_random_engine _rdgen { std::random_device{}() };  // per-worker RNG
    TaskQueue<Node*> _wsq;       // this worker's work-stealing queue
    Node* _cache;                // NOTE(review): appears to cache a next node to run — confirm in Executor
};
// ---------------------------------------------------------------------------- | |
// Class Definition: PerThreadWorker | |
// ---------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
//struct PerThreadWorker { | |
// | |
// Worker* worker; | |
// | |
// PerThreadWorker() : worker {nullptr} {} | |
// | |
// PerThreadWorker(const PerThreadWorker&) = delete; | |
// PerThreadWorker(PerThreadWorker&&) = delete; | |
// | |
// PerThreadWorker& operator = (const PerThreadWorker&) = delete; | |
// PerThreadWorker& operator = (PerThreadWorker&&) = delete; | |
//}; | |
/** | |
@private | |
*/ | |
//inline PerThreadWorker& this_worker() { | |
// thread_local PerThreadWorker worker; | |
// return worker; | |
//} | |
// ---------------------------------------------------------------------------- | |
// Class Definition: WorkerView | |
// ---------------------------------------------------------------------------- | |
/** | |
@class WorkerView | |
@brief class to create an immutable view of a worker in an executor | |
An executor keeps a set of internal worker threads to run tasks. | |
A worker view provides users an immutable interface to observe | |
when a worker runs a task, and the view object is only accessible | |
from an observer derived from tf::ObserverInterface. | |
*/ | |
class WorkerView {

  friend class Executor;

  public:

    /**
    @brief queries the worker id associated with its parent executor

    A worker id is an unsigned integer in the range <tt>[0, N)</tt>,
    where @c N is the number of workers spawned at the construction
    time of the executor.
    */
    size_t id() const;

    /**
    @brief queries the size of the queue (i.e., number of pending tasks to
           run) associated with the worker
    */
    size_t queue_size() const;

    /**
    @brief queries the current capacity of the queue
    */
    size_t queue_capacity() const;

  private:

    // Constructed only by the executor; holds a non-owning reference,
    // so a view must not outlive the worker it observes.
    WorkerView(const Worker&);
    WorkerView(const WorkerView&) = default;

    const Worker& _worker;

};
// Constructor
// Binds the view to the observed worker (non-owning reference).
inline WorkerView::WorkerView(const Worker& w) : _worker{w} {
}
// function: id
// Returns the observed worker's index within its executor.
inline size_t WorkerView::id() const {
  return _worker._id;
}
// Function: queue_size
// Forwards to the worker's work-stealing queue size.
inline size_t WorkerView::queue_size() const {
  return _worker._wsq.size();
}
// Function: queue_capacity
// Forwards to the worker's work-stealing queue capacity.
inline size_t WorkerView::queue_capacity() const {
  return static_cast<size_t>(_worker._wsq.capacity());
}
// ---------------------------------------------------------------------------- | |
// Class Definition: WorkerInterface | |
// ---------------------------------------------------------------------------- | |
/** | |
@class WorkerInterface | |
@brief class to configure worker behavior in an executor | |
The tf::WorkerInterface class lets users interact with the executor | |
to customize the worker behavior, | |
such as calling custom methods before and after a worker enters and leaves | |
the loop. | |
When you create an executor, it spawns a set of workers to run tasks. | |
The interaction between the executor and its spawned workers looks like | |
the following: | |
for(size_t n=0; n<num_workers; n++) { | |
create_thread([](Worker& worker) | |
// pre-processing executor-specific worker information | |
// ... | |
// enter the scheduling loop | |
// Here, WorkerInterface::scheduler_prologue is invoked, if any | |
while(1) { | |
perform_work_stealing_algorithm(); | |
if(stop) { | |
break; | |
} | |
} | |
// leaves the scheduling loop and joins this worker thread | |
// Here, WorkerInterface::scheduler_epilogue is invoked, if any | |
); | |
} | |
@note | |
Methods defined in tf::WorkerInterface are not thread-safe and may be
invoked by multiple workers concurrently.
*/ | |
class WorkerInterface {

  public:

  /**
  @brief default destructor
  */
  virtual ~WorkerInterface() = default;

  /**
  @brief method to call before a worker enters the scheduling loop
  @param worker a reference to the worker

  The method is called by the constructor of an executor.
  */
  virtual void scheduler_prologue(Worker& worker) = 0;

  /**
  @brief method to call after a worker leaves the scheduling loop
  @param worker a reference to the worker
  @param ptr a pointer to the exception thrown by the scheduling loop
             (null when the loop exited normally)

  The method is called by the constructor of an executor.
  */
  virtual void scheduler_epilogue(Worker& worker, std::exception_ptr ptr) = 0;

};
/** | |
@brief helper function to create an instance derived from tf::WorkerInterface | |
@tparam T type derived from tf::WorkerInterface | |
@tparam ArgsT argument types to construct @c T | |
@param args arguments to forward to the constructor of @c T | |
*/ | |
template <typename T, typename... ArgsT> | |
std::shared_ptr<T> make_worker_interface(ArgsT&&... args) { | |
static_assert( | |
std::is_base_of_v<WorkerInterface, T>, | |
"T must be derived from WorkerInterface" | |
); | |
return std::make_shared<T>(std::forward<ArgsT>(args)...); | |
} | |
}  // end of namespace tf -----------------------------------------------------
/** | |
@file observer.hpp | |
@brief observer include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// timeline data structure | |
// ---------------------------------------------------------------------------- | |
/** | |
@brief default time point type of observers | |
*/ | |
using observer_stamp_t = std::chrono::time_point<std::chrono::steady_clock>; | |
/** | |
@private | |
*/ | |
/**
@private
One recorded task execution: its name, type, and start/end timestamps.
*/
struct Segment {

  std::string name;       // name of the executed task
  TaskType type;          // type of the executed task
  observer_stamp_t beg;   // execution start time
  observer_stamp_t end;   // execution end time

  // Serialization hooks; the Archiver type is supplied by the caller.
  template <typename Archiver>
  auto save(Archiver& ar) const {
    return ar(name, type, beg, end);
  }

  template <typename Archiver>
  auto load(Archiver& ar) {
    return ar(name, type, beg, end);
  }

  Segment() = default;

  Segment(
    const std::string& n, TaskType t, observer_stamp_t b, observer_stamp_t e
  ) : name {n}, type {t}, beg {b}, end {e} {
  }

  // duration of this segment
  auto span() const {
    return end-beg;
  }
};
/** | |
@private | |
*/ | |
/**
@private
Move-only record of all segments observed against a common time origin.
*/
struct Timeline {

  size_t uid;                // unique id of this timeline
  observer_stamp_t origin;   // time origin that segment stamps are relative to
  // NOTE(review): three-level nesting — presumably [observer][worker][segment];
  // confirm against the profiler code that fills it.
  std::vector<std::vector<std::vector<Segment>>> segments;

  Timeline() = default;

  // Move-only: copying a full timeline is disallowed.
  Timeline(const Timeline& rhs) = delete;
  Timeline(Timeline&& rhs) = default;

  Timeline& operator = (const Timeline& rhs) = delete;
  Timeline& operator = (Timeline&& rhs) = default;

  // Serialization hooks; the Archiver type is supplied by the caller.
  template <typename Archiver>
  auto save(Archiver& ar) const {
    return ar(uid, origin, segments);
  }

  template <typename Archiver>
  auto load(Archiver& ar) {
    return ar(uid, origin, segments);
  }
};
/** | |
@private | |
*/ | |
/**
@private
Move-only aggregate of timelines collected from executors.
*/
struct ProfileData {

  std::vector<Timeline> timelines;

  ProfileData() = default;

  // Move-only: copying the whole profile is disallowed.
  ProfileData(const ProfileData& rhs) = delete;
  ProfileData(ProfileData&& rhs) = default;

  ProfileData& operator = (const ProfileData& rhs) = delete;
  ProfileData& operator = (ProfileData&&) = default;

  // Serialization hooks; the Archiver type is supplied by the caller.
  template <typename Archiver>
  auto save(Archiver& ar) const {
    return ar(timelines);
  }

  template <typename Archiver>
  auto load(Archiver& ar) {
    return ar(timelines);
  }
};
// ---------------------------------------------------------------------------- | |
// observer interface | |
// ---------------------------------------------------------------------------- | |
/** | |
@class: ObserverInterface | |
@brief class to derive an executor observer | |
The tf::ObserverInterface class allows users to define custom methods to monitor | |
the behaviors of an executor. This is particularly useful when you want to | |
inspect the performance of an executor and visualize when each thread | |
participates in the execution of a task. | |
To prevent users from direct access to the internal threads and tasks, | |
tf::ObserverInterface provides immutable wrappers, | |
tf::WorkerView and tf::TaskView, over workers and tasks. | |
Please refer to tf::WorkerView and tf::TaskView for details. | |
Example usage: | |
@code{.cpp} | |
struct MyObserver : public tf::ObserverInterface { | |
MyObserver(const std::string& name) { | |
std::cout << "constructing observer " << name << '\n'; | |
} | |
void set_up(size_t num_workers) override final { | |
std::cout << "setting up observer with " << num_workers << " workers\n"; | |
} | |
void on_entry(WorkerView w, tf::TaskView tv) override final { | |
std::ostringstream oss; | |
oss << "worker " << w.id() << " ready to run " << tv.name() << '\n'; | |
std::cout << oss.str(); | |
} | |
void on_exit(WorkerView w, tf::TaskView tv) override final { | |
std::ostringstream oss; | |
oss << "worker " << w.id() << " finished running " << tv.name() << '\n'; | |
std::cout << oss.str(); | |
} | |
}; | |
tf::Taskflow taskflow; | |
tf::Executor executor; | |
// insert tasks into taskflow | |
// ... | |
// create a custom observer | |
std::shared_ptr<MyObserver> observer = executor.make_observer<MyObserver>("MyObserver"); | |
// run the taskflow | |
executor.run(taskflow).wait(); | |
@endcode | |
*/ | |
class ObserverInterface {

  public:

  /**
  @brief virtual destructor
  */
  virtual ~ObserverInterface() = default;

  /**
  @brief constructor-like method to call when the executor observer is fully created
  @param num_workers the number of the worker threads in the executor
  */
  virtual void set_up(size_t num_workers) = 0;

  /**
  @brief method to call before a worker thread executes a closure
  @param wv an immutable view of this worker thread
  @param task_view a constant wrapper object to the task
  */
  virtual void on_entry(WorkerView wv, TaskView task_view) = 0;

  /**
  @brief method to call after a worker thread executed a closure
  @param wv an immutable view of this worker thread
  @param task_view a constant wrapper object to the task
  */
  virtual void on_exit(WorkerView wv, TaskView task_view) = 0;
};
// ---------------------------------------------------------------------------- | |
// ChromeObserver definition | |
// ---------------------------------------------------------------------------- | |
/** | |
@class: ChromeObserver | |
@brief class to create an observer based on Chrome tracing format | |
A tf::ChromeObserver inherits tf::ObserverInterface and defines methods to dump | |
the observed thread activities into a format that can be visualized through | |
@ChromeTracing. | |
@code{.cpp} | |
tf::Taskflow taskflow; | |
tf::Executor executor; | |
// insert tasks into taskflow | |
// ... | |
// create a custom observer | |
std::shared_ptr<tf::ChromeObserver> observer = executor.make_observer<tf::ChromeObserver>(); | |
// run the taskflow | |
executor.run(taskflow).wait(); | |
// dump the thread activities to a chrome-tracing format. | |
observer->dump(std::cout); | |
@endcode | |
*/ | |
class ChromeObserver : public ObserverInterface {

  friend class Executor;

  // data structure to record each task execution
  struct Segment {

    std::string name;        // task name shown in the trace
    observer_stamp_t beg;    // execution start time
    observer_stamp_t end;    // execution end time

    Segment(
      const std::string& n,
      observer_stamp_t b,
      observer_stamp_t e
    );
  };

  // data structure to store the entire execution timeline
  struct Timeline {
    observer_stamp_t origin;                            // common time origin
    std::vector<std::vector<Segment>> segments;         // per-worker segments
    std::vector<std::stack<observer_stamp_t>> stacks;   // per-worker on_entry stamps
  };

  public:

    /**
    @brief dumps the timelines into a @ChromeTracing format through
           an output stream
    */
    void dump(std::ostream& ostream) const;

    /**
    @brief dumps the timelines into a @ChromeTracing format
    */
    inline std::string dump() const;

    /**
    @brief clears the timeline data
    */
    inline void clear();

    /**
    @brief queries the number of tasks observed
    */
    inline size_t num_tasks() const;

  private:

    inline void set_up(size_t num_workers) override final;
    inline void on_entry(WorkerView w, TaskView task_view) override final;
    inline void on_exit(WorkerView w, TaskView task_view) override final;

    Timeline _timeline;
};
// constructor
// Records one observed task execution interval.
inline ChromeObserver::Segment::Segment(
  const std::string& n, observer_stamp_t b, observer_stamp_t e
) :
  name {n}, beg {b}, end {e} {
}
// Procedure: set_up
// Sizes per-worker storage, pre-reserves segment space, and records the
// trace origin timestamp.
inline void ChromeObserver::set_up(size_t num_workers) {
  _timeline.segments.resize(num_workers);
  _timeline.stacks.resize(num_workers);
  for(auto& worker_segments : _timeline.segments) {
    worker_segments.reserve(32);
  }
  _timeline.origin = observer_stamp_t::clock::now();
}
// Procedure: on_entry
// Pushes the current timestamp onto the worker's stack; the matching
// on_exit pops it to form a segment.
inline void ChromeObserver::on_entry(WorkerView wv, TaskView) {
  _timeline.stacks[wv.id()].push(observer_stamp_t::clock::now());
}
// Procedure: on_exit
// Pops the matching on_entry timestamp and records a completed segment
// for this worker.
inline void ChromeObserver::on_exit(WorkerView wv, TaskView tv) {
  size_t w = wv.id();
  assert(!_timeline.stacks[w].empty());
  auto beg = _timeline.stacks[w].top();
  _timeline.stacks[w].pop();
  _timeline.segments[w].emplace_back(
    tv.name(), beg, observer_stamp_t::clock::now()
  );
}
// Function: clear
// Discards all recorded segments and drains any unmatched on_entry
// timestamps, keeping the per-worker storage itself.
inline void ChromeObserver::clear() {
  for(size_t i = 0; i < _timeline.segments.size(); ++i) {
    _timeline.segments[i].clear();
    auto& stamp_stack = _timeline.stacks[i];
    while(!stamp_stack.empty()) {
      stamp_stack.pop();
    }
  }
}
// Procedure: dump
// Serializes all recorded segments as a Chrome-tracing JSON array of
// complete ("ph":"X") events; timestamps/durations are microseconds
// relative to the recorded origin.
inline void ChromeObserver::dump(std::ostream& os) const {

  using namespace std::chrono;

  // Find the first worker that recorded any segment; workers before it
  // contribute nothing and need no separating comma.
  size_t first;
  for(first = 0; first<_timeline.segments.size(); ++first) {
    if(_timeline.segments[first].size() > 0) {
      break;
    }
  }

  os << '[';

  for(size_t w=first; w<_timeline.segments.size(); w++) {

    // Comma between the previous worker's events and this worker's.
    if(w != first && _timeline.segments[w].size() > 0) {
      os << ',';
    }

    for(size_t i=0; i<_timeline.segments[w].size(); i++) {

      os << '{'<< "\"cat\":\"ChromeObserver\",";

      // name field (falls back to "<worker>_<index>" for unnamed tasks)
      os << "\"name\":\"";
      if(_timeline.segments[w][i].name.empty()) {
        os << w << '_' << i;
      }
      else {
        os << _timeline.segments[w][i].name;
      }
      os << "\",";

      // segment field
      os << "\"ph\":\"X\","
         << "\"pid\":1,"
         << "\"tid\":" << w << ','
         << "\"ts\":" << duration_cast<microseconds>(
                           _timeline.segments[w][i].beg - _timeline.origin
                         ).count() << ','
         << "\"dur\":" << duration_cast<microseconds>(
                            _timeline.segments[w][i].end - _timeline.segments[w][i].beg
                          ).count();

      if(i != _timeline.segments[w].size() - 1) {
        os << "},";
      }
      else {
        os << '}';
      }
    }
  }
  os << "]\n";
}
// Function: dump | |
inline std::string ChromeObserver::dump() const { | |
std::ostringstream oss; | |
dump(oss); | |
return oss.str(); | |
} | |
// Function: num_tasks | |
inline size_t ChromeObserver::num_tasks() const { | |
return std::accumulate( | |
_timeline.segments.begin(), _timeline.segments.end(), size_t{0}, | |
[](size_t sum, const auto& exe){ | |
return sum + exe.size(); | |
} | |
); | |
} | |
// ---------------------------------------------------------------------------- | |
// TFProfObserver definition | |
// ---------------------------------------------------------------------------- | |
/** | |
@class TFProfObserver | |
@brief class to create an observer based on the built-in taskflow profiler format | |
A tf::TFProfObserver inherits tf::ObserverInterface and defines methods to dump | |
the observed thread activities into a format that can be visualized through | |
@TFProf. | |
@code{.cpp} | |
tf::Taskflow taskflow; | |
tf::Executor executor; | |
// insert tasks into taskflow | |
// ... | |
// create a custom observer | |
std::shared_ptr<tf::TFProfObserver> observer = executor.make_observer<tf::TFProfObserver>(); | |
// run the taskflow | |
executor.run(taskflow).wait(); | |
// dump the thread activities to Taskflow Profiler format. | |
observer->dump(std::cout); | |
@endcode | |
*/ | |
class TFProfObserver : public ObserverInterface { | |
friend class Executor; | |
friend class TFProfManager; | |
/** @private overall task summary */ | |
struct TaskSummary { | |
size_t count {0}; | |
size_t total_span {0}; | |
size_t min_span; | |
size_t max_span; | |
float avg_span() const { return total_span * 1.0f / count; } | |
}; | |
/** @private worker summary at a level */ | |
struct WorkerSummary { | |
size_t id; | |
size_t level; | |
size_t count {0}; | |
size_t total_span {0}; | |
size_t min_span{0}; | |
size_t max_span{0}; | |
std::array<TaskSummary, TASK_TYPES.size()> tsum; | |
float avg_span() const { return total_span * 1.0f / count; } | |
//return count < 2 ? 0.0f : total_delay * 1.0f / (count-1); | |
}; | |
/** @private */ | |
struct Summary { | |
std::array<TaskSummary, TASK_TYPES.size()> tsum; | |
std::vector<WorkerSummary> wsum; | |
void dump_tsum(std::ostream&) const; | |
void dump_wsum(std::ostream&) const; | |
void dump(std::ostream&) const; | |
}; | |
public: | |
/** | |
@brief dumps the timelines into a @TFProf format through | |
an output stream | |
*/ | |
void dump(std::ostream& ostream) const; | |
/** | |
@brief dumps the timelines into a JSON string | |
*/ | |
std::string dump() const; | |
/** | |
@brief shows the summary report through an output stream | |
*/ | |
void summary(std::ostream& ostream) const; | |
/** | |
@brief returns the summary report in a string | |
*/ | |
std::string summary() const; | |
/** | |
@brief clears the timeline data | |
*/ | |
void clear(); | |
/** | |
@brief queries the number of tasks observed | |
*/ | |
size_t num_tasks() const; | |
/** | |
@brief queries the number of observed workers | |
*/ | |
size_t num_workers() const; | |
private: | |
Timeline _timeline; | |
std::vector<std::stack<observer_stamp_t>> _stacks; | |
inline void set_up(size_t num_workers) override final; | |
inline void on_entry(WorkerView, TaskView) override final; | |
inline void on_exit(WorkerView, TaskView) override final; | |
}; | |
// dump the task summary | |
inline void TFProfObserver::Summary::dump_tsum(std::ostream& os) const { | |
// task summary | |
size_t type_w{10}, count_w{5}, time_w{9}, avg_w{8}, min_w{8}, max_w{8}; | |
std::for_each(tsum.begin(), tsum.end(), [&](const auto& i){ | |
if(i.count == 0) return; | |
count_w = std::max(count_w, std::to_string(i.count).size()); | |
}); | |
std::for_each(tsum.begin(), tsum.end(), [&](const auto& i){ | |
if(i.count == 0) return; | |
time_w = std::max(time_w, std::to_string(i.total_span).size()); | |
}); | |
std::for_each(tsum.begin(), tsum.end(), [&](const auto& i){ | |
if(i.count == 0) return; | |
avg_w = std::max(time_w, std::to_string(i.avg_span()).size()); | |
}); | |
std::for_each(tsum.begin(), tsum.end(), [&](const auto& i){ | |
if(i.count == 0) return; | |
min_w = std::max(min_w, std::to_string(i.min_span).size()); | |
}); | |
std::for_each(tsum.begin(), tsum.end(), [&](const auto& i){ | |
if(i.count == 0) return; | |
max_w = std::max(max_w, std::to_string(i.max_span).size()); | |
}); | |
os << std::setw(type_w) << "-Task-" | |
<< std::setw(count_w+2) << "Count" | |
<< std::setw(time_w+2) << "Time (us)" | |
<< std::setw(avg_w+2) << "Avg (us)" | |
<< std::setw(min_w+2) << "Min (us)" | |
<< std::setw(max_w+2) << "Max (us)" | |
<< '\n'; | |
for(size_t i=0; i<TASK_TYPES.size(); i++) { | |
if(tsum[i].count == 0) { | |
continue; | |
} | |
os << std::setw(type_w) << to_string(TASK_TYPES[i]) | |
<< std::setw(count_w+2) << tsum[i].count | |
<< std::setw(time_w+2) << tsum[i].total_span | |
<< std::setw(avg_w+2) << std::to_string(tsum[i].avg_span()) | |
<< std::setw(min_w+2) << tsum[i].min_span | |
<< std::setw(max_w+2) << tsum[i].max_span | |
<< '\n'; | |
} | |
} | |
// dump the worker summary | |
// Dumps the per-worker (per-level) summary as an aligned table.
// Each worker/level row is followed by one sub-row per observed task type,
// then a per-worker total row.
inline void TFProfObserver::Summary::dump_wsum(std::ostream& os) const {
  // minimum column widths: worker, task type, level, count, time, avg, min, max
  size_t w_w{10}, t_w{10}, l_w{5}, c_w{5}, d_w{9}, avg_w{8}, min_w{8}, max_w{8};
  // widen each column to fit the widest value it will display
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    l_w = std::max(l_w, std::to_string(i.level).size());
  });
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    c_w = std::max(c_w, std::to_string(i.count).size());
  });
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    d_w = std::max(d_w, std::to_string(i.total_span).size());
  });
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    avg_w = std::max(avg_w, std::to_string(i.avg_span()).size());
  });
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    min_w = std::max(min_w, std::to_string(i.min_span).size());
  });
  std::for_each(wsum.begin(), wsum.end(), [&](const auto& i){
    if(i.count == 0) return;
    max_w = std::max(max_w, std::to_string(i.max_span).size());
  });
  // header row
  os << std::setw(w_w) << "-Worker-"
     << std::setw(l_w+2) << "Level"
     << std::setw(t_w) << "Task"
     << std::setw(c_w+2) << "Count"
     << std::setw(d_w+2) << "Time (us)"
     << std::setw(avg_w+2) << "Avg (us)"
     << std::setw(min_w+2) << "Min (us)"
     << std::setw(max_w+2) << "Max (us)"
     << '\n';
  for(const auto& ws : wsum) {
    if(ws.count == 0) {
      continue;
    }
    os << std::setw(w_w) << ws.id
       << std::setw(l_w+2) << ws.level;
    bool first = true;
    for(size_t i=0; i<TASK_TYPES.size(); i++) {
      if(ws.tsum[i].count == 0) {
        continue;
      }
      // the first task row continues the worker/level line; subsequent
      // rows are padded past the worker and level columns
      os << (first ? std::setw(t_w) : std::setw(w_w + l_w + 2 + t_w));
      first = false;
      os << to_string(TASK_TYPES[i])
         << std::setw(c_w+2) << ws.tsum[i].count
         << std::setw(d_w+2) << ws.tsum[i].total_span
         << std::setw(avg_w+2) << std::to_string(ws.tsum[i].avg_span())
         << std::setw(min_w+2) << ws.tsum[i].min_span
         << std::setw(max_w+2) << ws.tsum[i].max_span
         << '\n';
    }
    // per-worker summary row, right-aligned into the Count column
    os << std::setw(w_w + l_w + t_w + c_w + 4) << ws.count
       << std::setw(d_w+2) << ws.total_span
       << std::setw(avg_w+2) << std::to_string(ws.avg_span())
       << std::setw(min_w+2) << ws.min_span
       << std::setw(max_w+2) << ws.max_span
       << '\n';
    //for(size_t j=0; j<w_w+l_w+t_w+4; j++) os << ' ';
    //for(size_t j=0; j<c_w+d_w+avg_w+min_w+max_w+8; j++) os << '-';
    //os <<'\n';
  }
}
// dump the summary report through an ostream | |
// Dumps the full summary report: the task table, a blank line, then
// the worker table.
inline void TFProfObserver::Summary::dump(std::ostream& os) const {
  dump_tsum(os);
  os.put('\n');
  dump_wsum(os);
}
// Procedure: set_up | |
// Procedure: set_up
// Sizes the per-worker storage, tags the timeline with a unique id,
// and records the observation origin time.
inline void TFProfObserver::set_up(size_t num_workers) {
  _timeline.segments.resize(num_workers);
  _stacks.resize(num_workers);
  _timeline.uid = unique_id<size_t>();
  _timeline.origin = observer_stamp_t::clock::now();
}
// Procedure: on_entry | |
// Procedure: on_entry
// Pushes the task start time onto the calling worker's stack.
inline void TFProfObserver::on_entry(WorkerView wv, TaskView) {
  auto now = observer_stamp_t::clock::now();
  _stacks[wv.id()].push(now);
}
// Procedure: on_exit | |
// Procedure: on_exit
// Records the finished task into the segment list of its nesting level.
// The stack depth before the pop equals the nesting depth of the task,
// so after the pop, _stacks[w].size() is the task's level index.
inline void TFProfObserver::on_exit(WorkerView wv, TaskView tv) {
  size_t w = wv.id();
  assert(!_stacks[w].empty());
  // grow the per-level segment vectors to cover the current nesting depth
  if(_stacks[w].size() > _timeline.segments[w].size()) {
    _timeline.segments[w].resize(_stacks[w].size());
  }
  auto beg = _stacks[w].top();
  _stacks[w].pop();
  // after the pop, the stack size equals the level of the finished task
  _timeline.segments[w][_stacks[w].size()].emplace_back(
    tv.name(), tv.type(), beg, observer_stamp_t::clock::now()
  );
}
// Function: clear | |
inline void TFProfObserver::clear() { | |
for(size_t w=0; w<_timeline.segments.size(); ++w) { | |
for(size_t l=0; l<_timeline.segments[w].size(); ++l) { | |
_timeline.segments[w][l].clear(); | |
} | |
while(!_stacks[w].empty()) { | |
_stacks[w].pop(); | |
} | |
} | |
} | |
// Procedure: dump | |
// Procedure: dump
// Serializes the timeline into the Taskflow Profiler (TFProf) JSON
// format: one object per (worker, level) with its list of spans.
// Span endpoints are microseconds relative to _timeline.origin.
inline void TFProfObserver::dump(std::ostream& os) const {
  using namespace std::chrono;
  // find the first worker that has recorded segments
  size_t first;
  for(first = 0; first<_timeline.segments.size(); ++first) {
    if(_timeline.segments[first].size() > 0) {
      break;
    }
  }
  // no timeline data to dump
  if(first == _timeline.segments.size()) {
    os << "{}\n";
    return;
  }
  os << "{\"executor\":\"" << _timeline.uid << "\",\"data\":[";
  // 'comma' tracks whether a previous (worker, level) object was emitted
  bool comma = false;
  for(size_t w=first; w<_timeline.segments.size(); w++) {
    for(size_t l=0; l<_timeline.segments[w].size(); l++) {
      if(_timeline.segments[w][l].empty()) {
        continue;
      }
      if(comma) {
        os << ',';
      }
      else {
        comma = true;
      }
      os << "{\"worker\":" << w << ",\"level\":" << l << ",\"data\":[";
      for(size_t i=0; i<_timeline.segments[w][l].size(); ++i) {
        const auto& s = _timeline.segments[w][l][i];
        if(i) os << ',';
        // span: [begin, end] in microseconds since the origin
        os << "{\"span\":["
           << duration_cast<microseconds>(s.beg - _timeline.origin).count()
           << ","
           << duration_cast<microseconds>(s.end - _timeline.origin).count()
           << "],";
        // name (unnamed tasks get a synthetic "<worker>_<index>" label)
        os << "\"name\":\"";
        if(s.name.empty()) {
          os << w << '_' << i;
        }
        else {
          os << s.name;
        }
        os << "\",";
        // e.g., category "type": "Condition Task"
        os << "\"type\":\"" << to_string(s.type) << "\"";
        os << "}";
      }
      os << "]}";
    }
  }
  os << "]}\n";
}
// Function: dump | |
inline std::string TFProfObserver::dump() const { | |
std::ostringstream oss; | |
dump(oss); | |
return oss.str(); | |
} | |
// Procedure: summary | |
inline void TFProfObserver::summary(std::ostream& os) const { | |
using namespace std::chrono; | |
Summary summary; | |
std::optional<observer_stamp_t> view_beg, view_end; | |
// find the first non-empty worker | |
size_t first; | |
for(first = 0; first<_timeline.segments.size(); ++first) { | |
if(_timeline.segments[first].size() > 0) { | |
break; | |
} | |
} | |
// not timeline data to dump | |
if(first == _timeline.segments.size()) { | |
goto end_of_summary; | |
} | |
for(size_t w=first; w<_timeline.segments.size(); w++) { | |
for(size_t l=0; l<_timeline.segments[w].size(); l++) { | |
if(_timeline.segments[w][l].empty()) { | |
continue; | |
} | |
// worker w at level l | |
WorkerSummary ws; | |
ws.id = w; | |
ws.level = l; | |
ws.count = _timeline.segments[w][l].size(); | |
// scan all tasks at level l | |
for(size_t i=0; i<_timeline.segments[w][l].size(); ++i) { | |
// update the entire span | |
auto& s = _timeline.segments[w][l][i]; | |
view_beg = view_beg ? std::min(*view_beg, s.beg) : s.beg; | |
view_end = view_end ? std::max(*view_end, s.end) : s.end; | |
// update the task summary | |
size_t t = duration_cast<microseconds>(s.end - s.beg).count(); | |
auto& x = summary.tsum[static_cast<int>(s.type)]; | |
x.count += 1; | |
x.total_span += t; | |
x.min_span = (x.count == 1) ? t : std::min(t, x.min_span); | |
x.max_span = (x.count == 1) ? t : std::max(t, x.max_span); | |
// update the worker summary | |
ws.total_span += t; | |
ws.min_span = (i == 0) ? t : std::min(t, ws.min_span); | |
ws.max_span = (i == 0) ? t : std::max(t, ws.max_span); | |
auto&y = ws.tsum[static_cast<int>(s.type)]; | |
y.count += 1; | |
y.total_span += t; | |
y.min_span = (y.count == 1) ? t : std::min(t, y.min_span); | |
y.max_span = (y.count == 1) ? t : std::max(t, y.max_span); | |
// update the delay | |
//if(i) { | |
// size_t d = duration_cast<nanoseconds>( | |
// s.beg - _timeline.segments[w][l][i-1].end | |
// ).count(); | |
// ws.total_delay += d; | |
// ws.min_delay = (i == 1) ? d : std::min(ws.min_delay, d); | |
// ws.max_delay = (i == 1) ? d : std::max(ws.max_delay, d); | |
//} | |
} | |
summary.wsum.push_back(ws); | |
} | |
} | |
end_of_summary: | |
size_t view = 0; | |
if(view_beg && view_end) { | |
view = duration_cast<microseconds>(*view_end - *view_beg).count(); | |
} | |
os << "==Observer " << _timeline.uid << ": " | |
<< num_workers() << " workers completed " | |
<< num_tasks() << " tasks in " | |
<< view << " us\n"; | |
summary.dump(os); | |
} | |
// Procedure: summary | |
inline std::string TFProfObserver::summary() const { | |
std::ostringstream oss; | |
summary(oss); | |
return oss.str(); | |
} | |
// Function: num_tasks | |
// Function: num_tasks
// Counts all recorded segments across every worker and nesting level.
inline size_t TFProfObserver::num_tasks() const {
  size_t total = 0;
  for(const auto& worker : _timeline.segments) {
    for(const auto& level : worker) {
      total += level.size();
    }
  }
  return total;
}
// Function: num_workers | |
// Function: num_workers
// Counts workers that recorded at least one level of segments.
inline size_t TFProfObserver::num_workers() const {
  size_t active = 0;
  for(const auto& segs : _timeline.segments) {
    if(!segs.empty()) {
      ++active;
    }
  }
  return active;
}
// ---------------------------------------------------------------------------- | |
// TFProfManager | |
// ---------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
class TFProfManager {

  friend class Executor;

  public:

    ~TFProfManager();

    // non-copyable singleton
    TFProfManager(const TFProfManager&) = delete;
    TFProfManager& operator=(const TFProfManager&) = delete;

    // access the process-wide singleton instance
    static TFProfManager& get();

    // dump all managed observers' timelines to the given stream
    void dump(std::ostream& ostream) const;

  private:

    // output path read from the environment at construction; may be empty
    const std::string _fpath;

    // guards _observers against concurrent registration
    std::mutex _mutex;
    std::vector<std::shared_ptr<TFProfObserver>> _observers;

    TFProfManager();

    // registers an observer to be reported/serialized at destruction
    void _manage(std::shared_ptr<TFProfObserver> observer);
};
// constructor | |
// Constructor: reads the profiler output path from the environment
// (TF_ENABLE_PROFILER); an empty path means "report to stderr".
inline TFProfManager::TFProfManager() :
  _fpath(get_env(TF_ENABLE_PROFILER)) {
}
// Procedure: manage | |
inline void TFProfManager::_manage(std::shared_ptr<TFProfObserver> observer) { | |
std::lock_guard lock(_mutex); | |
_observers.push_back(std::move(observer)); | |
} | |
// Procedure: dump | |
inline void TFProfManager::dump(std::ostream& os) const { | |
for(size_t i=0; i<_observers.size(); ++i) { | |
if(i) os << ','; | |
_observers[i]->dump(os); | |
} | |
} | |
// Destructor | |
// Destructor
// Flushes all observed timelines at program shutdown:
//  - if _fpath opens successfully, write either binary (.tfp) or JSON;
//  - otherwise (e.g., empty path), print a text summary to stderr.
inline TFProfManager::~TFProfManager() {
  std::ofstream ofs(_fpath);
  if(ofs) {
    // .tfp: binary serialization for the Taskflow profiler
    // NOTE(review): rfind(".tfp") != npos matches the substring anywhere
    // in the path, not only as a trailing extension — confirm intended.
    if(_fpath.rfind(".tfp") != std::string::npos) {
      ProfileData data;
      data.timelines.reserve(_observers.size());
      for(size_t i=0; i<_observers.size(); ++i) {
        // moves each observer's timeline; observers are unusable afterwards
        data.timelines.push_back(std::move(_observers[i]->_timeline));
      }
      Serializer<std::ofstream> serializer(ofs);
      serializer(data);
    }
    // .json (any non-.tfp path falls through to JSON output)
    else { // if(_fpath.rfind(".json") != std::string::npos) {
      ofs << "[\n";
      for(size_t i=0; i<_observers.size(); ++i) {
        if(i) ofs << ',';
        _observers[i]->dump(ofs);
      }
      ofs << "]\n";
    }
  }
  // do a summary report in stderr for each observer
  else {
    std::ostringstream oss;
    for(size_t i=0; i<_observers.size(); ++i) {
      _observers[i]->summary(oss);
    }
    fprintf(stderr, "%s", oss.str().c_str());
  }
}
// Function: get | |
// Function: get
// Meyers singleton — function-local static initialization is
// thread-safe since C++11.
inline TFProfManager& TFProfManager::get() {
  static TFProfManager instance;
  return instance;
}
// ---------------------------------------------------------------------------- | |
// Identifier for Each Built-in Observer | |
// ---------------------------------------------------------------------------- | |
/** @enum ObserverType | |
@brief enumeration of all observer types | |
*/ | |
enum class ObserverType : int {
  TFPROF = 0,   // built-in Taskflow profiler format (TFProfObserver)
  CHROME,       // Chrome tracing format (ChromeObserver)
  UNDEFINED     // unknown/unspecified observer type
};
/** | |
@brief convert an observer type to a human-readable string | |
*/ | |
// Maps an observer type to its human-readable name; any value outside
// the known enumerators yields "undefined".
inline const char* to_string(ObserverType type) {
  if(type == ObserverType::TFPROF) {
    return "tfprof";
  }
  if(type == ObserverType::CHROME) {
    return "chrome";
  }
  return "undefined";
}
} // end of namespace tf ----------------------------------------------------- | |
// reference: | |
// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c | |
// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp | |
/** | |
@file partitioner.hpp | |
@brief partitioner include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Partitioner Base | |
// ---------------------------------------------------------------------------- | |
/** | |
@class PartitionerBase | |
@brief class to derive a partitioner for scheduling parallel algorithms | |
The class provides base methods to derive a partitioner that can be used | |
to schedule parallel iterations (e.g., tf::Taskflow::for_each). | |
A partitioner defines the scheduling method for running parallel algorithms,
such as tf::Taskflow::for_each, tf::Taskflow::reduce, and so on.
By default, we provide the following partitioners: | |
+ tf::GuidedPartitioner to enable guided scheduling algorithm of adaptive chunk size | |
+ tf::DynamicPartitioner to enable dynamic scheduling algorithm of equal chunk size | |
+ tf::StaticPartitioner to enable static scheduling algorithm of static chunk size | |
+ tf::RandomPartitioner to enable random scheduling algorithm of random chunk size | |
Depending on applications, partitioning algorithms can impact the performance | |
a lot. | |
For example, if a parallel-iteration workload contains a regular work unit per | |
iteration, tf::StaticPartitioner can deliver the best performance. | |
On the other hand, if the work unit per iteration is irregular and unbalanced, | |
tf::GuidedPartitioner or tf::DynamicPartitioner can outperform tf::StaticPartitioner. | |
In most situations, tf::GuidedPartitioner can deliver decent performance and | |
is thus used as our default partitioner. | |
*/ | |
class PartitionerBase {

  public:

  /**
  @brief default constructor
  */
  PartitionerBase() = default;

  /**
  @brief construct a partitioner with the given chunk size
  */
  explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}

  /**
  @brief query the chunk size of this partitioner
  */
  [[nodiscard]] size_t chunk_size() const { return _chunk_size; }

  /**
  @brief update the chunk size of this partitioner
  */
  void chunk_size(size_t cz) { _chunk_size = cz; }

  protected:

  /**
  @brief chunk size (0 lets the derived scheduling algorithm pick a default)
  */
  size_t _chunk_size{0};
};
// ---------------------------------------------------------------------------- | |
// Guided Partitioner | |
// ---------------------------------------------------------------------------- | |
/** | |
@class GuidedPartitioner | |
@brief class to construct a guided partitioner for scheduling parallel algorithms | |
The size of a partition is proportional to the number of unassigned iterations | |
divided by the number of workers, | |
and the size will gradually decrease to the given chunk size. | |
The last partition may be smaller than the chunk size. | |
*/ | |
class GuidedPartitioner : public PartitionerBase { | |
public: | |
/** | |
@brief default constructor | |
*/ | |
GuidedPartitioner() : PartitionerBase{1} {} | |
/** | |
@brief construct a guided partitioner with the given chunk size | |
*/ | |
explicit GuidedPartitioner(size_t sz) : PartitionerBase (sz) {} | |
// -------------------------------------------------------------------------- | |
// scheduling methods | |
// -------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<void, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop( | |
size_t N, | |
size_t W, | |
std::atomic<size_t>& next, | |
F&& func | |
) const { | |
size_t chunk_size = (_chunk_size == 0) ? size_t{1} : _chunk_size; | |
size_t p1 = 2 * W * (chunk_size + 1); | |
float p2 = 0.5f / static_cast<float>(W); | |
size_t curr_b = next.load(std::memory_order_relaxed); | |
while(curr_b < N) { | |
size_t r = N - curr_b; | |
// fine-grained | |
if(r < p1) { | |
while(1) { | |
curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
if(curr_b >= N) { | |
return; | |
} | |
func(curr_b, std::min(curr_b + chunk_size, N)); | |
} | |
break; | |
} | |
// coarse-grained | |
else { | |
size_t q = static_cast<size_t>(p2 * r); | |
if(q < chunk_size) { | |
q = chunk_size; | |
} | |
//size_t curr_e = (q <= r) ? curr_b + q : N; | |
size_t curr_e = std::min(curr_b + q, N); | |
if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed, | |
std::memory_order_relaxed)) { | |
func(curr_b, curr_e); | |
curr_b = next.load(std::memory_order_relaxed); | |
} | |
} | |
} | |
} | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<bool, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop_until( | |
size_t N, | |
size_t W, | |
std::atomic<size_t>& next, | |
F&& func | |
) const { | |
size_t chunk_size = (_chunk_size == 0) ? size_t{1} : _chunk_size; | |
size_t p1 = 2 * W * (chunk_size + 1); | |
float p2 = 0.5f / static_cast<float>(W); | |
size_t curr_b = next.load(std::memory_order_relaxed); | |
while(curr_b < N) { | |
size_t r = N - curr_b; | |
// fine-grained | |
if(r < p1) { | |
while(1) { | |
curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
if(curr_b >= N) { | |
return; | |
} | |
if(func(curr_b, std::min(curr_b + chunk_size, N))) { | |
return; | |
} | |
} | |
break; | |
} | |
// coarse-grained | |
else { | |
size_t q = static_cast<size_t>(p2 * r); | |
if(q < chunk_size) { | |
q = chunk_size; | |
} | |
//size_t curr_e = (q <= r) ? curr_b + q : N; | |
size_t curr_e = std::min(curr_b + q, N); | |
if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed, | |
std::memory_order_relaxed)) { | |
if(func(curr_b, curr_e)) { | |
return; | |
} | |
curr_b = next.load(std::memory_order_relaxed); | |
} | |
} | |
} | |
} | |
}; | |
// ---------------------------------------------------------------------------- | |
// Dynamic Partitioner | |
// ---------------------------------------------------------------------------- | |
/** | |
@class DynamicPartitioner | |
@brief class to construct a dynamic partitioner for scheduling parallel algorithms | |
The partitioner splits iterations into many partitions each of size equal to | |
the given chunk size. | |
Different partitions are distributed dynamically to workers | |
without any specific order. | |
*/ | |
class DynamicPartitioner : public PartitionerBase { | |
public: | |
/** | |
@brief default constructor | |
*/ | |
DynamicPartitioner() : PartitionerBase{1} {}; | |
/** | |
@brief construct a dynamic partitioner with the given chunk size | |
*/ | |
explicit DynamicPartitioner(size_t sz) : PartitionerBase (sz) {} | |
// -------------------------------------------------------------------------- | |
// scheduling methods | |
// -------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<void, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop( | |
size_t N, | |
size_t, | |
std::atomic<size_t>& next, | |
F&& func | |
) const { | |
size_t chunk_size = (_chunk_size == 0) ? size_t{1} : _chunk_size; | |
size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
while(curr_b < N) { | |
func(curr_b, std::min(curr_b + chunk_size, N)); | |
curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
} | |
} | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<bool, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop_until( | |
size_t N, | |
size_t, | |
std::atomic<size_t>& next, | |
F&& func | |
) const { | |
size_t chunk_size = (_chunk_size == 0) ? size_t{1} : _chunk_size; | |
size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
while(curr_b < N) { | |
if(func(curr_b, std::min(curr_b + chunk_size, N))) { | |
return; | |
} | |
curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed); | |
} | |
} | |
}; | |
// ---------------------------------------------------------------------------- | |
// Static Partitioner | |
// ---------------------------------------------------------------------------- | |
/**
@class StaticPartitioner
@brief class to construct a static partitioner for scheduling parallel algorithms
The partitioner divides iterations into chunks and distributes chunks
to workers in order.
If the chunk size is not specified (default @c 0), the partitioner resorts to a chunk size
that equally distributes iterations into workers.
@code{.cpp}
std::vector<int> data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
taskflow.for_each(
  data.begin(), data.end(), [](int i){}, StaticPartitioner(0)
);
executor.run(taskflow).wait();
@endcode
*/
class StaticPartitioner : public PartitionerBase { | |
public: | |
/** | |
@brief default constructor | |
*/ | |
StaticPartitioner() : PartitionerBase{0} {}; | |
/** | |
@brief construct a dynamic partitioner with the given chunk size | |
*/ | |
explicit StaticPartitioner(size_t sz) : PartitionerBase(sz) {} | |
/** | |
@brief queries the adjusted chunk size | |
Returns the given chunk size if it is not zero, or returns | |
<tt>N/W + (w < N%W)</tt>, where @c N is the number of iterations, | |
@c W is the number of workers, and @c w is the worker ID. | |
*/ | |
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const { | |
return _chunk_size ? _chunk_size : N/W + (w < N%W); | |
} | |
// -------------------------------------------------------------------------- | |
// scheduling methods | |
// -------------------------------------------------------------------------- | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<void, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop( | |
size_t N, | |
size_t W, | |
size_t curr_b, | |
size_t chunk_size, | |
F&& func | |
) { | |
size_t stride = W * chunk_size; | |
while(curr_b < N) { | |
size_t curr_e = std::min(curr_b + chunk_size, N); | |
func(curr_b, curr_e); | |
curr_b += stride; | |
} | |
} | |
/** | |
@private | |
*/ | |
template <typename F, | |
std::enable_if_t<std::is_invocable_r_v<bool, F, size_t, size_t>, void>* = nullptr | |
> | |
void loop_until( | |
size_t N, | |
size_t W, | |
size_t curr_b, | |
size_t chunk_size, | |
F&& func | |
) { | |
size_t stride = W * chunk_size; | |
while(curr_b < N) { | |
size_t curr_e = std::min(curr_b + chunk_size, N); | |
if(func(curr_b, curr_e)) { | |
return; | |
} | |
curr_b += stride; | |
} | |
} | |
}; | |
// ---------------------------------------------------------------------------- | |
// RandomPartitioner | |
// ---------------------------------------------------------------------------- | |
/** | |
@class RandomPartitioner | |
@brief class to construct a random partitioner for scheduling parallel algorithms | |
Similar to tf::DynamicPartitioner, | |
the partitioner splits iterations into many partitions but each with a random | |
chunk size in the range, <tt>c = [alpha * N * W, beta * N * W]</tt>. | |
By default, @c alpha is <tt>0.01</tt> and @c beta is <tt>0.5</tt>, respectively. | |
*/ | |
class RandomPartitioner : public PartitionerBase {

  public:

  /**
  @brief default constructor
  */
  RandomPartitioner() = default;

  /**
  @brief constructs a random partitioner
  */
  // NOTE(review): not marked explicit, unlike the other partitioners'
  // single-argument constructors — confirm whether implicit conversion
  // from size_t is intended before changing it.
  RandomPartitioner(size_t cz) : PartitionerBase(cz) {}

  /**
  @brief constructs a random partitioner with the given parameters
  */
  RandomPartitioner(float alpha, float beta) : _alpha {alpha}, _beta {beta} {}

  /**
  @brief queries the @c alpha value
  */
  float alpha() const { return _alpha; }

  /**
  @brief queries the @c beta value
  */
  float beta() const { return _beta; }

  /**
  @brief queries the range of chunk size

  @param N number of iterations
  @param W number of workers
  */
  std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
    // bounds scale with _alpha*N*W and _beta*N*W, then are clamped so
    // that b1 >= 1 and b2 > b1
    size_t b1 = static_cast<size_t>(_alpha * N * W);
    size_t b2 = static_cast<size_t>(_beta  * N * W);
    if(b1 > b2) {
      std::swap(b1, b2);
    }
    b1 = std::max(b1, size_t{1});
    b2 = std::max(b2, b1 + 1);
    return {b1, b2};
  }

  // --------------------------------------------------------------------------
  // scheduling methods
  // --------------------------------------------------------------------------

  /**
  @private
  Runs func(beg, end) over [0, N); each chunk claimed from `next` has a
  fresh random size drawn uniformly from chunk_size_range(N, W).
  */
  template <typename F,
    std::enable_if_t<std::is_invocable_r_v<void, F, size_t, size_t>, void>* = nullptr
  >
  void loop(
    size_t N,
    size_t W,
    std::atomic<size_t>& next,
    F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    // nondeterministically seeded per-call engine
    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      func(curr_b, std::min(curr_b + chunk_size, N));
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }

  /**
  @private
  Same scheduling as loop(), but func returns bool; iteration stops
  early as soon as func returns true.
  */
  template <typename F,
    std::enable_if_t<std::is_invocable_r_v<bool, F, size_t, size_t>, void>* = nullptr
  >
  void loop_until(
    size_t N,
    size_t W,
    std::atomic<size_t>& next,
    F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      if(func(curr_b, std::min(curr_b + chunk_size, N))){
        return;
      }
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }

  private:

  float _alpha {0.01f};   // lower-bound scale factor for the chunk size
  float _beta  {0.5f};    // upper-bound scale factor for the chunk size
};
/**
@brief default partitioner set to tf::GuidedPartitioner

Guided partitioner can achieve decent performance for most parallel algorithms,
especially for those with irregular and unbalanced workload per iteration.
*/
using DefaultPartitioner = GuidedPartitioner;
/** | |
@brief determines if a type is a partitioner | |
A partitioner is a derived type from tf::PartitionerBase. | |
*/ | |
template <typename C> | |
inline constexpr bool is_partitioner_v = std::is_base_of<PartitionerBase, C>::value; | |
} // end of namespace tf ----------------------------------------------------- | |
/** | |
@file flow_builder.hpp | |
@brief flow builder include file | |
*/ | |
namespace tf { | |
/** | |
@class FlowBuilder | |
@brief class to build a task dependency graph | |
The class provides essential methods to construct a task dependency graph | |
from which tf::Taskflow and tf::Subflow are derived. | |
*/ | |
class FlowBuilder { | |
friend class Executor; | |
public: | |
/** | |
@brief constructs a flow builder with a graph | |
*/ | |
FlowBuilder(Graph& graph); | |
/** | |
@brief creates a static task | |
@tparam C callable type constructible from std::function<void()> | |
@param callable callable to construct a static task | |
@return a tf::Task handle | |
The following example creates a static task. | |
@code{.cpp} | |
tf::Task static_task = taskflow.emplace([](){}); | |
@endcode | |
Please refer to @ref StaticTasking for details. | |
*/ | |
template <typename C, | |
std::enable_if_t<is_static_task_v<C>, void>* = nullptr | |
> | |
Task emplace(C&& callable); | |
/** | |
@brief creates a dynamic task | |
@tparam C callable type constructible from std::function<void(tf::Subflow&)> | |
@param callable callable to construct a dynamic task | |
@return a tf::Task handle | |
The following example creates a dynamic task (tf::Subflow) | |
that spawns two static tasks. | |
@code{.cpp} | |
tf::Task dynamic_task = taskflow.emplace([](tf::Subflow& sf){ | |
tf::Task static_task1 = sf.emplace([](){}); | |
tf::Task static_task2 = sf.emplace([](){}); | |
}); | |
@endcode | |
Please refer to @ref DynamicTasking for details. | |
*/ | |
template <typename C, | |
std::enable_if_t<is_dynamic_task_v<C>, void>* = nullptr | |
> | |
Task emplace(C&& callable); | |
/** | |
@brief creates a condition task | |
@tparam C callable type constructible from std::function<int()> | |
@param callable callable to construct a condition task | |
@return a tf::Task handle | |
The following example creates an if-else block using one condition task | |
and three static tasks. | |
@code{.cpp} | |
tf::Taskflow taskflow; | |
auto [init, cond, yes, no] = taskflow.emplace( | |
[] () { }, | |
[] () { return 0; }, | |
[] () { std::cout << "yes\n"; }, | |
[] () { std::cout << "no\n"; } | |
); | |
// executes yes if cond returns 0, or no if cond returns 1 | |
cond.precede(yes, no); | |
cond.succeed(init); | |
@endcode | |
Please refer to @ref ConditionalTasking for details. | |
*/ | |
template <typename C, | |
std::enable_if_t<is_condition_task_v<C>, void>* = nullptr | |
> | |
Task emplace(C&& callable); | |
/** | |
@brief creates a multi-condition task | |
@tparam C callable type constructible from | |
std::function<tf::SmallVector<int>()> | |
@param callable callable to construct a multi-condition task | |
@return a tf::Task handle | |
The following example creates a multi-condition task that selectively | |
jumps to two successor tasks. | |
@code{.cpp} | |
tf::Taskflow taskflow; | |
auto [init, cond, branch1, branch2, branch3] = taskflow.emplace( | |
[] () { }, | |
[] () { return tf::SmallVector{0, 2}; }, | |
[] () { std::cout << "branch1\n"; }, | |
[] () { std::cout << "branch2\n"; }, | |
[] () { std::cout << "branch3\n"; } | |
); | |
// executes branch1 and branch3 when cond returns 0 and 2 | |
cond.precede(branch1, branch2, branch3); | |
cond.succeed(init); | |
@endcode | |
Please refer to @ref ConditionalTasking for details. | |
*/ | |
template <typename C, | |
std::enable_if_t<is_multi_condition_task_v<C>, void>* = nullptr | |
> | |
Task emplace(C&& callable); | |
/** | |
@brief creates multiple tasks from a list of callable objects | |
@tparam C callable types | |
@param callables one or multiple callable objects constructible from each task category | |
@return a tf::Task handle | |
The method returns a tuple of tasks each corresponding to the given | |
callable target. You can use structured binding to get the return tasks | |
one by one. | |
The following example creates four static tasks and assign them to | |
@c A, @c B, @c C, and @c D using structured binding. | |
@code{.cpp} | |
auto [A, B, C, D] = taskflow.emplace( | |
[] () { std::cout << "A"; }, | |
[] () { std::cout << "B"; }, | |
[] () { std::cout << "C"; }, | |
[] () { std::cout << "D"; } | |
); | |
@endcode | |
*/ | |
template <typename... C, std::enable_if_t<(sizeof...(C)>1), void>* = nullptr> | |
auto emplace(C&&... callables); | |
/** | |
@brief removes a task from a taskflow | |
@param task task to remove | |
Removes a task and its input and output dependencies from the graph | |
associated with the flow builder. | |
If the task does not belong to the graph, nothing will happen. | |
@code{.cpp} | |
tf::Task A = taskflow.emplace([](){ std::cout << "A"; }); | |
tf::Task B = taskflow.emplace([](){ std::cout << "B"; }); | |
tf::Task C = taskflow.emplace([](){ std::cout << "C"; }); | |
tf::Task D = taskflow.emplace([](){ std::cout << "D"; }); | |
A.precede(B, C, D); | |
// erase A from the taskflow and its dependencies to B, C, and D | |
taskflow.erase(A); | |
@endcode | |
*/ | |
void erase(Task task); | |
/** | |
@brief creates a module task for the target object | |
@tparam T target object type | |
@param object a custom object that defines the method @c T::graph() | |
@return a tf::Task handle | |
The example below demonstrates a taskflow composition using | |
the @c composed_of method. | |
@code{.cpp} | |
tf::Taskflow t1, t2; | |
t1.emplace([](){ std::cout << "t1"; }); | |
// t2 is partially composed of t1 | |
tf::Task comp = t2.composed_of(t1); | |
tf::Task init = t2.emplace([](){ std::cout << "t2"; }); | |
init.precede(comp); | |
@endcode | |
The taskflow object @c t2 is composed of another taskflow object @c t1, | |
preceded by another static task @c init. | |
When taskflow @c t2 is submitted to an executor, | |
@c init will run first and then @c comp which spawns its definition
in taskflow @c t1. | |
The target @c object being composed must define the method | |
<tt>T::graph()</tt> that returns a reference to a graph object of | |
type tf::Graph such that it can interact with the executor. | |
For example: | |
@code{.cpp} | |
// custom struct
struct MyObj {
  tf::Graph _graph;
  MyObj() {
    tf::FlowBuilder builder(_graph);
    tf::Task task = builder.emplace([](){
      std::cout << "a task\n";  // static task
    });
  }
  tf::Graph& graph() { return _graph; }
};
MyObj obj; | |
tf::Task comp = taskflow.composed_of(obj); | |
@endcode | |
Please refer to @ref ComposableTasking for details. | |
*/ | |
template <typename T> | |
Task composed_of(T& object); | |
/** | |
@brief creates a placeholder task | |
@return a tf::Task handle | |
A placeholder task maps to a node in the taskflow graph, but | |
it does not have any callable work assigned yet. | |
A placeholder task is different from an empty task handle that | |
does not point to any node in a graph. | |
@code{.cpp} | |
// create a placeholder task with no callable target assigned | |
tf::Task placeholder = taskflow.placeholder(); | |
assert(placeholder.empty() == false && placeholder.has_work() == false); | |
// create an empty task handle | |
tf::Task task; | |
assert(task.empty() == true); | |
// assign the task handle to the placeholder task | |
task = placeholder; | |
assert(task.empty() == false && task.has_work() == false); | |
@endcode | |
*/ | |
Task placeholder(); | |
/** | |
@brief adds adjacent dependency links to a linear list of tasks | |
@param tasks a vector of tasks | |
This member function creates linear dependencies over a vector of tasks. | |
@code{.cpp} | |
tf::Task A = taskflow.emplace([](){ std::cout << "A"; }); | |
tf::Task B = taskflow.emplace([](){ std::cout << "B"; }); | |
tf::Task C = taskflow.emplace([](){ std::cout << "C"; }); | |
tf::Task D = taskflow.emplace([](){ std::cout << "D"; }); | |
std::vector<tf::Task> tasks {A, B, C, D};
taskflow.linearize(tasks); // A->B->C->D | |
@endcode | |
*/ | |
void linearize(std::vector<Task>& tasks); | |
/** | |
@brief adds adjacent dependency links to a linear list of tasks | |
@param tasks an initializer list of tasks | |
This member function creates linear dependencies over a list of tasks. | |
@code{.cpp} | |
tf::Task A = taskflow.emplace([](){ std::cout << "A"; }); | |
tf::Task B = taskflow.emplace([](){ std::cout << "B"; }); | |
tf::Task C = taskflow.emplace([](){ std::cout << "C"; }); | |
tf::Task D = taskflow.emplace([](){ std::cout << "D"; }); | |
taskflow.linearize({A, B, C, D}); // A->B->C->D | |
@endcode | |
*/ | |
void linearize(std::initializer_list<Task> tasks); | |
// ------------------------------------------------------------------------ | |
// parallel iterations | |
// ------------------------------------------------------------------------ | |
/** | |
@brief constructs an STL-styled parallel-for task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam C callable type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@param callable callable object to apply to the dereferenced iterator | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks that applies the callable object to each object | |
obtained by dereferencing every iterator in the range <tt>[first, last)</tt>. | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
for(auto itr=first; itr!=last; itr++) { | |
callable(*itr); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
The callable needs to take a single argument of | |
the dereferenced iterator type. | |
Please refer to @ref ParallelIterations for details. | |
*/ | |
template <typename B, typename E, typename C, typename P = GuidedPartitioner> | |
Task for_each(B first, E last, C callable, P&& part = P()); | |
/** | |
@brief constructs an STL-styled index-based parallel-for task | |
@tparam B beginning index type (must be integral) | |
@tparam E ending index type (must be integral) | |
@tparam S step type (must be integral) | |
@tparam C callable type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first index of the beginning (inclusive) | |
@param last index of the end (exclusive) | |
@param step step size | |
@param callable callable object to apply to each valid index | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks that applies the callable object to each index | |
in the range <tt>[first, last)</tt> with the step size. | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
// case 1: step size is positive | |
for(auto i=first; i<last; i+=step) { | |
callable(i); | |
} | |
// case 2: step size is negative | |
for(auto i=first; i>last; i+=step) {
callable(i); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
The callable needs to take a single argument of the integral index type. | |
Please refer to @ref ParallelIterations for details. | |
*/ | |
template <typename B, typename E, typename S, typename C, typename P = GuidedPartitioner> | |
Task for_each_index( | |
B first, E last, S step, C callable, P&& part = P() | |
); | |
// ------------------------------------------------------------------------ | |
// transform | |
// ------------------------------------------------------------------------ | |
/** | |
@brief constructs a parallel-transform task | |
@tparam B beginning input iterator type | |
@tparam E ending input iterator type | |
@tparam O output iterator type | |
@tparam C callable type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first1 iterator to the beginning of the first range | |
@param last1 iterator to the end of the first range | |
@param d_first iterator to the beginning of the output range | |
@param c an unary callable to apply to dereferenced input elements | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks that applies the callable object to an | |
input range and stores the result in another output range. | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
while (first1 != last1) { | |
*d_first++ = c(*first1++); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
The callable needs to take a single argument of the dereferenced | |
iterator type. | |
Please refer to @ref ParallelTransforms for details. | |
*/ | |
template < | |
typename B, typename E, typename O, typename C, typename P = GuidedPartitioner | |
> | |
Task transform(B first1, E last1, O d_first, C c, P&& part = P()); | |
/** | |
@brief constructs a parallel-transform task | |
@tparam B1 beginning input iterator type for the first input range | |
@tparam E1 ending input iterator type for the first input range | |
@tparam B2 beginning input iterator type for the second input range
@tparam O output iterator type | |
@tparam C callable type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first1 iterator to the beginning of the first input range | |
@param last1 iterator to the end of the first input range | |
@param first2 iterator to the beginning of the second input range | |
@param d_first iterator to the beginning of the output range | |
@param c a binary operator to apply to dereferenced input elements | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks that applies the callable object to two | |
input ranges and stores the result in another output range. | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
while (first1 != last1) { | |
*d_first++ = c(*first1++, *first2++); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
The callable needs to take two arguments of dereferenced elements | |
from the two input ranges. | |
Please refer to @ref ParallelTransforms for details. | |
*/ | |
template < | |
typename B1, typename E1, typename B2, typename O, typename C, typename P=GuidedPartitioner, | |
std::enable_if_t<!is_partitioner_v<std::decay_t<C>>, void>* = nullptr | |
> | |
Task transform(B1 first1, E1 last1, B2 first2, O d_first, C c, P&& part = P()); | |
// ------------------------------------------------------------------------ | |
// reduction | |
// ------------------------------------------------------------------------ | |
/** | |
@brief constructs an STL-styled parallel-reduce task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam T result type | |
@tparam O binary reducer type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@param init initial value of the reduction and the storage for the reduced result | |
@param bop binary operator that will be applied | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks to perform parallel reduction over @c init | |
and the elements in the range <tt>[first, last)</tt>. | |
The reduced result is stored in @c init.
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
for(auto itr=first; itr!=last; itr++) { | |
init = bop(init, *itr); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelReduction for details. | |
*/ | |
template <typename B, typename E, typename T, typename O, typename P = GuidedPartitioner> | |
Task reduce(B first, E last, T& init, O bop, P&& part = P()); | |
// ------------------------------------------------------------------------ | |
// transform and reduction
// ------------------------------------------------------------------------ | |
/** | |
@brief constructs an STL-styled parallel transform-reduce task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam T result type | |
@tparam BOP binary reducer type | |
@tparam UOP unary transformion type | |
@tparam P partitioner type (default tf::GuidedPartitioner) | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@param init initial value of the reduction and the storage for the reduced result | |
@param bop binary operator that will be applied in unspecified order to the results of @c uop | |
@param uop unary operator that will be applied to transform each element in the range to the result type | |
@param part partitioning algorithm to schedule parallel iterations | |
@return a tf::Task handle | |
The task spawns asynchronous tasks to perform parallel reduction over @c init and | |
the transformed elements in the range <tt>[first, last)</tt>. | |
The reduced result is stored in @c init.
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
for(auto itr=first; itr!=last; itr++) { | |
init = bop(init, uop(*itr)); | |
} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelReduction for details. | |
*/ | |
template < | |
typename B, typename E, typename T, typename BOP, typename UOP, typename P = GuidedPartitioner | |
> | |
Task transform_reduce(B first, E last, T& init, BOP bop, UOP uop, P&& part = P()); | |
// ------------------------------------------------------------------------ | |
// scan | |
// ------------------------------------------------------------------------ | |
/** | |
@brief creates an STL-styled parallel inclusive-scan task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam BOP summation operator type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param bop function to perform summation | |
Performs the cumulative sum (aka prefix sum, aka scan) of the input range | |
and writes the result to the output range. | |
Each element of the output range contains the | |
running total of all earlier elements using the given binary operator | |
for summation. | |
This function generates an @em inclusive scan, meaning that the N-th element | |
of the output range is the sum of the first N input elements, | |
so the N-th input element is included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.inclusive_scan( | |
input.begin(), input.end(), input.begin(), std::plus<int>{} | |
); | |
executor.run(taskflow).wait(); | |
// input is {1, 3, 6, 10, 15} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename BOP> | |
Task inclusive_scan(B first, E last, D d_first, BOP bop); | |
/** | |
@brief creates an STL-styled parallel inclusive-scan task with an initial value | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam BOP summation operator type | |
@tparam T initial value type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param bop function to perform summation | |
@param init initial value | |
Performs the cumulative sum (aka prefix sum, aka scan) of the input range | |
and writes the result to the output range. | |
Each element of the output range contains the | |
running total of all earlier elements (and the initial value) | |
using the given binary operator for summation. | |
This function generates an @em inclusive scan, meaning the N-th element | |
of the output range is the sum of the first N input elements, | |
so the N-th input element is included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.inclusive_scan( | |
input.begin(), input.end(), input.begin(), std::plus<int>{}, -1 | |
); | |
executor.run(taskflow).wait(); | |
// input is {0, 2, 5, 9, 14} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename BOP, typename T> | |
Task inclusive_scan(B first, E last, D d_first, BOP bop, T init); | |
/** | |
@brief creates an STL-styled parallel exclusive-scan task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam T initial value type | |
@tparam BOP summation operator type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param init initial value | |
@param bop function to perform summation | |
Performs the cumulative sum (aka prefix sum, aka scan) of the input range | |
and writes the result to the output range. | |
Each element of the output range contains the | |
running total of all earlier elements (and the initial value) | |
using the given binary operator for summation. | |
This function generates an @em exclusive scan, meaning the N-th element | |
of the output range is the sum of the first N-1 input elements, | |
so the N-th input element is not included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.exclusive_scan( | |
input.begin(), input.end(), input.begin(), -1, std::plus<int>{} | |
); | |
executor.run(taskflow).wait(); | |
// input is {-1, 0, 2, 5, 9} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename T, typename BOP> | |
Task exclusive_scan(B first, E last, D d_first, T init, BOP bop); | |
// ------------------------------------------------------------------------ | |
// transform scan | |
// ------------------------------------------------------------------------ | |
/** | |
@brief creates an STL-styled parallel transform-inclusive scan task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam BOP summation operator type | |
@tparam UOP transform operator type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param bop function to perform summation | |
@param uop function to transform elements of the input range | |
Write the cumulative sum (aka prefix sum, aka scan) of the input range | |
to the output range. Each element of the output range contains the | |
running total of all earlier elements | |
using @c uop to transform the input elements | |
and using @c bop for summation. | |
This function generates an @em inclusive scan, meaning the Nth element | |
of the output range is the sum of the first N input elements, | |
so the Nth input element is included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.transform_inclusive_scan( | |
input.begin(), input.end(), input.begin(), std::plus<int>{}, | |
[] (int item) { return -item; } | |
); | |
executor.run(taskflow).wait(); | |
// input is {-1, -3, -6, -10, -15} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename BOP, typename UOP> | |
Task transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop); | |
/** | |
@brief creates an STL-styled parallel transform-inclusive scan task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam BOP summation operator type | |
@tparam UOP transform operator type | |
@tparam T initial value type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param bop function to perform summation | |
@param uop function to transform elements of the input range | |
@param init initial value | |
Write the cumulative sum (aka prefix sum, aka scan) of the input range | |
to the output range. Each element of the output range contains the | |
running total of all earlier elements (including an initial value) | |
using @c uop to transform the input elements | |
and using @c bop for summation. | |
This function generates an @em inclusive scan, meaning the Nth element | |
of the output range is the sum of the first N input elements, | |
so the Nth input element is included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.transform_inclusive_scan( | |
input.begin(), input.end(), input.begin(), std::plus<int>{}, | |
[] (int item) { return -item; }, | |
-1 | |
); | |
executor.run(taskflow).wait(); | |
// input is {-2, -4, -7, -11, -16} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename BOP, typename UOP, typename T> | |
Task transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop, T init); | |
/** | |
@brief creates an STL-styled parallel transform-exclusive scan task | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam D destination iterator type | |
@tparam BOP summation operator type | |
@tparam UOP transform operator type | |
@tparam T initial value type | |
@param first start of input range | |
@param last end of input range | |
@param d_first start of output range (may be the same as input range) | |
@param bop function to perform summation | |
@param uop function to transform elements of the input range | |
@param init initial value | |
Write the cumulative sum (aka prefix sum, aka scan) of the input range | |
to the output range. Each element of the output range contains the | |
running total of all earlier elements (including an initial value) | |
using @c uop to transform the input elements | |
and using @c bop for summation. | |
This function generates an @em exclusive scan, meaning the Nth element | |
of the output range is the sum of the first N-1 input elements, | |
so the Nth input element is not included. | |
@code{.cpp} | |
std::vector<int> input = {1, 2, 3, 4, 5}; | |
taskflow.transform_exclusive_scan( | |
input.begin(), input.end(), input.begin(), -1, std::plus<int>{}, | |
[](int item) { return -item; } | |
); | |
executor.run(taskflow).wait(); | |
// input is {-1, -2, -4, -7, -11} | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
Please refer to @ref ParallelScan for details. | |
*/ | |
template <typename B, typename E, typename D, typename T, typename BOP, typename UOP> | |
Task transform_exclusive_scan(B first, E last, D d_first, T init, BOP bop, UOP uop); | |
// ------------------------------------------------------------------------ | |
// find | |
// ------------------------------------------------------------------------ | |
/** | |
@brief constructs a task to perform STL-styled find-if algorithm | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam T resulting iterator type | |
@tparam UOP unary predicate type | |
@tparam P partitioner type | |
@param first start of the input range | |
@param last end of the input range | |
@param result resulting iterator to the found element in the input range | |
@param predicate unary predicate which returns @c true for the required element | |
@param part partitioning algorithm (default tf::GuidedPartitioner) | |
Returns an iterator to the first element in the range <tt>[first, last)</tt> | |
that satisfies the given criteria (or last if there is no such iterator). | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
auto find_if(InputIt first, InputIt last, UnaryPredicate predicate) {
for (; first != last; ++first) { | |
if (predicate(*first)){ | |
return first; | |
} | |
} | |
return last; | |
} | |
@endcode | |
For example, the code below find the element that satisfies the given | |
criteria (value plus one is equal to 23) from an input range of 10 elements: | |
@code{.cpp} | |
std::vector<int> input = {1, 6, 9, 10, 22, 5, 7, 8, 9, 11}; | |
std::vector<int>::iterator result; | |
taskflow.find_if(
  input.begin(), input.end(), result, [](int i){ return i+1 == 23; }
);
executor.run(taskflow).wait(); | |
assert(*result == 22); | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
*/ | |
template <typename B, typename E, typename T, typename UOP, typename P = GuidedPartitioner> | |
Task find_if(B first, E last, T& result, UOP predicate, P&& part = P()); | |
/** | |
@brief constructs a task to perform STL-styled find-if-not algorithm | |
@tparam B beginning iterator type | |
@tparam E ending iterator type | |
@tparam T resulting iterator type | |
@tparam UOP unary predicate type | |
@tparam P partitioner type | |
@param first start of the input range | |
@param last end of the input range | |
@param result resulting iterator to the found element in the input range | |
@param predicate unary predicate which returns @c false for the required element | |
@param part partitioning algorithm (default tf::GuidedPartitioner) | |
Returns an iterator to the first element in the range <tt>[first, last)</tt> | |
that satisfies the given criteria (or last if there is no such iterator). | |
This method is equivalent to the parallel execution of the following loop: | |
@code{.cpp} | |
auto find_if_not(InputIt first, InputIt last, UnaryPredicate predicate) {
for (; first != last; ++first) { | |
if (!predicate(*first)){ | |
return first; | |
} | |
} | |
return last; | |
} | |
@endcode | |
For example, the code below find the element that satisfies the given | |
criteria (value is not equal to 1) from an input range of 10 elements: | |
@code{.cpp} | |
std::vector<int> input = {1, 1, 1, 1, 22, 1, 1, 1, 1, 1}; | |
std::vector<int>::iterator result; | |
taskflow.find_if_not(
  input.begin(), input.end(), result, [](int i){ return i == 1; }
);
executor.run(taskflow).wait(); | |
assert(*result == 22); | |
@endcode | |
Iterators are templated to enable stateful range using std::reference_wrapper. | |
*/ | |
template <typename B, typename E, typename T, typename UOP,typename P = GuidedPartitioner> | |
Task find_if_not(B first, E last, T& result, UOP predicate, P&& part = P()); | |
/**
@brief constructs a task to perform STL-styled min-element algorithm
@tparam B beginning iterator type
@tparam E ending iterator type
@tparam T resulting iterator type
@tparam C comparator type
@tparam P partitioner type
@param first start of the input range
@param last end of the input range
@param result resulting iterator to the found element in the input range
@param comp comparison function object
@param part partitioning algorithm (e.g., tf::GuidedPartitioner); unlike
      tf::FlowBuilder::find_if_not, this overload declares no default
      partitioner, so the argument must be supplied explicitly
Finds the smallest element in the <tt>[first, last)</tt>
using the given comparison function object.
The iterator to that smallest element is stored in @c result.
This method is equivalent to the parallel execution of the following loop:
@code{.cpp}
if (first == last) {
  return last;
}
auto smallest = first;
++first;
for (; first != last; ++first) {
  if (comp(*first, *smallest)) {
    smallest = first;
  }
}
return smallest;
@endcode
For example, the code below finds the smallest element from an input
range of 10 elements.
@code{.cpp}
std::vector<int> input = {1, 1, 1, 1, 1, -1, 1, 1, 1, 1};
std::vector<int>::iterator result;
taskflow.min_element(
  input.begin(), input.end(), std::less<int>(), result
);
executor.run(taskflow).wait();
assert(*result == -1);
@endcode
Iterators are templated to enable stateful range using std::reference_wrapper.
*/
template <typename B, typename E, typename T, typename C, typename P>
Task min_element(B first, E last, T& result, C comp, P&& part);
/**
@brief constructs a task to perform STL-styled max-element algorithm
@tparam B beginning iterator type
@tparam E ending iterator type
@tparam T resulting iterator type
@tparam C comparator type
@tparam P partitioner type
@param first start of the input range
@param last end of the input range
@param result resulting iterator to the found element in the input range
@param comp comparison function object
@param part partitioning algorithm (e.g., tf::GuidedPartitioner); unlike
      tf::FlowBuilder::find_if_not, this overload declares no default
      partitioner, so the argument must be supplied explicitly
Finds the largest element in the <tt>[first, last)</tt>
using the given comparison function object.
The iterator to that largest element is stored in @c result.
Note that, as with std::max_element, the comparator is invoked as
<tt>comp(*largest, *first)</tt> (arguments reversed relative to min_element).
This method is equivalent to the parallel execution of the following loop:
@code{.cpp}
if (first == last){
  return last;
}
auto largest = first;
++first;
for (; first != last; ++first) {
  if (comp(*largest, *first)) {
    largest = first;
  }
}
return largest;
@endcode
For example, the code below finds the largest element from an input
range of 10 elements.
@code{.cpp}
std::vector<int> input = {1, 1, 1, 1, 1, 2, 1, 1, 1, 1};
std::vector<int>::iterator result;
taskflow.max_element(
  input.begin(), input.end(), std::less<int>(), result
);
executor.run(taskflow).wait();
assert(*result == 2);
@endcode
Iterators are templated to enable stateful range using std::reference_wrapper.
*/
template <typename B, typename E, typename T, typename C, typename P>
Task max_element(B first, E last, T& result, C comp, P&& part);
// ------------------------------------------------------------------------
// sort
// ------------------------------------------------------------------------
/**
@brief constructs a dynamic task to perform STL-styled parallel sort
@tparam B beginning iterator type (random-accessible)
@tparam E ending iterator type (random-accessible)
@tparam C comparator type
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
@param cmp comparison operator
The task spawns asynchronous tasks to sort elements in the range
<tt>[first, last)</tt> in parallel.
Iterators are templated to enable stateful range using std::reference_wrapper.
Please refer to @ref ParallelSort for details.
*/
template <typename B, typename E, typename C>
Task sort(B first, E last, C cmp);
/**
@brief constructs a dynamic task to perform STL-styled parallel sort using
       the @c std::less<T> comparator, where @c T is the element type
@tparam B beginning iterator type (random-accessible)
@tparam E ending iterator type (random-accessible)
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
The task spawns asynchronous tasks to sort elements in the range
<tt>[first, last)</tt> in parallel using the @c std::less<T> comparator,
where @c T is the dereferenced iterator type.
Iterators are templated to enable stateful range using std::reference_wrapper.
Please refer to @ref ParallelSort for details.
*/
template <typename B, typename E>
Task sort(B first, E last);
protected:
/**
@brief associated graph object
*/
Graph& _graph;
private:
// chains each element of the given task container to its successor;
// shared implementation behind the public linearize overloads
template <typename L>
void _linearize(L&);
};
// Constructor
// Binds this builder to the graph object that stores all tasks it creates.
inline FlowBuilder::FlowBuilder(Graph& graph) : _graph(graph) {
}
// Function: emplace | |
template <typename C, std::enable_if_t<is_static_task_v<C>, void>*> | |
Task FlowBuilder::emplace(C&& c) { | |
return Task(_graph._emplace_back("", 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Static>{}, std::forward<C>(c) | |
)); | |
} | |
// Function: emplace | |
template <typename C, std::enable_if_t<is_dynamic_task_v<C>, void>*> | |
Task FlowBuilder::emplace(C&& c) { | |
return Task(_graph._emplace_back("", 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Dynamic>{}, std::forward<C>(c) | |
)); | |
} | |
// Function: emplace | |
template <typename C, std::enable_if_t<is_condition_task_v<C>, void>*> | |
Task FlowBuilder::emplace(C&& c) { | |
return Task(_graph._emplace_back("", 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Condition>{}, std::forward<C>(c) | |
)); | |
} | |
// Function: emplace | |
template <typename C, std::enable_if_t<is_multi_condition_task_v<C>, void>*> | |
Task FlowBuilder::emplace(C&& c) { | |
return Task(_graph._emplace_back("", 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::MultiCondition>{}, std::forward<C>(c) | |
)); | |
} | |
// Function: emplace | |
template <typename... C, std::enable_if_t<(sizeof...(C)>1), void>*> | |
auto FlowBuilder::emplace(C&&... cs) { | |
return std::make_tuple(emplace(std::forward<C>(cs))...); | |
} | |
// Function: erase
// Removes the given task from the graph: all edges referencing it are
// unlinked from its neighbors' adjacency lists before the node is destroyed.
inline void FlowBuilder::erase(Task task) {
  Node* victim = task._node;
  // an empty task handle refers to no node; nothing to erase
  if(victim == nullptr) {
    return;
  }
  // drops the first edge to `victim` found in the given adjacency list
  auto unlink = [victim] (auto& edges) {
    auto itr = std::find(edges.begin(), edges.end(), victim);
    if(itr != edges.end()) {
      edges.erase(itr);
    }
  };
  // each dependent no longer lists victim as a successor
  task.for_each_dependent([&] (Task dependent) {
    unlink(dependent._node->_successors);
  });
  // each successor no longer lists victim as a dependent
  task.for_each_successor([&] (Task successor) {
    unlink(successor._node->_dependents);
  });
  _graph._erase(victim);
}
// Function: composed_of | |
template <typename T> | |
Task FlowBuilder::composed_of(T& object) { | |
auto node = _graph._emplace_back("", 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Module>{}, object | |
); | |
return Task(node); | |
} | |
// Function: placeholder
// Creates an empty node with no work attached; the callable can be
// assigned to the returned task later.
inline Task FlowBuilder::placeholder() {
  return Task(_graph._emplace_back("", 0, nullptr, nullptr, 0,
    std::in_place_type_t<Node::Placeholder>{}
  ));
}
// Procedure: _linearize | |
template <typename L> | |
void FlowBuilder::_linearize(L& keys) { | |
auto itr = keys.begin(); | |
auto end = keys.end(); | |
if(itr == end) { | |
return; | |
} | |
auto nxt = itr; | |
for(++nxt; nxt != end; ++nxt, ++itr) { | |
itr->_node->_precede(nxt->_node); | |
} | |
} | |
// Procedure: linearize
// Chains the tasks in the vector so each runs before the next.
inline void FlowBuilder::linearize(std::vector<Task>& keys) {
  _linearize(keys);
}
// Procedure: linearize
// Overload accepting a braced list of tasks, e.g. linearize({A, B, C}).
inline void FlowBuilder::linearize(std::initializer_list<Task> keys) {
  _linearize(keys);
}
// ---------------------------------------------------------------------------- | |
/** | |
@class Subflow | |
@brief class to construct a subflow graph from the execution of a dynamic task | |
tf::Subflow is a derived class from tf::Runtime with a specialized mechanism | |
to manage the execution of a child graph. | |
By default, a subflow automatically @em joins its parent node. | |
You may explicitly join or detach a subflow by calling tf::Subflow::join | |
or tf::Subflow::detach, respectively. | |
The following example creates a taskflow graph that spawns a subflow from | |
the execution of task @c B, and the subflow contains three tasks, @c B1, | |
@c B2, and @c B3, where @c B3 runs after @c B1 and @c B2. | |
@code{.cpp} | |
// create three static tasks | |
tf::Task A = taskflow.emplace([](){}).name("A"); | |
tf::Task C = taskflow.emplace([](){}).name("C"); | |
tf::Task D = taskflow.emplace([](){}).name("D"); | |
// create a subflow graph (dynamic tasking) | |
tf::Task B = taskflow.emplace([] (tf::Subflow& subflow) { | |
tf::Task B1 = subflow.emplace([](){}).name("B1"); | |
tf::Task B2 = subflow.emplace([](){}).name("B2"); | |
tf::Task B3 = subflow.emplace([](){}).name("B3"); | |
B1.precede(B3); | |
B2.precede(B3); | |
}).name("B"); | |
A.precede(B); // B runs after A | |
A.precede(C); // C runs after A | |
B.precede(D); // D runs after B | |
C.precede(D); // D runs after C | |
@endcode | |
*/ | |
class Subflow : public FlowBuilder,
                public Runtime {
  friend class Executor;
  friend class FlowBuilder;
  friend class Runtime;
  public:
    /**
    @brief enables the subflow to join its parent task
    Performs an immediate action to join the subflow. Once the subflow is joined,
    it is considered finished and you may not modify the subflow anymore.
    @code{.cpp}
    taskflow.emplace([](tf::Subflow& sf){
      sf.emplace([](){});
      sf.join();  // join the subflow of one task
    });
    @endcode
    Only the worker that spawns this subflow can join it.
    */
    void join();
    /**
    @brief enables the subflow to detach from its parent task
    Performs an immediate action to detach the subflow. Once the subflow is detached,
    it is considered finished and you may not modify the subflow anymore.
    @code{.cpp}
    taskflow.emplace([](tf::Subflow& sf){
      sf.emplace([](){});
      sf.detach();
    });
    @endcode
    Only the worker that spawns this subflow can detach it.
    */
    void detach();
    /**
    @brief resets the subflow to a joinable state
    @param clear_graph specifies whether to clear the associated graph (default @c true)
    Clears the underlying task graph depending on the
    given variable @c clear_graph (default @c true) and then
    updates the subflow to a joinable state.
    */
    void reset(bool clear_graph = true);
    /**
    @brief queries if the subflow is joinable
    This member function queries if the subflow is joinable.
    When a subflow is joined or detached, it becomes not joinable.
    @code{.cpp}
    taskflow.emplace([](tf::Subflow& sf){
      sf.emplace([](){});
      std::cout << sf.joinable() << '\n';  // true
      sf.join();
      std::cout << sf.joinable() << '\n';  // false
    });
    @endcode
    */
    bool joinable() const noexcept;
  private:
    // true until the subflow is joined or detached; reset() restores it
    bool _joinable {true};
    // only the executor machinery may construct a subflow
    Subflow(Executor&, Worker&, Node*, Graph&);
};
// Constructor
// Wires the subflow to its executor/worker context (Runtime base) and to
// the child graph it will populate (FlowBuilder base).
inline Subflow::Subflow(
  Executor& executor, Worker& worker, Node* parent, Graph& graph
) :
  FlowBuilder {graph},
  Runtime {executor, worker, parent} {
  // assert(_parent != nullptr);
}
// Function: joinable
inline bool Subflow::joinable() const noexcept {
  return _joinable;
}
// Procedure: reset | |
inline void Subflow::reset(bool clear_graph) { | |
if(clear_graph) { | |
_graph._clear(); | |
} | |
_joinable = true; | |
} | |
} // end of namespace tf. --------------------------------------------------- | |
/** | |
@file taskflow/core/taskflow.hpp | |
@brief taskflow include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
/** | |
@class Taskflow | |
@brief class to create a taskflow object | |
A %taskflow manages a task dependency graph where each task represents a | |
callable object (e.g., @std_lambda, @std_function) and an edge represents a | |
dependency between two tasks. A task is one of the following types: | |
1. static task : the callable constructible from | |
@c std::function<void()> | |
2. dynamic task : the callable constructible from | |
@c std::function<void(tf::Subflow&)> | |
3. condition task : the callable constructible from | |
@c std::function<int()> | |
4. multi-condition task: the callable constructible from | |
@c %std::function<tf::SmallVector<int>()> | |
5. module task : the task constructed from tf::Taskflow::composed_of | |
@c std::function<void(tf::Runtime&)> | |
Each task is a basic computation unit and is run by one worker thread | |
from an executor. | |
The following example creates a simple taskflow graph of four static tasks, | |
@c A, @c B, @c C, and @c D, where | |
@c A runs before @c B and @c C and | |
@c D runs after @c B and @c C. | |
@code{.cpp} | |
tf::Executor executor; | |
tf::Taskflow taskflow("simple"); | |
tf::Task A = taskflow.emplace([](){ std::cout << "TaskA\n"; }); | |
tf::Task B = taskflow.emplace([](){ std::cout << "TaskB\n"; }); | |
tf::Task C = taskflow.emplace([](){ std::cout << "TaskC\n"; }); | |
tf::Task D = taskflow.emplace([](){ std::cout << "TaskD\n"; }); | |
A.precede(B, C); // A runs before B and C | |
D.succeed(B, C); // D runs after B and C | |
executor.run(taskflow).wait(); | |
@endcode | |
The taskflow object itself is NOT thread-safe. You should not
modify the graph while it is running, for example by
adding new tasks, adding new dependencies, or moving
the taskflow to another object.
To minimize the overhead of task creation, | |
our runtime leverages a global object pool to recycle | |
tasks in a thread-safe manner. | |
Please refer to @ref Cookbook to learn more about each task type | |
and how to submit a taskflow to an executor. | |
*/ | |
class Taskflow : public FlowBuilder {
  friend class Topology;
  friend class Executor;
  friend class FlowBuilder;
  // iterative DFS state used by the _dump helpers: `stack` holds
  // (parent node, graph) pairs still to print and `visited` assigns each
  // printed graph a stable id so module references can point at it
  struct Dumper {
    size_t id;
    std::stack<std::pair<const Node*, const Graph*>> stack;
    std::unordered_map<const Graph*, size_t> visited;
  };
  public:
    /**
    @brief constructs a taskflow with the given name
    @code{.cpp}
    tf::Taskflow taskflow("My Taskflow");
    std::cout << taskflow.name();   // "My Taskflow"
    @endcode
    */
    Taskflow(const std::string& name);
    /**
    @brief constructs a taskflow
    */
    Taskflow();
    /**
    @brief constructs a taskflow from a moved taskflow
    Constructing a taskflow @c taskflow1 from a moved taskflow @c taskflow2 will
    migrate the graph of @c taskflow2 to @c taskflow1.
    After the move, @c taskflow2 will become empty.
    @code{.cpp}
    tf::Taskflow taskflow1(std::move(taskflow2));
    assert(taskflow2.empty());
    @endcode
    Notice that @c taskflow2 should not be running in an executor
    during the move operation, or the behavior is undefined.
    */
    Taskflow(Taskflow&& rhs);
    /**
    @brief move assignment operator
    Moving a taskflow @c taskflow2 to another taskflow @c taskflow1 will destroy
    the existing graph of @c taskflow1 and assign it the graph of @c taskflow2.
    After the move, @c taskflow2 will become empty.
    @code{.cpp}
    taskflow1 = std::move(taskflow2);
    assert(taskflow2.empty());
    @endcode
    Notice that both @c taskflow1 and @c taskflow2 should not be running
    in an executor during the move operation, or the behavior is undefined.
    */
    Taskflow& operator = (Taskflow&& rhs);
    /**
    @brief default destructor
    When the destructor is called, all tasks and their associated data
    (e.g., captured data) will be destroyed.
    It is your responsibility to ensure all submitted execution of this
    taskflow have completed before destroying it.
    For instance, the following code results in undefined behavior
    since the executor may still be running the taskflow while
    it is destroyed after the block.
    @code{.cpp}
    {
      tf::Taskflow taskflow;
      executor.run(taskflow);
    }
    @endcode
    To fix the problem, we must wait for the execution to complete
    before destroying the taskflow.
    @code{.cpp}
    {
      tf::Taskflow taskflow;
      executor.run(taskflow).wait();
    }
    @endcode
    */
    ~Taskflow() = default;
    /**
    @brief dumps the taskflow to a DOT format through a std::ostream target
    @code{.cpp}
    taskflow.dump(std::cout);  // dump the graph to the standard output
    std::ofstream ofs("output.dot");
    taskflow.dump(ofs);        // dump the graph to the file output.dot
    @endcode
    For dynamically spawned tasks, such as module tasks, subflow tasks,
    and GPU tasks, you need to run the taskflow first before you can
    dump the entire graph.
    @code{.cpp}
    tf::Task parent = taskflow.emplace([](tf::Subflow& sf){
      sf.emplace([](){ std::cout << "child\n"; });
    });
    taskflow.dump(std::cout);       // this dumps only the parent tasks
    executor.run(taskflow).wait();
    taskflow.dump(std::cout);       // this dumps both parent and child tasks
    @endcode
    */
    void dump(std::ostream& ostream) const;
    /**
    @brief dumps the taskflow to a std::string of DOT format
    This method is similar to tf::Taskflow::dump(std::ostream& ostream),
    but returning a string of the graph in DOT format.
    */
    std::string dump() const;
    /**
    @brief queries the number of tasks
    */
    size_t num_tasks() const;
    /**
    @brief queries the emptiness of the taskflow
    An empty taskflow has no tasks. That is, the return of
    tf::Taskflow::num_tasks is zero.
    */
    bool empty() const;
    /**
    @brief assigns a name to the taskflow
    @code{.cpp}
    taskflow.name("assign another name");
    @endcode
    */
    void name(const std::string&);
    /**
    @brief queries the name of the taskflow
    @code{.cpp}
    std::cout << "my name is: " << taskflow.name();
    @endcode
    */
    const std::string& name() const;
    /**
    @brief clears the associated task dependency graph
    When you clear a taskflow, all tasks and their associated data
    (e.g., captured data in task callables) will be destroyed.
    The behavior of clearing a running taskflow is undefined.
    */
    void clear();
    /**
    @brief applies a visitor to each task in the taskflow
    A visitor is a callable that takes an argument of type tf::Task
    and returns nothing. The following example iterates each task in a
    taskflow and prints its name:
    @code{.cpp}
    taskflow.for_each_task([](tf::Task task){
      std::cout << task.name() << '\n';
    });
    @endcode
    */
    template <typename V>
    void for_each_task(V&& visitor) const;
    /**
    @brief returns a reference to the underlying graph object
    A graph object (of type tf::Graph) is the ultimate storage for the
    task dependency graph and should only be used as an opaque
    data structure to interact with the executor (e.g., composition).
    */
    Graph& graph();
  private:
    // guards _name/_graph/_topologies/_satellite against concurrent
    // access from executor threads (mutable so const dumps could lock)
    mutable std::mutex _mutex;
    std::string _name;
    Graph _graph;
    // pending/running submissions of this taskflow, managed by the executor
    std::queue<std::shared_ptr<Topology>> _topologies;
    // when set, position of this taskflow inside an executor-owned list;
    // presumably used for executor-managed lifetime -- see executor code
    std::optional<std::list<Taskflow>::iterator> _satellite;
    void _dump(std::ostream&, const Graph*) const;
    void _dump(std::ostream&, const Node*, Dumper&) const;
    void _dump(std::ostream&, const Graph*, Dumper&) const;
};
// Constructor
// NOTE: passing _graph to the FlowBuilder base before _graph itself is
// constructed is safe here -- only its address is bound to the reference.
inline Taskflow::Taskflow(const std::string& name) :
  FlowBuilder {_graph},
  _name {name} {
}
// Constructor
inline Taskflow::Taskflow() : FlowBuilder{_graph} {
}
// Move constructor
// Locks only rhs: `this` is under construction and not yet visible to
// other threads. The satellite slot is copied then cleared on rhs so
// ownership of the executor-list position transfers.
inline Taskflow::Taskflow(Taskflow&& rhs) : FlowBuilder{_graph} {
  std::scoped_lock<std::mutex> lock(rhs._mutex);
  _name = std::move(rhs._name);
  _graph = std::move(rhs._graph);
  _topologies = std::move(rhs._topologies);
  _satellite = rhs._satellite;
  rhs._satellite.reset();
}
// Move assignment
// Self-assignment guard is required: scoped_lock would deadlock-free lock
// the same mutex twice otherwise. Both mutexes are acquired atomically.
inline Taskflow& Taskflow::operator = (Taskflow&& rhs) {
  if(this != &rhs) {
    std::scoped_lock<std::mutex, std::mutex> lock(_mutex, rhs._mutex);
    _name = std::move(rhs._name);
    _graph = std::move(rhs._graph);
    _topologies = std::move(rhs._topologies);
    _satellite = rhs._satellite;
    rhs._satellite.reset();
  }
  return *this;
}
// Procedure: clear
inline void Taskflow::clear() {
  _graph._clear();
}
// Function: num_tasks
inline size_t Taskflow::num_tasks() const {
  return _graph.size();
}
// Function: empty
inline bool Taskflow::empty() const {
  return _graph.empty();
}
// Function: name (setter)
inline void Taskflow::name(const std::string &name) {
  _name = name;
}
// Function: name (getter)
inline const std::string& Taskflow::name() const {
  return _name;
}
// Function: graph
inline Graph& Taskflow::graph() {
  return _graph;
}
// Function: for_each_task | |
template <typename V> | |
void Taskflow::for_each_task(V&& visitor) const { | |
for(size_t i=0; i<_graph._nodes.size(); ++i) { | |
visitor(Task(_graph._nodes[i])); | |
} | |
} | |
// Procedure: dump
// Convenience overload: renders the DOT output into a string by
// delegating to the stream-based dump.
inline std::string Taskflow::dump() const {
  std::ostringstream oss;
  dump(oss);
  return oss.str();
}
// Function: dump
// Emits the top-level DOT digraph wrapper and lets _dump fill in the body.
inline void Taskflow::dump(std::ostream& os) const {
  os << "digraph Taskflow {\n";
  _dump(os, &_graph);
  os << "}\n";
}
// Procedure: _dump
// Iterative worklist traversal over all reachable graphs (the top-level
// graph plus any module graphs discovered while printing). Each graph is
// emitted as a DOT subgraph cluster; `visited` both de-duplicates graphs
// and assigns the id used in module labels (m0, m1, ...).
inline void Taskflow::_dump(std::ostream& os, const Graph* top) const {
  Dumper dumper;
  dumper.id = 0;
  dumper.stack.push({nullptr, top});
  dumper.visited[top] = dumper.id++;
  while(!dumper.stack.empty()) {
    auto [p, f] = dumper.stack.top();
    dumper.stack.pop();
    os << "subgraph cluster_p" << f << " {\nlabel=\"";
    // n-level module
    if(p) {
      os << 'm' << dumper.visited[f];
    }
    // top-level taskflow graph: label with the name, or the object address
    // when no name was assigned
    else {
      os << "Taskflow: ";
      if(_name.empty()) os << 'p' << this;
      else os << _name;
    }
    os << "\";\n";
    _dump(os, f, dumper);
    os << "}\n";
  }
}
// Procedure: _dump
// Emits one node plus its outgoing edges in DOT. Nodes are identified by
// their address prefixed with 'p'; unnamed nodes fall back to that address
// as their label.
inline void Taskflow::_dump(
  std::ostream& os, const Node* node, Dumper& dumper
) const {
  os << 'p' << node << "[label=\"";
  if(node->_name.empty()) os << 'p' << node;
  else os << node->_name;
  os << "\" ";
  // shape for node: conditioners are drawn as filled diamonds
  switch(node->_handle.index()) {
    case Node::CONDITION:
    case Node::MULTI_CONDITION:
      os << "shape=diamond color=black fillcolor=aquamarine style=filled";
    break;
    default:
    break;
  }
  os << "];\n";
  for(size_t s=0; s<node->_successors.size(); ++s) {
    if(node->_is_conditioner()) {
      // case edge is dashed and labeled with the branch index the
      // condition callable must return to take it
      os << 'p' << node << " -> p" << node->_successors[s]
         << " [style=dashed label=\"" << s << "\"];\n";
    } else {
      os << 'p' << node << " -> p" << node->_successors[s] << ";\n";
    }
  }
  // subflow join node: a leaf of a dynamic task's subgraph implicitly
  // joins back to its parent, so draw that edge explicitly
  if(node->_parent && node->_parent->_handle.index() == Node::DYNAMIC &&
     node->_successors.size() == 0
  ) {
    os << 'p' << node << " -> p" << node->_parent << ";\n";
  }
  // node info: a dynamic task that has already spawned a subflow gets its
  // subgraph rendered inline as a blue cluster (empty until first run)
  switch(node->_handle.index()) {
    case Node::DYNAMIC: {
      auto& sbg = std::get_if<Node::Dynamic>(&node->_handle)->subgraph;
      if(!sbg.empty()) {
        os << "subgraph cluster_p" << node << " {\nlabel=\"Subflow: ";
        if(node->_name.empty()) os << 'p' << node;
        else os << node->_name;
        os << "\";\n" << "color=blue\n";
        _dump(os, &sbg, dumper);
        os << "}\n";
      }
    }
    break;
    default:
    break;
  }
}
// Procedure: _dump
// Emits every node of one graph. Regular nodes are delegated to the
// node-level _dump; module nodes are drawn as box3d references and their
// underlying graphs are queued on the dumper stack (once each) so the
// top-level traversal prints them later.
inline void Taskflow::_dump(
  std::ostream& os, const Graph* graph, Dumper& dumper
) const {
  for(const auto& n : graph->_nodes) {
    // regular task
    if(n->_handle.index() != Node::MODULE) {
      _dump(os, n, dumper);
    }
    // module task
    else {
      //auto module = &(std::get_if<Node::Module>(&n->_handle)->module);
      auto module = &(std::get_if<Node::Module>(&n->_handle)->graph);
      os << 'p' << n << "[shape=box3d, color=blue, label=\"";
      if(n->_name.empty()) os << 'p' << n;
      else os << n->_name;
      // first time we see this module graph: assign it an id and schedule
      // it for printing
      if(dumper.visited.find(module) == dumper.visited.end()) {
        dumper.visited[module] = dumper.id++;
        dumper.stack.push({n, module});
      }
      os << " [m" << dumper.visited[module] << "]\"];\n";
      for(const auto s : n->_successors) {
        os << 'p' << n << "->" << 'p' << s << ";\n";
      }
    }
  }
}
// ---------------------------------------------------------------------------- | |
// class definition: Future | |
// ---------------------------------------------------------------------------- | |
/** | |
@class Future | |
@brief class to access the result of an execution | |
tf::Future is a derived class from std::future that will eventually hold the | |
execution result of a submitted taskflow (tf::Executor::run) | |
or an asynchronous task (tf::Executor::async, tf::Executor::silent_async). | |
In addition to the base methods inherited from std::future, | |
you can call tf::Future::cancel to cancel the execution of the running taskflow | |
associated with this future object. | |
The following example cancels a submission of a taskflow that contains | |
1000 tasks each running one second. | |
@code{.cpp} | |
tf::Executor executor; | |
tf::Taskflow taskflow; | |
for(int i=0; i<1000; i++) { | |
taskflow.emplace([](){ | |
std::this_thread::sleep_for(std::chrono::seconds(1)); | |
}); | |
} | |
// submit the taskflow | |
tf::Future fu = executor.run(taskflow); | |
// request to cancel the submitted execution above | |
fu.cancel(); | |
// wait until the cancellation finishes | |
fu.get(); | |
@endcode | |
*/ | |
template <typename T>
class Future : public std::future<T> {
  friend class Executor;
  friend class Subflow;
  friend class Runtime;
  // either no associated topology (monostate, e.g. default-constructed)
  // or a weak reference to the submitted run's topology; weak so the
  // future never extends the topology's lifetime
  using handle_t = std::variant<
    std::monostate, std::weak_ptr<Topology>
  >;
  public:
    /**
    @brief default constructor
    */
    Future() = default;
    /**
    @brief disabled copy constructor
    */
    Future(const Future&) = delete;
    /**
    @brief default move constructor
    */
    Future(Future&&) = default;
    /**
    @brief disabled copy assignment
    */
    Future& operator = (const Future&) = delete;
    /**
    @brief default move assignment
    */
    Future& operator = (Future&&) = default;
    /**
    @brief cancels the execution of the running taskflow associated with
           this future object
    @return @c true if the execution can be cancelled or
            @c false if the execution has already completed
    When you request a cancellation, the executor will stop scheduling
    any tasks onwards. Tasks that are already running will continue to finish
    (non-preemptive).
    You can call tf::Future::wait to wait for the cancellation to complete.
    */
    bool cancel();
  private:
    handle_t _handle;
    // executors construct futures directly from a std::future plus the
    // associated topology handle
    template <typename P>
    Future(std::future<T>&&, P&&);
};
// Constructor
// Adopts the underlying std::future and records the topology handle used
// later by cancel().
template <typename T>
template <typename P>
Future<T>::Future(std::future<T>&& fu, P&& p) :
  std::future<T> {std::move(fu)},
  _handle {std::forward<P>(p)} {
}
// Function: cancel | |
template <typename T> | |
bool Future<T>::cancel() { | |
return std::visit([](auto&& arg){ | |
using P = std::decay_t<decltype(arg)>; | |
if constexpr(std::is_same_v<P, std::monostate>) { | |
return false; | |
} | |
else { | |
auto ptr = arg.lock(); | |
if(ptr) { | |
ptr->_is_cancelled.store(true, std::memory_order_relaxed); | |
return true; | |
} | |
return false; | |
} | |
}, _handle); | |
} | |
} // end of namespace tf. --------------------------------------------------- | |
/** | |
@file async_task.hpp | |
@brief asynchronous task include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// AsyncTask | |
// ---------------------------------------------------------------------------- | |
/** | |
@brief class to create a dependent asynchronous task | |
A tf::AsyncTask is a lightweight handle that retains @em shared ownership | |
of a dependent async task created by an executor. | |
This shared ownership ensures that the async task remains alive when | |
adding it to the dependency list of another async task, | |
thus avoiding the classical [ABA problem](https://en.wikipedia.org/wiki/ABA_problem). | |
@code{.cpp} | |
// main thread retains shared ownership of async task A | |
tf::AsyncTask A = executor.silent_dependent_async([](){}); | |
// task A remains alive (i.e., at least one ref count by the main thread) | |
// when being added to the dependency list of async task B | |
tf::AsyncTask B = executor.silent_dependent_async([](){}, A); | |
@endcode | |
Currently, tf::AsyncTask is implemented based on C++ smart pointer std::shared_ptr and | |
is considered cheap to copy or move as long as only a handful of objects | |
own it. | |
When a worker completes an async task, it will remove the task from the executor, | |
decrementing the number of shared owners by one. | |
If that counter reaches zero, the task is destroyed. | |
*/ | |
class AsyncTask {
  friend class FlowBuilder;
  friend class Runtime;
  friend class Taskflow;
  friend class TaskView;
  friend class Executor;
  public:
    /**
    @brief constructs an empty task handle
    */
    AsyncTask() = default;
    /**
    @brief destroys the managed asynchronous task if this is the last owner
    */
    ~AsyncTask() = default;
    /**
    @brief constructs a task that shares ownership of @c rhs
    */
    AsyncTask(const AsyncTask& rhs) = default;
    /**
    @brief move-constructs a task from @c rhs
    */
    AsyncTask(AsyncTask&& rhs) = default;
    /**
    @brief shares ownership of the task managed by @c rhs
    */
    AsyncTask& operator = (const AsyncTask& rhs) = default;
    /**
    @brief move-assigns the task from @c rhs
    */
    AsyncTask& operator = (AsyncTask&& rhs) = default;
    /**
    @brief queries whether this handle is empty, i.e., it stores a null
           shared pointer and manages no task
    */
    bool empty() const;
    /**
    @brief release the ownership
    */
    void reset();
    /**
    @brief obtains a hash value of the underlying node
    */
    size_t hash_value() const;
  private:
    // only friends (e.g., the executor) create handles bound to a node
    AsyncTask(std::shared_ptr<Node>);
    std::shared_ptr<Node> _node;
};
// Constructor | |
inline AsyncTask::AsyncTask(std::shared_ptr<Node> ptr) : _node {std::move(ptr)} { | |
} | |
// Function: empty | |
inline bool AsyncTask::empty() const { | |
return _node == nullptr; | |
} | |
// Function: reset | |
inline void AsyncTask::reset() { | |
_node.reset(); | |
} | |
// Function: hash_value | |
inline size_t AsyncTask::hash_value() const { | |
return std::hash<std::shared_ptr<Node>>{}(_node); | |
} | |
} // end of namespace tf ---------------------------------------------------- | |
/** | |
@file executor.hpp | |
@brief executor include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Executor Definition | |
// ---------------------------------------------------------------------------- | |
/** @class Executor | |
@brief class to create an executor for running a taskflow graph | |
An executor manages a set of worker threads to run one or multiple taskflows | |
using an efficient work-stealing scheduling algorithm. | |
@code{.cpp} | |
// Declare an executor and a taskflow | |
tf::Executor executor; | |
tf::Taskflow taskflow; | |
// Add three tasks into the taskflow | |
tf::Task A = taskflow.emplace([] () { std::cout << "This is TaskA\n"; }); | |
tf::Task B = taskflow.emplace([] () { std::cout << "This is TaskB\n"; }); | |
tf::Task C = taskflow.emplace([] () { std::cout << "This is TaskC\n"; }); | |
// Build precedence between tasks | |
A.precede(B, C); | |
tf::Future<void> fu = executor.run(taskflow); | |
fu.wait(); // block until the execution completes | |
executor.run(taskflow, [](){ std::cout << "end of 1 run"; }).wait(); | |
executor.run_n(taskflow, 4); | |
executor.wait_for_all(); // block until all associated executions finish | |
executor.run_n(taskflow, 4, [](){ std::cout << "end of 4 runs"; }).wait(); | |
executor.run_until(taskflow, [cnt=0] () mutable { return ++cnt == 10; }); | |
@endcode | |
All the @c run methods are @em thread-safe. You can submit multiple | |
taskflows at the same time to an executor from different threads. | |
*/ | |
class Executor { | |
friend class FlowBuilder; | |
friend class Subflow; | |
friend class Runtime; | |
public: | |
/** | |
@brief constructs the executor with @c N worker threads | |
@param N number of workers (default std::thread::hardware_concurrency) | |
@param wix worker interface class to alter worker (thread) behaviors | |
The constructor spawns @c N worker threads to run tasks in a | |
work-stealing loop. The number of workers must be greater than zero | |
or an exception will be thrown. | |
By default, the number of worker threads is equal to the maximum | |
hardware concurrency returned by std::thread::hardware_concurrency. | |
Users can alter the worker behavior, such as changing thread affinity, | |
via deriving an instance from tf::WorkerInterface. | |
*/ | |
explicit Executor( | |
size_t N = std::thread::hardware_concurrency(), | |
std::shared_ptr<WorkerInterface> wix = nullptr | |
); | |
/** | |
@brief destructs the executor | |
The destructor calls Executor::wait_for_all to wait for all submitted | |
taskflows to complete and then notifies all worker threads to stop | |
and join these threads. | |
*/ | |
~Executor(); | |
/** | |
@brief runs a taskflow once | |
@param taskflow a tf::Taskflow object | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow once and returns a tf::Future | |
object that eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run(taskflow); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
tf::Future<void> run(Taskflow& taskflow); | |
/** | |
@brief runs a moved taskflow once | |
@param taskflow a moved tf::Taskflow object | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow once and returns a tf::Future | |
object that eventually holds the result of the execution. | |
The executor will take care of the lifetime of the moved taskflow. | |
@code{.cpp} | |
tf::Future<void> future = executor.run(std::move(taskflow)); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
tf::Future<void> run(Taskflow&& taskflow); | |
/** | |
@brief runs a taskflow once and invoke a callback upon completion | |
@param taskflow a tf::Taskflow object | |
@param callable a callable object to be invoked after this run | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow once and invokes the given | |
callable when the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run(taskflow, [](){ std::cout << "done"; }); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
template<typename C> | |
tf::Future<void> run(Taskflow& taskflow, C&& callable); | |
/** | |
@brief runs a moved taskflow once and invoke a callback upon completion | |
@param taskflow a moved tf::Taskflow object | |
@param callable a callable object to be invoked after this run | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow once and invokes the given | |
callable when the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
The executor will take care of the lifetime of the moved taskflow. | |
@code{.cpp} | |
tf::Future<void> future = executor.run( | |
std::move(taskflow), [](){ std::cout << "done"; } | |
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template<typename C> | |
tf::Future<void> run(Taskflow&& taskflow, C&& callable); | |
/** | |
@brief runs a taskflow for @c N times | |
@param taskflow a tf::Taskflow object | |
@param N number of runs | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow @c N times and returns a tf::Future | |
object that eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_n(taskflow, 2); // run taskflow 2 times | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
tf::Future<void> run_n(Taskflow& taskflow, size_t N); | |
/** | |
@brief runs a moved taskflow for @c N times | |
@param taskflow a moved tf::Taskflow object | |
@param N number of runs | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow @c N times and returns a tf::Future | |
object that eventually holds the result of the execution. | |
The executor will take care of the lifetime of the moved taskflow. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_n( | |
std::move(taskflow), 2 // run the moved taskflow 2 times | |
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
tf::Future<void> run_n(Taskflow&& taskflow, size_t N); | |
/** | |
@brief runs a taskflow for @c N times and then invokes a callback | |
@param taskflow a tf::Taskflow | |
@param N number of runs | |
@param callable a callable object to be invoked after this run | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow @c N times and invokes the given | |
callable when the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run( | |
taskflow, 2, [](){ std::cout << "done"; } // runs taskflow 2 times and invoke | |
// the lambda to print "done" | |
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
template<typename C> | |
tf::Future<void> run_n(Taskflow& taskflow, size_t N, C&& callable); | |
/** | |
@brief runs a moved taskflow for @c N times and then invokes a callback | |
@param taskflow a moved tf::Taskflow | |
@param N number of runs | |
@param callable a callable object to be invoked after this run | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow @c N times and invokes the given | |
callable when the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_n( | |
// run the moved taskflow 2 times and invoke the lambda to print "done" | |
std::move(taskflow), 2, [](){ std::cout << "done"; } | |
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template<typename C> | |
tf::Future<void> run_n(Taskflow&& taskflow, size_t N, C&& callable); | |
/** | |
@brief runs a taskflow multiple times until the predicate becomes true | |
@param taskflow a tf::Taskflow | |
@param pred a boolean predicate to return @c true for stop | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow multiple times until | |
the predicate returns @c true. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_until( | |
taskflow, [](){ return rand()%10 == 0; }
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
template<typename P> | |
tf::Future<void> run_until(Taskflow& taskflow, P&& pred); | |
/** | |
@brief runs a moved taskflow and keeps running it | |
until the predicate becomes true | |
@param taskflow a moved tf::Taskflow object | |
@param pred a boolean predicate to return @c true for stop | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow multiple times until | |
the predicate returns @c true. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
The executor will take care of the lifetime of the moved taskflow. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_until( | |
std::move(taskflow), [](){ return rand()%10 == 0; }
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template<typename P> | |
tf::Future<void> run_until(Taskflow&& taskflow, P&& pred); | |
/** | |
@brief runs a taskflow multiple times until the predicate becomes true and | |
then invokes the callback | |
@param taskflow a tf::Taskflow | |
@param pred a boolean predicate to return @c true for stop | |
@param callable a callable object to be invoked after this run completes | |
@return a tf::Future that holds the result of the execution | |
This member function executes the given taskflow multiple times until | |
the predicate returns @c true and then invokes the given callable when | |
the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_until( | |
taskflow, [](){ return rand()%10 == 0; }, [](){ std::cout << "done"; }
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
@attention | |
The executor does not own the given taskflow. It is your responsibility to | |
ensure the taskflow remains alive during its execution. | |
*/ | |
template<typename P, typename C> | |
tf::Future<void> run_until(Taskflow& taskflow, P&& pred, C&& callable); | |
/** | |
@brief runs a moved taskflow and keeps running | |
it until the predicate becomes true and then invokes the callback | |
@param taskflow a moved tf::Taskflow | |
@param pred a boolean predicate to return @c true for stop | |
@param callable a callable object to be invoked after this run completes | |
@return a tf::Future that holds the result of the execution | |
This member function executes a moved taskflow multiple times until | |
the predicate returns @c true and then invokes the given callable when | |
the execution completes. | |
This member function returns a tf::Future object that | |
eventually holds the result of the execution. | |
The executor will take care of the lifetime of the moved taskflow. | |
@code{.cpp} | |
tf::Future<void> future = executor.run_until( | |
std::move(taskflow), | |
[](){ return rand()%10 == 0; }, [](){ std::cout << "done"; }
); | |
// do something else | |
future.wait(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template<typename P, typename C> | |
tf::Future<void> run_until(Taskflow&& taskflow, P&& pred, C&& callable); | |
/** | |
@brief runs a target graph and waits until it completes using | |
an internal worker of this executor | |
@tparam T target type which has `tf::Graph& T::graph()` defined | |
@param target the target task graph object | |
The method runs a target graph which has `tf::Graph& T::graph()` defined | |
and waits until the execution completes. | |
Unlike the typical flow of calling `tf::Executor::run` series | |
plus waiting on the result, this method must be called by an internal | |
worker of this executor. The caller worker will participate in | |
the work-stealing loop of the scheduler, thereby avoiding potential
deadlock caused by blocked waiting. | |
@code{.cpp} | |
tf::Executor executor(2); | |
tf::Taskflow taskflow; | |
std::array<tf::Taskflow, 1000> others; | |
std::atomic<size_t> counter{0}; | |
for(size_t n=0; n<1000; n++) { | |
for(size_t i=0; i<1000; i++) { | |
others[n].emplace([&](){ counter++; }); | |
} | |
taskflow.emplace([&executor, &tf=others[n]](){ | |
executor.corun(tf); | |
//executor.run(tf).wait(); <- blocking the worker without doing anything | |
// will introduce deadlock | |
}); | |
} | |
executor.run(taskflow).wait(); | |
@endcode | |
The method is thread-safe as long as the target is not concurrently
run by two or more threads.
@attention | |
You must call tf::Executor::corun from a worker of the calling executor | |
or an exception will be thrown. | |
*/ | |
template <typename T> | |
void corun(T& target); | |
/** | |
@brief keeps running the work-stealing loop until the predicate becomes true | |
@tparam P predicate type | |
@param predicate a boolean predicate to indicate when to stop the loop | |
The method keeps the caller worker running in the work-stealing loop | |
until the stop predicate becomes true. | |
@code{.cpp} | |
taskflow.emplace([&](){ | |
std::future<void> fu = std::async([](){ std::this_thread::sleep_for(100s); });
executor.corun_until([](){ | |
return fu.wait_for(std::chrono::seconds(0)) == std::future_status::ready;
}); | |
}); | |
@endcode | |
@attention | |
You must call tf::Executor::corun_until from a worker of the calling executor | |
or an exception will be thrown. | |
*/ | |
template <typename P> | |
void corun_until(P&& predicate); | |
/** | |
@brief waits for all tasks to complete | |
This member function waits until all submitted tasks | |
(e.g., taskflows, asynchronous tasks) to finish. | |
@code{.cpp} | |
executor.run(taskflow1); | |
executor.run_n(taskflow2, 10); | |
executor.run_n(taskflow3, 100); | |
executor.wait_for_all(); // wait until the above submitted taskflows finish | |
@endcode | |
*/ | |
void wait_for_all(); | |
/** | |
@brief queries the number of worker threads | |
Each worker represents one unique thread spawned by an executor | |
upon its construction time. | |
@code{.cpp} | |
tf::Executor executor(4); | |
std::cout << executor.num_workers(); // 4 | |
@endcode | |
*/ | |
size_t num_workers() const noexcept; | |
/** | |
@brief queries the number of running topologies at the time of this call | |
When a taskflow is submitted to an executor, a topology is created to store | |
runtime metadata of the running taskflow. | |
When the execution of the submitted taskflow finishes, | |
its corresponding topology will be removed from the executor. | |
@code{.cpp} | |
executor.run(taskflow); | |
std::cout << executor.num_topologies(); // 0 or 1 (taskflow still running) | |
@endcode | |
*/ | |
size_t num_topologies() const; | |
/** | |
@brief queries the number of running taskflows with moved ownership | |
@code{.cpp} | |
executor.run(std::move(taskflow)); | |
std::cout << executor.num_taskflows(); // 0 or 1 (taskflow still running) | |
@endcode | |
*/ | |
size_t num_taskflows() const; | |
/** | |
@brief queries the id of the caller thread in this executor | |
Each worker has a unique id in the range of @c 0 to @c N-1 associated with
its parent executor. | |
If the caller thread does not belong to the executor, @c -1 is returned. | |
@code{.cpp} | |
tf::Executor executor(4); // 4 workers in the executor | |
executor.this_worker_id(); // -1 (main thread is not a worker) | |
taskflow.emplace([&](){ | |
std::cout << executor.this_worker_id(); // 0, 1, 2, or 3 | |
}); | |
executor.run(taskflow); | |
@endcode | |
*/ | |
int this_worker_id() const; | |
// -------------------------------------------------------------------------- | |
// Observer methods | |
// -------------------------------------------------------------------------- | |
/** | |
@brief constructs an observer to inspect the activities of worker threads | |
@tparam Observer observer type derived from tf::ObserverInterface | |
@tparam ArgsT argument parameter pack | |
@param args arguments to forward to the constructor of the observer | |
@return a shared pointer to the created observer | |
Each executor manages a list of observers with shared ownership with callers. | |
For each of these observers, the two member functions, | |
tf::ObserverInterface::on_entry and tf::ObserverInterface::on_exit | |
will be called before and after the execution of a task. | |
This member function is not thread-safe. | |
*/ | |
template <typename Observer, typename... ArgsT> | |
std::shared_ptr<Observer> make_observer(ArgsT&&... args); | |
/** | |
@brief removes an observer from the executor | |
This member function is not thread-safe. | |
*/ | |
template <typename Observer> | |
void remove_observer(std::shared_ptr<Observer> observer); | |
/** | |
@brief queries the number of observers | |
*/ | |
size_t num_observers() const noexcept; | |
// -------------------------------------------------------------------------- | |
// Async Task Methods | |
// -------------------------------------------------------------------------- | |
/** | |
@brief runs a given function asynchronously | |
@tparam F callable type | |
@param func callable object | |
@return a @std_future that will hold the result of the execution | |
The method creates an asynchronous task to run the given function | |
and return a @std_future object that eventually will hold the result | |
of the return value. | |
@code{.cpp} | |
std::future<int> future = executor.async([](){ | |
std::cout << "create an asynchronous task and returns 1\n"; | |
return 1; | |
}); | |
future.get(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F> | |
auto async(F&& func); | |
/** | |
@brief runs a given function asynchronously and gives a name to this task | |
@tparam F callable type | |
@param name name of the asynchronous task | |
@param func callable object | |
@return a @std_future that will hold the result of the execution | |
The method creates and assigns a name to an asynchronous task | |
to run the given function, | |
returning a @std_future object that eventually will hold the result.
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
std::future<int> future = executor.async("name", [](){ | |
std::cout << "create an asynchronous task with a name and returns 1\n"; | |
return 1; | |
}); | |
future.get(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F> | |
auto async(const std::string& name, F&& func); | |
/** | |
@brief similar to tf::Executor::async but does not return a future object | |
@tparam F callable type | |
@param func callable object | |
This member function is more efficient than tf::Executor::async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
@code{.cpp} | |
executor.silent_async([](){ | |
std::cout << "create an asynchronous task with no return\n"; | |
}); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F> | |
void silent_async(F&& func); | |
/** | |
@brief similar to tf::Executor::async but does not return a future object | |
@tparam F callable type | |
@param name assigned name to the task | |
@param func callable object | |
This member function is more efficient than tf::Executor::async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
executor.silent_async("name", [](){ | |
std::cout << "create an asynchronous task with a name and no return\n"; | |
}); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F> | |
void silent_async(const std::string& name, F&& func); | |
// -------------------------------------------------------------------------- | |
// Silent Dependent Async Methods | |
// -------------------------------------------------------------------------- | |
/** | |
@brief runs the given function asynchronously | |
when the given dependents finish | |
@tparam F callable type | |
@tparam Tasks task types convertible to tf::AsyncTask | |
@param func callable object | |
@param tasks asynchronous tasks on which this execution depends | |
@return a tf::AsyncTask handle | |
This member function is more efficient than tf::Executor::dependent_async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
@code{.cpp} | |
tf::AsyncTask A = executor.silent_dependent_async([](){ printf("A\n"); }); | |
tf::AsyncTask B = executor.silent_dependent_async([](){ printf("B\n"); }); | |
executor.silent_dependent_async([](){ printf("C runs after A and B\n"); }, A, B); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename... Tasks, | |
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr | |
> | |
tf::AsyncTask silent_dependent_async(F&& func, Tasks&&... tasks); | |
/** | |
@brief names and runs the given function asynchronously | |
when the given dependents finish | |
@tparam F callable type | |
@tparam Tasks task types convertible to tf::AsyncTask | |
@param name assigned name to the task | |
@param func callable object | |
@param tasks asynchronous tasks on which this execution depends | |
@return a tf::AsyncTask handle | |
This member function is more efficient than tf::Executor::dependent_async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
tf::AsyncTask A = executor.silent_dependent_async("A", [](){ printf("A\n"); }); | |
tf::AsyncTask B = executor.silent_dependent_async("B", [](){ printf("B\n"); }); | |
executor.silent_dependent_async( | |
"C", [](){ printf("C runs after A and B\n"); }, A, B | |
); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename... Tasks, | |
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr | |
> | |
tf::AsyncTask silent_dependent_async(const std::string& name, F&& func, Tasks&&... tasks); | |
/** | |
@brief runs the given function asynchronously | |
when the given range of dependents finish | |
@tparam F callable type | |
@tparam I iterator type | |
@param func callable object | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@return a tf::AsyncTask handle | |
This member function is more efficient than tf::Executor::dependent_async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
@code{.cpp} | |
std::array<tf::AsyncTask, 2> array { | |
executor.silent_dependent_async([](){ printf("A\n"); }), | |
executor.silent_dependent_async([](){ printf("B\n"); }) | |
}; | |
executor.silent_dependent_async( | |
[](){ printf("C runs after A and B\n"); }, array.begin(), array.end() | |
); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr | |
> | |
tf::AsyncTask silent_dependent_async(F&& func, I first, I last); | |
/** | |
@brief names and runs the given function asynchronously | |
when the given range of dependents finish | |
@tparam F callable type | |
@tparam I iterator type | |
@param name assigned name to the task | |
@param func callable object | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@return a tf::AsyncTask handle | |
This member function is more efficient than tf::Executor::dependent_async | |
and is encouraged to use when you do not want a @std_future to | |
acquire the result or synchronize the execution. | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
std::array<tf::AsyncTask, 2> array { | |
executor.silent_dependent_async("A", [](){ printf("A\n"); }), | |
executor.silent_dependent_async("B", [](){ printf("B\n"); }) | |
}; | |
executor.silent_dependent_async( | |
"C", [](){ printf("C runs after A and B\n"); }, array.begin(), array.end() | |
); | |
executor.wait_for_all(); | |
@endcode | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr | |
> | |
tf::AsyncTask silent_dependent_async(const std::string& name, F&& func, I first, I last); | |
// -------------------------------------------------------------------------- | |
// Dependent Async Methods | |
// -------------------------------------------------------------------------- | |
/** | |
@brief runs the given function asynchronously | |
when the given dependents finish | |
@tparam F callable type | |
@tparam Tasks task types convertible to tf::AsyncTask | |
@param func callable object | |
@param tasks asynchronous tasks on which this execution depends | |
@return a pair of a tf::AsyncTask handle and | |
a @std_future that holds the result of the execution | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int> | |
that eventually will hold the result of the execution. | |
@code{.cpp} | |
tf::AsyncTask A = executor.silent_dependent_async([](){ printf("A\n"); }); | |
tf::AsyncTask B = executor.silent_dependent_async([](){ printf("B\n"); }); | |
auto [C, fuC] = executor.dependent_async( | |
[](){ | |
printf("C runs after A and B\n"); | |
return 1; | |
}, | |
A, B | |
); | |
fuC.get(); // C finishes, which in turn means both A and B finish
@endcode | |
You can mix the use of tf::AsyncTask handles
returned by Executor::dependent_async and Executor::silent_dependent_async | |
when specifying task dependencies. | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename... Tasks, | |
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr | |
> | |
auto dependent_async(F&& func, Tasks&&... tasks); | |
/** | |
@brief names and runs the given function asynchronously | |
when the given dependents finish | |
@tparam F callable type | |
@tparam Tasks task types convertible to tf::AsyncTask | |
@param name assigned name to the task | |
@param func callable object | |
@param tasks asynchronous tasks on which this execution depends | |
@return a pair of a tf::AsyncTask handle and | |
a @std_future that holds the result of the execution | |
The example below creates three named asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int> | |
that eventually will hold the result of the execution. | |
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
tf::AsyncTask A = executor.silent_dependent_async("A", [](){ printf("A\n"); }); | |
tf::AsyncTask B = executor.silent_dependent_async("B", [](){ printf("B\n"); }); | |
auto [C, fuC] = executor.dependent_async( | |
"C", | |
[](){ | |
printf("C runs after A and B\n"); | |
return 1; | |
}, | |
A, B | |
); | |
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode | |
You can mix the use of tf::AsyncTask handles
returned by Executor::dependent_async and Executor::silent_dependent_async | |
when specifying task dependencies. | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename... Tasks, | |
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr | |
> | |
auto dependent_async(const std::string& name, F&& func, Tasks&&... tasks); | |
/** | |
@brief runs the given function asynchronously | |
when the given range of dependents finish | |
@tparam F callable type | |
@tparam I iterator type | |
@param func callable object | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@return a pair of a tf::AsyncTask handle and | |
a @std_future that holds the result of the execution | |
The example below creates three asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int> | |
that eventually will hold the result of the execution. | |
@code{.cpp} | |
std::array<tf::AsyncTask, 2> array { | |
executor.silent_dependent_async([](){ printf("A\n"); }), | |
executor.silent_dependent_async([](){ printf("B\n"); }) | |
}; | |
auto [C, fuC] = executor.dependent_async( | |
[](){ | |
printf("C runs after A and B\n"); | |
return 1; | |
}, | |
array.begin(), array.end() | |
); | |
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode | |
You can mix the use of tf::AsyncTask handles
returned by Executor::dependent_async and Executor::silent_dependent_async | |
when specifying task dependencies. | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr | |
> | |
auto dependent_async(F&& func, I first, I last); | |
/** | |
@brief names and runs the given function asynchronously | |
when the given range of dependents finish | |
@tparam F callable type | |
@tparam I iterator type | |
@param name assigned name to the task | |
@param func callable object | |
@param first iterator to the beginning (inclusive) | |
@param last iterator to the end (exclusive) | |
@return a pair of a tf::AsyncTask handle and | |
a @std_future that holds the result of the execution | |
The example below creates three named asynchronous tasks, @c A, @c B, and @c C, | |
in which task @c C runs after task @c A and task @c B. | |
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int> | |
that eventually will hold the result of the execution. | |
Assigned task names will appear in the observers of the executor. | |
@code{.cpp} | |
std::array<tf::AsyncTask, 2> array { | |
executor.silent_dependent_async("A", [](){ printf("A\n"); }), | |
executor.silent_dependent_async("B", [](){ printf("B\n"); }) | |
}; | |
auto [C, fuC] = executor.dependent_async( | |
"C", | |
[](){ | |
printf("C runs after A and B\n"); | |
return 1; | |
}, | |
array.begin(), array.end() | |
); | |
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode | |
You can mix the use of tf::AsyncTask handles
returned by Executor::dependent_async and Executor::silent_dependent_async | |
when specifying task dependencies. | |
This member function is thread-safe. | |
*/ | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr | |
> | |
auto dependent_async(const std::string& name, F&& func, I first, I last); | |
private: | |
const size_t _MAX_STEALS; | |
std::condition_variable _topology_cv; | |
std::mutex _taskflows_mutex; | |
std::mutex _topology_mutex; | |
std::mutex _wsq_mutex; | |
std::mutex _asyncs_mutex; | |
size_t _num_topologies {0}; | |
std::unordered_map<std::thread::id, size_t> _wids; | |
std::vector<std::thread> _threads; | |
std::vector<Worker> _workers; | |
std::list<Taskflow> _taskflows; | |
std::unordered_set<std::shared_ptr<Node>> _asyncs; | |
Notifier _notifier; | |
TaskQueue<Node*> _wsq; | |
std::atomic<bool> _done {0}; | |
std::shared_ptr<WorkerInterface> _worker_interface; | |
std::unordered_set<std::shared_ptr<ObserverInterface>> _observers; | |
Worker* _this_worker(); | |
bool _wait_for_task(Worker&, Node*&); | |
void _observer_prologue(Worker&, Node*); | |
void _observer_epilogue(Worker&, Node*); | |
void _spawn(size_t); | |
void _exploit_task(Worker&, Node*&); | |
void _explore_task(Worker&, Node*&); | |
void _schedule(Worker&, Node*); | |
void _schedule(Node*); | |
void _schedule(Worker&, const SmallVector<Node*>&); | |
void _schedule(const SmallVector<Node*>&); | |
void _set_up_topology(Worker*, Topology*); | |
void _tear_down_topology(Worker&, Topology*); | |
void _tear_down_async(Node*); | |
void _tear_down_dependent_async(Worker&, Node*); | |
void _tear_down_invoke(Worker&, Node*); | |
void _increment_topology(); | |
void _decrement_topology(); | |
void _decrement_topology_and_notify(); | |
void _invoke(Worker&, Node*); | |
void _invoke_static_task(Worker&, Node*); | |
void _invoke_dynamic_task(Worker&, Node*); | |
void _consume_graph(Worker&, Node*, Graph&); | |
void _detach_dynamic_task(Worker&, Node*, Graph&); | |
void _invoke_condition_task(Worker&, Node*, SmallVector<int>&); | |
void _invoke_multi_condition_task(Worker&, Node*, SmallVector<int>&); | |
void _invoke_module_task(Worker&, Node*); | |
void _invoke_async_task(Worker&, Node*); | |
void _invoke_dependent_async_task(Worker&, Node*); | |
void _process_async_dependent(Node*, tf::AsyncTask&, size_t&); | |
void _schedule_async_task(Node*); | |
template <typename P> | |
void _corun_until(Worker&, P&&); | |
template <typename R, typename F> | |
auto _make_promised_async(std::promise<R>&&, F&&); | |
}; | |
// Constructor
//
// Creates an executor with N worker threads. The optional worker
// interface supplies prologue/epilogue hooks that bracket each worker's
// scheduling loop (see _spawn). Throws when N is zero. When the
// TF_ENABLE_PROFILER environment variable is set, a default profiling
// observer is created and handed to the profiler manager.
inline Executor::Executor(size_t N, std::shared_ptr<WorkerInterface> wix) :
  // steal bound is 2*(N+1); workers back off after exceeding it
  _MAX_STEALS {((N+1) << 1)},
  _threads    {N},
  _workers    {N},
  _notifier   {N},
  _worker_interface {std::move(wix)} {
  if(N == 0) {
    TF_THROW("no cpu workers to execute taskflows");
  }
  _spawn(N);
  // instantiate the default observer if requested
  if(has_env(TF_ENABLE_PROFILER)) {
    TFProfManager::get()._manage(make_observer<TFProfObserver>());
  }
}
// Destructor
//
// Blocks until every submitted topology has completed, then signals
// termination, wakes all sleeping workers, and joins the worker threads.
inline Executor::~Executor() {

  // wait for all topologies to complete
  wait_for_all();

  // shut down the scheduler: flag first, then wake everyone
  _done = true;
  _notifier.notify(true);

  // reclaim every worker thread
  for(size_t i=0; i<_threads.size(); ++i) {
    _threads[i].join();
  }
}
// Function: num_workers | |
inline size_t Executor::num_workers() const noexcept { | |
return _workers.size(); | |
} | |
// Function: num_topologies | |
inline size_t Executor::num_topologies() const { | |
return _num_topologies; | |
} | |
// Function: num_taskflows | |
inline size_t Executor::num_taskflows() const { | |
return _taskflows.size(); | |
} | |
// Function: _this_worker
// Maps the calling thread to its Worker object, or nullptr when the
// caller is not one of this executor's worker threads.
inline Worker* Executor::_this_worker() {
  auto itr = _wids.find(std::this_thread::get_id());
  if(itr == _wids.end()) {
    return nullptr;
  }
  return &_workers[itr->second];
}
// Function: this_worker_id | |
inline int Executor::this_worker_id() const { | |
auto i = _wids.find(std::this_thread::get_id()); | |
return i == _wids.end() ? -1 : static_cast<int>(_workers[i->second]._id); | |
} | |
// Procedure: _spawn
//
// Launches N worker threads, each running the work-stealing scheduling
// loop until shutdown. The caller blocks at the bottom until every
// worker has registered its thread id in _wids, so the local rendezvous
// state (mutex/cond/n), though stack-allocated, outlives all uses by
// the spawned lambdas.
inline void Executor::_spawn(size_t N) {

  // rendezvous state shared (by reference) with each spawned thread
  std::mutex mutex;
  std::condition_variable cond;
  size_t n=0;

  for(size_t id=0; id<N; ++id) {

    // wire up per-worker bookkeeping before the thread starts
    _workers[id]._id = id;
    _workers[id]._vtm = id;
    _workers[id]._executor = this;
    _workers[id]._waiter = &_notifier._waiters[id];

    _threads[id] = std::thread([this] (
      Worker& w, std::mutex& mutex, std::condition_variable& cond, size_t& n
    ) -> void {

      // assign the thread
      w._thread = &_threads[w._id];

      // enables the thread-id -> worker mapping; the last worker to
      // register wakes the spawning thread
      {
        std::scoped_lock lock(mutex);
        _wids[std::this_thread::get_id()] = w._id;
        if(n++; n == num_workers()) {
          cond.notify_one();
        }
      }

      Node* t = nullptr;

      // before entering the scheduler (work-stealing loop),
      // call the user-specified prologue function
      if(_worker_interface) {
        _worker_interface->scheduler_prologue(w);
      }

      // must use 1 as condition instead of !done because
      // the previous worker may stop while the following workers
      // are still preparing for entering the scheduling loop
      std::exception_ptr ptr{nullptr};
      try {
        while(1) {

          // execute the tasks.
          _exploit_task(w, t);

          // wait for tasks; false means the executor is shutting down
          if(_wait_for_task(w, t) == false) {
            break;
          }
        }
      }
      catch(...) {
        // capture the exception and hand it to the epilogue hook
        ptr = std::current_exception();
      }

      // call the user-specified epilogue function
      if(_worker_interface) {
        _worker_interface->scheduler_epilogue(w, ptr);
      }

    }, std::ref(_workers[id]), std::ref(mutex), std::ref(cond), std::ref(n));

    // POSIX-like system can use the following to affine threads to cores
    //cpu_set_t cpuset;
    //CPU_ZERO(&cpuset);
    //CPU_SET(id, &cpuset);
    //pthread_setaffinity_np(
    //  _threads[id].native_handle(), sizeof(cpu_set_t), &cpuset
    //);
  }

  // block until all N workers have registered themselves
  std::unique_lock<std::mutex> lock(mutex);
  cond.wait(lock, [&](){ return n==N; });
}
// Function: _corun_until
//
// Work-stealing loop for a worker that is blocked inside a running task
// (e.g., joining a subflow or consuming a module graph): keeps executing
// tasks from its own queue, the executor-wide queue, or a victim
// worker's queue until the stop predicate becomes true.
template <typename P>
void Executor::_corun_until(Worker& w, P&& stop_predicate) {

  std::uniform_int_distribution<size_t> rdvtm(0, _workers.size()-1);

  exploit:

  while(!stop_predicate()) {

    //exploit:

    // drain the worker's own queue first
    if(auto t = w._wsq.pop(); t) {
      _invoke(w, t);
    }
    else {
      size_t num_steals = 0;

      explore:

      // victim == self means stealing from the executor-wide queue
      t = (w._id == w._vtm) ? _wsq.steal() : _workers[w._vtm]._wsq.steal();

      if(t) {
        _invoke(w, t);
        goto exploit;
      }
      else if(!stop_predicate()) {
        // back off after too many failed steals, then retry with a
        // randomly chosen victim
        if(num_steals++ > _MAX_STEALS) {
          std::this_thread::yield();
        }
        w._vtm = rdvtm(w._rdgen);
        goto explore;
      }
      else {
        break;
      }
    }
  }
}
// Function: _explore_task
//
// Attempts to steal one task into t, alternating between the
// executor-wide queue and randomly chosen victim workers. Gives up
// after bounded yields or when the executor is shutting down; t stays
// null on failure.
inline void Executor::_explore_task(Worker& w, Node*& t) {

  //assert(_workers[w].wsq.empty());
  //assert(!t);

  size_t num_steals = 0;
  size_t num_yields = 0;

  std::uniform_int_distribution<size_t> rdvtm(0, _workers.size()-1);

  // Here, we write do-while to make the worker steal at once
  // from the assigned victim.
  do {
    // victim == self means stealing from the executor-wide queue
    t = (w._id == w._vtm) ? _wsq.steal() : _workers[w._vtm]._wsq.steal();

    if(t) {
      break;
    }

    // after _MAX_STEALS misses start yielding; after 100 yields give up
    if(num_steals++ > _MAX_STEALS) {
      std::this_thread::yield();
      if(num_yields++ > 100) {
        break;
      }
    }

    // pick a new random victim for the next attempt
    w._vtm = rdvtm(w._rdgen);
  } while(!_done);

}
// Procedure: _exploit_task
// Drains the worker's local queue: invokes the given task and keeps
// popping/invoking until the local queue runs dry (t becomes null).
inline void Executor::_exploit_task(Worker& w, Node*& t) {
  for(; t; t = w._wsq.pop()) {
    _invoke(w, t);
  }
}
// Function: _wait_for_task
//
// Puts a worker through the steal/sleep protocol. Returns true with a
// stolen task in t, or false when the executor is shutting down. Uses
// a two-phase-commit wait (prepare/cancel/commit) on the notifier so a
// wakeup cannot be missed between the emptiness checks and the sleep.
inline bool Executor::_wait_for_task(Worker& worker, Node*& t) {

  explore_task:

  _explore_task(worker, t);

  // The last thief who successfully stole a task will wake up
  // another thief worker to avoid starvation.
  if(t) {
    _notifier.notify(false);
    return true;
  }

  // ---- 2PC guard ----
  _notifier.prepare_wait(worker._waiter);

  // recheck the shared queue after announcing intent to sleep
  if(!_wsq.empty()) {
    _notifier.cancel_wait(worker._waiter);
    worker._vtm = worker._id;
    goto explore_task;
  }

  if(_done) {
    _notifier.cancel_wait(worker._waiter);
    // wake everyone so all workers observe shutdown
    _notifier.notify(true);
    return false;
  }

  // We need to use index-based scanning to avoid data race
  // with _spawn which may initialize a worker at the same time.
  for(size_t vtm=0; vtm<_workers.size(); vtm++) {
    if(!_workers[vtm]._wsq.empty()) {
      _notifier.cancel_wait(worker._waiter);
      worker._vtm = vtm;
      goto explore_task;
    }
  }

  // Now I really need to relinquish myself to others
  _notifier.commit_wait(worker._waiter);

  goto explore_task;
}
// Function: make_observer | |
template<typename Observer, typename... ArgsT> | |
std::shared_ptr<Observer> Executor::make_observer(ArgsT&&... args) { | |
static_assert( | |
std::is_base_of_v<ObserverInterface, Observer>, | |
"Observer must be derived from ObserverInterface" | |
); | |
// use a local variable to mimic the constructor | |
auto ptr = std::make_shared<Observer>(std::forward<ArgsT>(args)...); | |
ptr->set_up(_workers.size()); | |
_observers.emplace(std::static_pointer_cast<ObserverInterface>(ptr)); | |
return ptr; | |
} | |
// Procedure: remove_observer | |
template <typename Observer> | |
void Executor::remove_observer(std::shared_ptr<Observer> ptr) { | |
static_assert( | |
std::is_base_of_v<ObserverInterface, Observer>, | |
"Observer must be derived from ObserverInterface" | |
); | |
_observers.erase(std::static_pointer_cast<ObserverInterface>(ptr)); | |
} | |
// Function: num_observers | |
inline size_t Executor::num_observers() const noexcept { | |
return _observers.size(); | |
} | |
// Procedure: _schedule
//
// Schedules one node. A caller that is a worker of this executor pushes
// into its own local queue; any other caller pushes into the
// executor-wide queue under the queue mutex. One sleeper is notified
// either way.
inline void Executor::_schedule(Worker& worker, Node* node) {

  // We need to fetch p before the release such that the read
  // operation is synchronized properly with other thread to
  // avoid data race.
  auto p = node->_priority;

  node->_state.fetch_or(Node::READY, std::memory_order_release);

  // caller is a worker to this pool - starting at v3.5 we do not use
  // any complicated notification mechanism as the experimental result
  // has shown no significant advantage.
  if(worker._executor == this) {
    worker._wsq.push(node, p);
    _notifier.notify(false);
    return;
  }

  {
    std::lock_guard<std::mutex> lock(_wsq_mutex);
    _wsq.push(node, p);
  }

  _notifier.notify(false);
}
// Procedure: _schedule
//
// Schedules one node from a non-worker (external) thread: the node
// always goes through the executor-wide queue under the queue mutex,
// then one sleeper is notified.
inline void Executor::_schedule(Node* node) {

  // We need to fetch p before the release such that the read
  // operation is synchronized properly with other thread to
  // avoid data race.
  auto p = node->_priority;

  node->_state.fetch_or(Node::READY, std::memory_order_release);

  {
    std::lock_guard<std::mutex> lock(_wsq_mutex);
    _wsq.push(node, p);
  }

  _notifier.notify(false);
}
// Procedure: _schedule
//
// Schedules a batch of nodes. A worker of this executor pushes them
// into its local queue, notifying one sleeper per node; an external
// caller pushes all of them into the executor-wide queue under the lock
// and notifies num_nodes sleepers at once.
inline void Executor::_schedule(Worker& worker, const SmallVector<Node*>& nodes) {

  // We need to catch the node count to avoid accessing the nodes
  // vector while the parent topology is removed!
  const auto num_nodes = nodes.size();

  if(num_nodes == 0) {
    return;
  }

  // caller is a worker to this pool - starting at v3.5 we do not use
  // any complicated notification mechanism as the experimental result
  // has shown no significant advantage.
  if(worker._executor == this) {
    for(size_t i=0; i<num_nodes; ++i) {
      // We need to fetch p before the release such that the read
      // operation is synchronized properly with other thread to
      // avoid data race.
      auto p = nodes[i]->_priority;
      nodes[i]->_state.fetch_or(Node::READY, std::memory_order_release);
      worker._wsq.push(nodes[i], p);
      _notifier.notify(false);
    }
    return;
  }

  {
    std::lock_guard<std::mutex> lock(_wsq_mutex);
    for(size_t k=0; k<num_nodes; ++k) {
      auto p = nodes[k]->_priority;
      nodes[k]->_state.fetch_or(Node::READY, std::memory_order_release);
      _wsq.push(nodes[k], p);
    }
  }

  _notifier.notify_n(num_nodes);
}
// Procedure: _schedule
//
// Schedules a batch of nodes from a non-worker (external) thread: all
// nodes go through the executor-wide queue under the lock, then
// num_nodes sleepers are notified at once.
inline void Executor::_schedule(const SmallVector<Node*>& nodes) {

  // capture the count first - the parent topology may be removed!
  const auto num_nodes = nodes.size();

  if(num_nodes == 0) {
    return;
  }

  // We need to fetch p before the release such that the read
  // operation is synchronized properly with other thread to
  // avoid data race.
  {
    std::lock_guard<std::mutex> lock(_wsq_mutex);
    for(size_t k=0; k<num_nodes; ++k) {
      auto p = nodes[k]->_priority;
      nodes[k]->_state.fetch_or(Node::READY, std::memory_order_release);
      _wsq.push(nodes[k], p);
    }
  }

  _notifier.notify_n(num_nodes);
}
// Procedure: _invoke
//
// Executes one ready node on the given worker: dispatches on the node's
// handle type, handles semaphore acquire/release, resets the join
// counter to support cyclic control flow, and schedules the successors.
// The highest-priority successor is cached on the worker and executed
// directly via goto (tail-recursion elimination) instead of going
// through the task queue.
inline void Executor::_invoke(Worker& worker, Node* node) {

  // synchronize all outstanding memory operations caused by reordering
  while(!(node->_state.load(std::memory_order_acquire) & Node::READY));

  begin_invoke:

  // no need to do other things if the topology is cancelled
  if(node->_is_cancelled()) {
    _tear_down_invoke(worker, node);
    return;
  }

  // if acquiring semaphore(s) exists, acquire them first
  if(node->_semaphores && !node->_semaphores->to_acquire.empty()) {
    SmallVector<Node*> nodes;
    // on failure, reschedule the returned waiters and retry later
    if(!node->_acquire_all(nodes)) {
      _schedule(worker, nodes);
      return;
    }
    node->_state.fetch_or(Node::ACQUIRED, std::memory_order_release);
  }

  // condition task results are collected here
  //int cond = -1;
  SmallVector<int> conds;

  // switch is faster than nested if-else due to jump table
  switch(node->_handle.index()) {
    // static task
    case Node::STATIC:{
      _invoke_static_task(worker, node);
    }
    break;

    // dynamic task
    case Node::DYNAMIC: {
      _invoke_dynamic_task(worker, node);
    }
    break;

    // condition task
    case Node::CONDITION: {
      _invoke_condition_task(worker, node, conds);
    }
    break;

    // multi-condition task
    case Node::MULTI_CONDITION: {
      _invoke_multi_condition_task(worker, node, conds);
    }
    break;

    // module task
    case Node::MODULE: {
      _invoke_module_task(worker, node);
    }
    break;

    // async task - torn down here and returns early (no successors)
    case Node::ASYNC: {
      _invoke_async_task(worker, node);
      _tear_down_async(node);
      return ;
    }
    break;

    // dependent async task - tear-down may cache a successor to run next
    case Node::DEPENDENT_ASYNC: {
      _invoke_dependent_async_task(worker, node);
      _tear_down_dependent_async(worker, node);
      if(worker._cache) {
        node = worker._cache;
        goto begin_invoke;
      }
      return;
    }
    break;

    // monostate (placeholder)
    default:
    break;
  }

  // if releasing semaphores exist, release them
  if(node->_semaphores && !node->_semaphores->to_release.empty()) {
    _schedule(worker, node->_release_all());
  }

  // Reset the join counter to support the cyclic control flow.
  // + We must do this before scheduling the successors to avoid race
  //   condition on _dependents.
  // + We must use fetch_add instead of direct assigning
  //   because the user-space call on "invoke" may explicitly schedule
  //   this task again (e.g., pipeline) which can access the join_counter.
  if((node->_state.load(std::memory_order_relaxed) & Node::CONDITIONED)) {
    node->_join_counter.fetch_add(node->num_strong_dependents(), std::memory_order_relaxed);
  }
  else {
    node->_join_counter.fetch_add(node->num_dependents(), std::memory_order_relaxed);
  }

  // acquire the parent flow counter
  auto& j = (node->_parent) ? node->_parent->_join_counter :
                              node->_topology->_join_counter;

  // Here, we want to cache the latest successor with the highest priority
  worker._cache = nullptr;
  auto max_p = static_cast<unsigned>(TaskPriority::MAX);

  // Invoke the task based on the corresponding type
  switch(node->_handle.index()) {

    // condition and multi-condition tasks: each in-range result selects
    // one successor branch to run
    case Node::CONDITION:
    case Node::MULTI_CONDITION: {
      for(auto cond : conds) {
        if(cond >= 0 && static_cast<size_t>(cond) < node->_successors.size()) {
          auto s = node->_successors[cond];
          // zeroing the join counter for invariant
          s->_join_counter.store(0, std::memory_order_relaxed);
          j.fetch_add(1, std::memory_order_relaxed);
          if(s->_priority <= max_p) {
            if(worker._cache) {
              _schedule(worker, worker._cache);
            }
            worker._cache = s;
            max_p = s->_priority;
          }
          else {
            _schedule(worker, s);
          }
        }
      }
    }
    break;

    // non-condition task: a successor becomes ready when its join
    // counter drops to zero
    default: {
      for(size_t i=0; i<node->_successors.size(); ++i) {
        //if(auto s = node->_successors[i]; --(s->_join_counter) == 0) {
        if(auto s = node->_successors[i];
          s->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1) {
          j.fetch_add(1, std::memory_order_relaxed);
          if(s->_priority <= max_p) {
            if(worker._cache) {
              _schedule(worker, worker._cache);
            }
            worker._cache = s;
            max_p = s->_priority;
          }
          else {
            _schedule(worker, s);
          }
        }
      }
    }
    break;
  }

  // tear_down the invoke
  _tear_down_invoke(worker, node);

  // perform tail recursion elimination for the right-most child to reduce
  // the number of expensive pop/push operations through the task queue
  if(worker._cache) {
    node = worker._cache;
    //node->_state.fetch_or(Node::READY, std::memory_order_release);
    goto begin_invoke;
  }
}
// Procedure: _tear_down_invoke
//
// Finishes the bookkeeping for a completed node. Root-level nodes
// decrement the topology join counter and, if they are the last one,
// tear down the topology; nodes spawned by a joined subflow decrement
// the parent node's join counter instead.
inline void Executor::_tear_down_invoke(Worker& worker, Node* node) {
  // we must check parent first before subtracting the join counter,
  // or it can introduce data race
  if(node->_parent == nullptr) {
    if(node->_topology->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1) {
      _tear_down_topology(worker, node->_topology);
    }
  }
  // joined subflow
  else {
    node->_parent->_join_counter.fetch_sub(1, std::memory_order_release);
  }
}
// Procedure: _observer_prologue
// Notifies every registered observer that the worker is about to run
// the given task.
inline void Executor::_observer_prologue(Worker& worker, Node* node) {
  for(const auto& obs : _observers) {
    obs->on_entry(WorkerView(worker), TaskView(*node));
  }
}
// Procedure: _observer_epilogue
// Notifies every registered observer that the worker has finished
// running the given task.
inline void Executor::_observer_epilogue(Worker& worker, Node* node) {
  for(const auto& obs : _observers) {
    obs->on_exit(WorkerView(worker), TaskView(*node));
  }
}
// Procedure: _invoke_static_task
// Runs a static task's work item, bracketed by observer callbacks.
inline void Executor::_invoke_static_task(Worker& worker, Node* node) {
  _observer_prologue(worker, node);
  auto& work = std::get_if<Node::Static>(&node->_handle)->work;
  // index 0: plain callable; index 1: callable taking a tf::Runtime
  if(work.index() == 0) {
    std::get_if<0>(&work)->operator()();
  }
  else {
    Runtime rt(*this, worker, node);
    std::get_if<1>(&work)->operator()(rt);
  }
  _observer_epilogue(worker, node);
}
// Procedure: _invoke_dynamic_task
// Runs a dynamic (subflow) task: rebuilds its subgraph from scratch,
// invokes the user callable with a Subflow, and joins implicitly unless
// the user already joined or detached.
inline void Executor::_invoke_dynamic_task(Worker& w, Node* node) {
  _observer_prologue(w, node);
  auto h = std::get_if<Node::Dynamic>(&node->_handle);
  h->subgraph._clear();   // start from an empty subgraph on every run
  Subflow sflow(*this, w, node, h->subgraph);
  h->work(sflow);
  if(sflow._joinable) {
    _consume_graph(w, node, h->subgraph);
  }
  _observer_epilogue(w, node);
}
// Procedure: _detach_dynamic_task
//
// Moves a detached subflow graph into the parent topology: each node is
// marked DETACHED, re-parented to the topology (parent pointer
// cleared), the graph is merged into the taskflow graph under its
// mutex, and the source nodes (zero dependents) are scheduled.
inline void Executor::_detach_dynamic_task(
  Worker& w, Node* p, Graph& g
) {

  // graph is empty and has no async tasks
  if(g.empty() && p->_join_counter.load(std::memory_order_acquire) == 0) {
    return;
  }

  SmallVector<Node*> src;

  for(auto n : g._nodes) {
    n->_state.store(Node::DETACHED, std::memory_order_relaxed);
    n->_set_up_join_counter();
    n->_topology = p->_topology;
    n->_parent = nullptr;
    if(n->num_dependents() == 0) {
      src.push_back(n);
    }
  }

  // merging must happen under the taskflow mutex
  {
    std::lock_guard<std::mutex> lock(p->_topology->_taskflow._mutex);
    p->_topology->_taskflow._graph._merge(std::move(g));
  }

  p->_topology->_join_counter.fetch_add(src.size(), std::memory_order_relaxed);

  _schedule(w, src);
}
// Procedure: _consume_graph
//
// Runs graph g as a child of node p on worker w: resets each node's
// state, sets up join counters and parent/topology links, schedules the
// source nodes, and then co-runs (executing or stealing other tasks)
// until p's join counter drops to zero, i.e., the whole subgraph has
// finished.
inline void Executor::_consume_graph(Worker& w, Node* p, Graph& g) {

  // graph is empty and has no async tasks
  if(g.empty() && p->_join_counter.load(std::memory_order_acquire) == 0) {
    return;
  }

  SmallVector<Node*> src;

  for(auto n : g._nodes) {
    n->_state.store(0, std::memory_order_relaxed);
    n->_set_up_join_counter();
    n->_topology = p->_topology;
    n->_parent = p;
    if(n->num_dependents() == 0) {
      src.push_back(n);
    }
  }
  p->_join_counter.fetch_add(src.size(), std::memory_order_relaxed);
  _schedule(w, src);
  // keep this worker productive until the subgraph completes
  _corun_until(w, [p] () -> bool { return p->_join_counter.load(std::memory_order_acquire) == 0; });
}
// Procedure: _invoke_condition_task
// Runs a condition task and stores its single selected branch index
// into conds.
inline void Executor::_invoke_condition_task(
  Worker& worker, Node* node, SmallVector<int>& conds
) {
  _observer_prologue(worker, node);
  auto& work = std::get_if<Node::Condition>(&node->_handle)->work;
  // index 0: plain callable; index 1: callable taking a tf::Runtime
  if(work.index() == 0) {
    conds = { std::get_if<0>(&work)->operator()() };
  }
  else {
    Runtime rt(*this, worker, node);
    conds = { std::get_if<1>(&work)->operator()(rt) };
  }
  _observer_epilogue(worker, node);
}
// Procedure: _invoke_multi_condition_task
// Runs a multi-condition task and stores all selected branch indices
// into conds.
inline void Executor::_invoke_multi_condition_task(
  Worker& worker, Node* node, SmallVector<int>& conds
) {
  _observer_prologue(worker, node);
  auto& work = std::get_if<Node::MultiCondition>(&node->_handle)->work;
  // index 0: plain callable; index 1: callable taking a tf::Runtime
  if(work.index() == 0) {
    conds = std::get_if<0>(&work)->operator()();
  }
  else {
    Runtime rt(*this, worker, node);
    conds = std::get_if<1>(&work)->operator()(rt);
  }
  _observer_epilogue(worker, node);
}
// Procedure: _invoke_module_task
// Runs a composed (module) graph to completion on this worker.
inline void Executor::_invoke_module_task(Worker& w, Node* node) {
  _observer_prologue(w, node);
  auto& graph = std::get_if<Node::Module>(&node->_handle)->graph;
  _consume_graph(w, node, graph);
  _observer_epilogue(w, node);
}
// Procedure: _invoke_async_task
// Runs the work item of a fire-and-forget async task.
inline void Executor::_invoke_async_task(Worker& w, Node* node) {
  _observer_prologue(w, node);
  auto handle = std::get_if<Node::Async>(&node->_handle);
  handle->work();
  _observer_epilogue(w, node);
}
// Procedure: _invoke_dependent_async_task
// Runs the work item of a dependent async task.
inline void Executor::_invoke_dependent_async_task(Worker& w, Node* node) {
  _observer_prologue(w, node);
  auto handle = std::get_if<Node::DependentAsync>(&node->_handle);
  handle->work();
  _observer_epilogue(w, node);
}
// Function: run
// Runs the taskflow once with no completion callback.
inline tf::Future<void> Executor::run(Taskflow& f) {
  return this->run_n(f, size_t{1}, [](){});
}
// Function: run
// Runs a moved taskflow once with no completion callback.
inline tf::Future<void> Executor::run(Taskflow&& f) {
  return this->run_n(std::move(f), size_t{1}, [](){});
}
// Function: run | |
template <typename C> | |
tf::Future<void> Executor::run(Taskflow& f, C&& c) { | |
return run_n(f, 1, std::forward<C>(c)); | |
} | |
// Function: run | |
template <typename C> | |
tf::Future<void> Executor::run(Taskflow&& f, C&& c) { | |
return run_n(std::move(f), 1, std::forward<C>(c)); | |
} | |
// Function: run_n
// Runs the taskflow repeat times with no completion callback.
inline tf::Future<void> Executor::run_n(Taskflow& f, size_t repeat) {
  return this->run_n(f, repeat, [](){});
}
// Function: run_n
// Runs a moved taskflow repeat times with no completion callback.
inline tf::Future<void> Executor::run_n(Taskflow&& f, size_t repeat) {
  return this->run_n(std::move(f), repeat, [](){});
}
// Function: run_n | |
template <typename C> | |
tf::Future<void> Executor::run_n(Taskflow& f, size_t repeat, C&& c) { | |
return run_until( | |
f, [repeat]() mutable { return repeat-- == 0; }, std::forward<C>(c) | |
); | |
} | |
// Function: run_n | |
template <typename C> | |
tf::Future<void> Executor::run_n(Taskflow&& f, size_t repeat, C&& c) { | |
return run_until( | |
std::move(f), [repeat]() mutable { return repeat-- == 0; }, std::forward<C>(c) | |
); | |
} | |
// Function: run_until | |
template<typename P> | |
tf::Future<void> Executor::run_until(Taskflow& f, P&& pred) { | |
return run_until(f, std::forward<P>(pred), [](){}); | |
} | |
// Function: run_until | |
template<typename P> | |
tf::Future<void> Executor::run_until(Taskflow&& f, P&& pred) { | |
return run_until(std::move(f), std::forward<P>(pred), [](){}); | |
} | |
// Function: run_until
//
// Repeatedly runs taskflow f until pred() returns true, then invokes
// callback c. An empty taskflow or an immediately-true predicate
// short-circuits with an already-satisfied future. The emptiness check
// happens under the taskflow mutex because detached subflows may mutate
// the taskflow concurrently.
template <typename P, typename C>
tf::Future<void> Executor::run_until(Taskflow& f, P&& p, C&& c) {

  _increment_topology();

  // Need to check the empty under the lock since dynamic task may
  // define detached blocks that modify the taskflow at the same time
  bool empty;
  {
    std::lock_guard<std::mutex> lock(f._mutex);
    empty = f.empty();
  }

  // No need to create a real topology but returns a dummy future
  if(empty || p()) {
    c();
    std::promise<void> promise;
    promise.set_value();
    _decrement_topology_and_notify();
    return tf::Future<void>(promise.get_future(), std::monostate{});
  }

  // create a topology for this run
  auto t = std::make_shared<Topology>(f, std::forward<P>(p), std::forward<C>(c));

  // need to create future before the topology got torn down quickly
  tf::Future<void> future(t->_promise.get_future(), t);

  // modifying topology needs to be protected under the lock
  {
    std::lock_guard<std::mutex> lock(f._mutex);
    f._topologies.push(t);
    // start immediately only when no other run of f is in flight;
    // otherwise _tear_down_topology launches it when the predecessor ends
    if(f._topologies.size() == 1) {
      _set_up_topology(_this_worker(), t.get());
    }
  }

  return future;
}
// Function: run_until | |
template <typename P, typename C> | |
tf::Future<void> Executor::run_until(Taskflow&& f, P&& pred, C&& c) { | |
std::list<Taskflow>::iterator itr; | |
{ | |
std::scoped_lock<std::mutex> lock(_taskflows_mutex); | |
itr = _taskflows.emplace(_taskflows.end(), std::move(f)); | |
itr->_satellite = itr; | |
} | |
return run_until(*itr, std::forward<P>(pred), std::forward<C>(c)); | |
} | |
// Function: corun | |
template <typename T> | |
void Executor::corun(T& target) { | |
auto w = _this_worker(); | |
if(w == nullptr) { | |
TF_THROW("corun must be called by a worker of the executor"); | |
} | |
Node parent; // dummy parent | |
_consume_graph(*w, &parent, target.graph()); | |
} | |
// Function: corun_until | |
template <typename P> | |
void Executor::corun_until(P&& predicate) { | |
auto w = _this_worker(); | |
if(w == nullptr) { | |
TF_THROW("corun_until must be called by a worker of the executor"); | |
} | |
_corun_until(*w, std::forward<P>(predicate)); | |
} | |
// Procedure: _increment_topology | |
inline void Executor::_increment_topology() { | |
std::lock_guard<std::mutex> lock(_topology_mutex); | |
++_num_topologies; | |
} | |
// Procedure: _decrement_topology_and_notify | |
inline void Executor::_decrement_topology_and_notify() { | |
std::lock_guard<std::mutex> lock(_topology_mutex); | |
if(--_num_topologies == 0) { | |
_topology_cv.notify_all(); | |
} | |
} | |
// Procedure: _decrement_topology | |
inline void Executor::_decrement_topology() { | |
std::lock_guard<std::mutex> lock(_topology_mutex); | |
--_num_topologies; | |
} | |
// Procedure: wait_for_all | |
inline void Executor::wait_for_all() { | |
std::unique_lock<std::mutex> lock(_topology_mutex); | |
_topology_cv.wait(lock, [&](){ return _num_topologies == 0; }); | |
} | |
// Function: _set_up_topology
//
// Prepares a topology for one run (the caller holds the taskflow
// mutex): links every node to the topology, resets node states and join
// counters, collects the source nodes (zero dependents), and schedules
// them. worker may be null when launched from a non-worker thread.
inline void Executor::_set_up_topology(Worker* worker, Topology* tpg) {

  // ---- under taskflow lock ----

  tpg->_sources.clear();
  tpg->_taskflow._graph._clear_detached();

  // scan each node in the graph and build up the links
  for(auto node : tpg->_taskflow._graph._nodes) {

    node->_topology = tpg;
    node->_parent = nullptr;
    node->_state.store(0, std::memory_order_relaxed);

    if(node->num_dependents() == 0) {
      tpg->_sources.push_back(node);
    }

    node->_set_up_join_counter();
  }

  // the topology completes when all sources (and their successors) do
  tpg->_join_counter.store(tpg->_sources.size(), std::memory_order_relaxed);

  if(worker) {
    _schedule(*worker, tpg->_sources);
  }
  else {
    _schedule(tpg->_sources);
  }
}
// Function: _tear_down_topology
//
// Invoked by the worker that finishes the last node of a topology.
// Either re-schedules the same topology (the run_until predicate is not
// yet satisfied) or finalizes it: fires the completion callback, sets
// the promise, pops the topology, starts the next queued run if any,
// and erases executor-managed (moved-in) taskflows.
inline void Executor::_tear_down_topology(Worker& worker, Topology* tpg) {

  auto &f = tpg->_taskflow;

  //assert(&tpg == &(f._topologies.front()));

  // case 1: we still need to run the topology again
  if(!tpg->_is_cancelled && !tpg->_pred()) {
    //assert(tpg->_join_counter == 0);
    std::lock_guard<std::mutex> lock(f._mutex);
    tpg->_join_counter.store(tpg->_sources.size(), std::memory_order_relaxed);
    _schedule(worker, tpg->_sources);
  }
  // case 2: the final run of this topology
  else {

    // TODO: if the topology is cancelled, need to release all semaphores
    if(tpg->_call != nullptr) {
      tpg->_call();
    }

    // If there is another run (interleave between lock)
    if(std::unique_lock<std::mutex> lock(f._mutex); f._topologies.size()>1) {
      //assert(tpg->_join_counter == 0);

      // Set the promise
      tpg->_promise.set_value();
      f._topologies.pop();
      tpg = f._topologies.front().get();

      // decrement the topology but since this is not the last we don't notify
      _decrement_topology();

      // set up topology needs to be under the lock or it can
      // introduce memory order error with pop
      _set_up_topology(&worker, tpg);
    }
    else {
      //assert(f._topologies.size() == 1);

      // Need to back up the promise first here because taskflow might be
      // destroyed soon after calling get
      auto p {std::move(tpg->_promise)};

      // Back up lambda capture in case it has the topology pointer,
      // to avoid it releasing on pop_front ahead of _mutex.unlock &
      // _promise.set_value. Released safely when leaving scope.
      auto c {std::move(tpg->_call)};

      // Get the satellite if any
      auto s {f._satellite};

      // Now we remove the topology from this taskflow
      f._topologies.pop();

      //f._mutex.unlock();
      lock.unlock();

      // We set the promise in the end in case taskflow leaves the scope.
      // After set_value, the caller will return from wait
      p.set_value();

      _decrement_topology_and_notify();

      // remove the taskflow if it is managed by the executor
      // TODO: in the future, we may need to synchronize on wait
      // (which means the following code should be moved before set_value)
      if(s) {
        std::scoped_lock<std::mutex> lock(_taskflows_mutex);
        _taskflows.erase(*s);
      }
    }
  }
}
// ############################################################################ | |
// Forward Declaration: Subflow | |
// ############################################################################ | |
inline void Subflow::join() { | |
// assert(this_worker().worker == &_worker); | |
if(!_joinable) { | |
TF_THROW("subflow not joinable"); | |
} | |
// only the parent worker can join the subflow | |
_executor._consume_graph(_worker, _parent, _graph); | |
_joinable = false; | |
} | |
inline void Subflow::detach() { | |
// assert(this_worker().worker == &_worker); | |
if(!_joinable) { | |
TF_THROW("subflow already joined or detached"); | |
} | |
// only the parent worker can detach the subflow | |
_executor._detach_dynamic_task(_worker, _parent, _graph); | |
_joinable = false; | |
} | |
// ############################################################################ | |
// Forward Declaration: Runtime | |
// ############################################################################ | |
// Procedure: schedule
//
// Immediately schedules an arbitrary task from within a running task.
// The scheduled node's join counter is forced to zero (the scheduler
// requires scheduled tasks to look dependency-free) and the enclosing
// parent/topology join counter is bumped so completion accounting stays
// balanced.
inline void Runtime::schedule(Task task) {

  auto node = task._node;
  // need to keep the invariant: when scheduling a task, the task must have
  // zero dependency (join counter is 0)
  // or we can encounter bug when inserting a nested flow (e.g., module task)
  node->_join_counter.store(0, std::memory_order_relaxed);

  auto& j = node->_parent ? node->_parent->_join_counter :
                            node->_topology->_join_counter;
  j.fetch_add(1, std::memory_order_relaxed);
  _executor._schedule(_worker, node);
}
// Procedure: corun | |
template <typename T> | |
void Runtime::corun(T&& target) { | |
// dynamic task (subflow) | |
if constexpr(is_dynamic_task_v<T>) { | |
Graph graph; | |
Subflow sf(_executor, _worker, _parent, graph); | |
target(sf); | |
if(sf._joinable) { | |
_executor._consume_graph(_worker, _parent, graph); | |
} | |
} | |
// a composable graph object with `tf::Graph& T::graph()` defined | |
else { | |
_executor._consume_graph(_worker, _parent, target.graph()); | |
} | |
} | |
// Procedure: corun_until | |
// Procedure: corun_until
// Keeps the calling worker executing queued tasks (the executor's internal
// co-run loop) until the given predicate returns true. The predicate is
// perfectly forwarded to the executor.
template <typename P>
void Runtime::corun_until(P&& predicate) {
  _executor._corun_until(_worker, std::forward<P>(predicate));
}
// Function: _silent_async | |
template <typename F> | |
void Runtime::_silent_async(Worker& w, const std::string& name, F&& f) { | |
_parent->_join_counter.fetch_add(1, std::memory_order_relaxed); | |
auto node = node_pool.animate( | |
name, 0, _parent->_topology, _parent, 0, | |
std::in_place_type_t<Node::Async>{}, std::forward<F>(f) | |
); | |
_executor._schedule(w, node); | |
} | |
// Function: silent_async | |
// Function: silent_async
// Launches f as an unnamed fire-and-forget task from the calling worker.
template <typename F>
void Runtime::silent_async(F&& f) {
  _silent_async(*_executor._this_worker(), "", std::forward<F>(f));
}
// Function: silent_async | |
// Function: silent_async
// Launches f as a named fire-and-forget task from the calling worker.
template <typename F>
void Runtime::silent_async(const std::string& name, F&& f) {
  _silent_async(*_executor._this_worker(), name, std::forward<F>(f));
}
// Function: silent_async_unchecked | |
// Function: silent_async_unchecked
// Like silent_async, but uses the runtime's associated worker (_worker)
// directly instead of looking up the caller's worker via _this_worker().
template <typename F>
void Runtime::silent_async_unchecked(const std::string& name, F&& f) {
  _silent_async(_worker, name, std::forward<F>(f));
}
// Function: _async | |
template <typename F> | |
auto Runtime::_async(Worker& w, const std::string& name, F&& f) { | |
_parent->_join_counter.fetch_add(1, std::memory_order_relaxed); | |
using R = std::invoke_result_t<std::decay_t<F>>; | |
std::promise<R> p; | |
auto fu{p.get_future()}; | |
auto node = node_pool.animate( | |
name, 0, _parent->_topology, _parent, 0, | |
std::in_place_type_t<Node::Async>{}, | |
[p=make_moc(std::move(p)), f=std::forward<F>(f)] () mutable { | |
if constexpr(std::is_same_v<R, void>) { | |
f(); | |
p.object.set_value(); | |
} | |
else { | |
p.object.set_value(f()); | |
} | |
} | |
); | |
_executor._schedule(w, node); | |
return fu; | |
} | |
// Function: async | |
// Function: async
// Launches f as an unnamed async task from the calling worker and returns
// a std::future for its result.
template <typename F>
auto Runtime::async(F&& f) {
  return _async(*_executor._this_worker(), "", std::forward<F>(f));
}
// Function: async | |
// Function: async
// Launches f as a named async task from the calling worker and returns
// a std::future for its result.
template <typename F>
auto Runtime::async(const std::string& name, F&& f) {
  return _async(*_executor._this_worker(), name, std::forward<F>(f));
}
// Function: join | |
// Function: join
// Co-runs tasks on the calling worker until every task spawned through
// this runtime (tracked by the parent's join counter) has completed.
inline void Runtime::join() {
  corun_until([this] () -> bool {
    return _parent->_join_counter.load(std::memory_order_acquire) == 0;
  });
}
} // end of namespace tf ----------------------------------------------------- | |
// https://hackmd.io/@sysprog/concurrency-atomics | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// Async | |
// ---------------------------------------------------------------------------- | |
// Function: async | |
template <typename F> | |
auto Executor::async(const std::string& name, F&& f) { | |
_increment_topology(); | |
using R = std::invoke_result_t<std::decay_t<F>>; | |
std::promise<R> p; | |
auto fu{p.get_future()}; | |
auto node = node_pool.animate( | |
name, 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Async>{}, | |
_make_promised_async(std::move(p), std::forward<F>(f)) | |
); | |
_schedule_async_task(node); | |
return fu; | |
} | |
// Function: async | |
// Function: async
// Convenience overload: launches f as an unnamed async task and returns a
// std::future for its result.
template <typename F>
auto Executor::async(F&& f) {
  return async("", std::forward<F>(f));
}
// ---------------------------------------------------------------------------- | |
// Silent Async | |
// ---------------------------------------------------------------------------- | |
// Function: silent_async | |
template <typename F> | |
void Executor::silent_async(const std::string& name, F&& f) { | |
_increment_topology(); | |
auto node = node_pool.animate( | |
name, 0, nullptr, nullptr, 0, | |
std::in_place_type_t<Node::Async>{}, std::forward<F>(f) | |
); | |
_schedule_async_task(node); | |
} | |
// Function: silent_async | |
// Function: silent_async
// Convenience overload: launches f as an unnamed fire-and-forget task.
template <typename F>
void Executor::silent_async(F&& f) {
  silent_async("", std::forward<F>(f));
}
// ---------------------------------------------------------------------------- | |
// Async Helper Methods | |
// ---------------------------------------------------------------------------- | |
// Function: _make_promised_async | |
template <typename R, typename F> | |
auto Executor::_make_promised_async(std::promise<R>&& p, F&& func) { | |
return [p=make_moc(std::move(p)), func=std::forward<F>(func)]() mutable { | |
if constexpr(std::is_same_v<R, void>) { | |
func(); | |
p.object.set_value(); | |
} | |
else { | |
p.object.set_value(func()); | |
} | |
}; | |
} | |
// Procedure: _schedule_async_task | |
// Procedure: _schedule_async_task
// Routes the node to the right scheduling path depending on the caller.
inline void Executor::_schedule_async_task(Node* node) {
  Worker* w = _this_worker();
  // Called from outside any worker thread of this executor: use the
  // external scheduling path.
  if(w == nullptr) {
    _schedule(node);
  }
  // Called from one of this executor's workers: push onto its queue.
  else {
    _schedule(*w, node);
  }
}
// Procedure: _tear_down_async | |
// Procedure: _tear_down_async
// Finalizes a plain async task after it ran and recycles its node.
inline void Executor::_tear_down_async(Node* node) {
  if(node->_parent == nullptr) {
    // Submitted directly through the executor: release its topology ref.
    _decrement_topology_and_notify();
  }
  else {
    // Spawned from a runtime/subflow: notify the owning parent task.
    node->_parent->_join_counter.fetch_sub(1, std::memory_order_release);
  }
  node_pool.recycle(node);
}
// ---------------------------------------------------------------------------- | |
// Silent Dependent Async | |
// ---------------------------------------------------------------------------- | |
// Function: silent_dependent_async | |
// Convenience overload: creates an unnamed dependent-async task that runs
// func once all given predecessor tasks finish.
template <typename F, typename... Tasks,
  std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>*
>
tf::AsyncTask Executor::silent_dependent_async(F&& func, Tasks&&... tasks) {
  return silent_dependent_async("", std::forward<F>(func), std::forward<Tasks>(tasks)...);
}
// Function: silent_dependent_async | |
template <typename F, typename... Tasks, | |
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* | |
> | |
tf::AsyncTask Executor::silent_dependent_async( | |
const std::string& name, F&& func, Tasks&&... tasks | |
){ | |
_increment_topology(); | |
size_t num_dependents = sizeof...(Tasks); | |
std::shared_ptr<Node> node( | |
node_pool.animate( | |
name, 0, nullptr, nullptr, num_dependents, | |
std::in_place_type_t<Node::DependentAsync>{}, std::forward<F>(func) | |
), | |
[&](Node* ptr){ node_pool.recycle(ptr); } | |
); | |
{ | |
std::scoped_lock lock(_asyncs_mutex); | |
_asyncs.insert(node); | |
} | |
if constexpr(sizeof...(Tasks) > 0) { | |
(_process_async_dependent(node.get(), tasks, num_dependents), ...); | |
} | |
if(num_dependents == 0) { | |
_schedule_async_task(node.get()); | |
} | |
return AsyncTask(std::move(node)); | |
} | |
// Function: silent_dependent_async | |
// Convenience overload: creates an unnamed dependent-async task whose
// predecessors are the tasks in the iterator range [first, last).
template <typename F, typename I,
  std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>*
>
tf::AsyncTask Executor::silent_dependent_async(F&& func, I first, I last) {
  return silent_dependent_async("", std::forward<F>(func), first, last);
}
// Function: silent_dependent_async | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* | |
> | |
tf::AsyncTask Executor::silent_dependent_async( | |
const std::string& name, F&& func, I first, I last | |
) { | |
_increment_topology(); | |
size_t num_dependents = std::distance(first, last); | |
std::shared_ptr<Node> node( | |
node_pool.animate( | |
name, 0, nullptr, nullptr, num_dependents, | |
std::in_place_type_t<Node::DependentAsync>{}, std::forward<F>(func) | |
), | |
[&](Node* ptr){ node_pool.recycle(ptr); } | |
); | |
{ | |
std::scoped_lock lock(_asyncs_mutex); | |
_asyncs.insert(node); | |
} | |
for(; first != last; first++){ | |
_process_async_dependent(node.get(), *first, num_dependents); | |
} | |
if(num_dependents == 0) { | |
_schedule_async_task(node.get()); | |
} | |
return AsyncTask(std::move(node)); | |
} | |
// ---------------------------------------------------------------------------- | |
// Dependent Async | |
// ---------------------------------------------------------------------------- | |
// Function: dependent_async | |
// Convenience overload: creates an unnamed dependent-async task and
// returns a pair of (task handle, future for func's result).
template <typename F, typename... Tasks,
  std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>*
>
auto Executor::dependent_async(F&& func, Tasks&&... tasks) {
  return dependent_async("", std::forward<F>(func), std::forward<Tasks>(tasks)...);
}
// Function: dependent_async | |
// Function: dependent_async
// Creates a named dependent-async task that runs func once all given
// predecessor tasks finish. Returns a pair of (task handle, future for
// func's result).
template <typename F, typename... Tasks,
  std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>*
>
auto Executor::dependent_async(
  const std::string& name, F&& func, Tasks&&... tasks
) {
  _increment_topology();
  using R = std::invoke_result_t<std::decay_t<F>>;
  std::promise<R> p;
  auto fu{p.get_future()};
  size_t num_dependents = sizeof...(tasks);
  // The node is co-owned by the executor's bookkeeping set (_asyncs) and
  // the returned handle; the deleter recycles it into the node pool.
  std::shared_ptr<Node> node(
    node_pool.animate(
      name, 0, nullptr, nullptr, num_dependents,
      std::in_place_type_t<Node::DependentAsync>{},
      _make_promised_async(std::move(p), std::forward<F>(func))
    ),
    [&](Node* ptr){ node_pool.recycle(ptr); }
  );
  // Publish the node to _asyncs before wiring dependents so concurrent
  // lookups in _process_async_dependent can find it.
  {
    std::scoped_lock lock(_asyncs_mutex);
    _asyncs.insert(node);
  }
  // Link this node to each dependent; a dependent that already finished
  // decrements num_dependents in place.
  if constexpr(sizeof...(Tasks) > 0) {
    (_process_async_dependent(node.get(), tasks, num_dependents), ...);
  }
  // All dependents already done (or none given): schedule immediately.
  if(num_dependents == 0) {
    _schedule_async_task(node.get());
  }
  return std::make_pair(AsyncTask(std::move(node)), std::move(fu));
}
// Function: dependent_async | |
// Convenience overload: creates an unnamed dependent-async task whose
// predecessors are the tasks in the iterator range [first, last).
template <typename F, typename I,
  std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>*
>
auto Executor::dependent_async(F&& func, I first, I last) {
  return dependent_async("", std::forward<F>(func), first, last);
}
// Function: dependent_async | |
template <typename F, typename I, | |
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* | |
> | |
auto Executor::dependent_async( | |
const std::string& name, F&& func, I first, I last | |
) { | |
_increment_topology(); | |
using R = std::invoke_result_t<std::decay_t<F>>; | |
std::promise<R> p; | |
auto fu{p.get_future()}; | |
size_t num_dependents = std::distance(first, last); | |
std::shared_ptr<Node> node( | |
node_pool.animate( | |
name, 0, nullptr, nullptr, num_dependents, | |
std::in_place_type_t<Node::DependentAsync>{}, | |
_make_promised_async(std::move(p), std::forward<F>(func)) | |
), | |
[&](Node* ptr){ node_pool.recycle(ptr); } | |
); | |
{ | |
std::scoped_lock lock(_asyncs_mutex); | |
_asyncs.insert(node); | |
} | |
for(; first != last; first++) { | |
_process_async_dependent(node.get(), *first, num_dependents); | |
} | |
if(num_dependents == 0) { | |
_schedule_async_task(node.get()); | |
} | |
return std::make_pair(AsyncTask(std::move(node)), std::move(fu)); | |
} | |
// ---------------------------------------------------------------------------- | |
// Dependent Async Helper Functions | |
// ---------------------------------------------------------------------------- | |
// Procedure: _process_async_dependent | |
// Procedure: _process_async_dependent
// Links `node` as a successor of the dependent `task` if that dependent is
// still alive and unfinished; otherwise decrements node's join counter and
// updates the caller-visible num_dependents in place.
inline void Executor::_process_async_dependent(
  Node* node, tf::AsyncTask& task, size_t& num_dependents
) {
  // Look up the dependent in the executor's bookkeeping set; holding a
  // shared_ptr keeps it alive for the rest of this call.
  std::shared_ptr<Node> dep;
  {
    std::scoped_lock lock(_asyncs_mutex);
    if(auto itr = _asyncs.find(task._node); itr != _asyncs.end()){
      dep = *itr;
    }
  }
  // if the dependent task exists
  if(dep) {
    auto& state = std::get_if<Node::DependentAsync>(&(dep->_handle))->state;
    add_dependent:
    auto target = Node::AsyncState::UNFINISHED;
    // acquires the lock: UNFINISHED -> LOCKED guards the successor list
    if(state.compare_exchange_weak(target, Node::AsyncState::LOCKED,
                                   std::memory_order_acq_rel,
                                   std::memory_order_acquire)) {
      dep->_successors.push_back(node);
      state.store(Node::AsyncState::UNFINISHED, std::memory_order_release);
    }
    // dep's state is FINISHED, which means dep finished its callable already;
    // thus decrement the node's join counter by 1
    else if (target == Node::AsyncState::FINISHED) {
      // acq_rel on the decrement synchronizes with the finishing worker
      num_dependents = node->_join_counter.fetch_sub(1, std::memory_order_acq_rel) - 1;
    }
    // another worker adding an async task that shares the same dependent
    // currently holds LOCKED (or the weak CAS failed spuriously): retry
    else {
      goto add_dependent;
    }
  }
  else {
    // dependent no longer in _asyncs, i.e., it has already been torn down
    num_dependents = node->_join_counter.fetch_sub(1, std::memory_order_acq_rel) - 1;
  }
}
// Procedure: _tear_down_dependent_async | |
// Procedure: _tear_down_dependent_async
// Finalizes a dependent-async node after its callable ran: publishes the
// FINISHED state, schedules successors whose dependencies are resolved,
// and removes the node from the executor's bookkeeping set.
inline void Executor::_tear_down_dependent_async(Worker& worker, Node* node) {
  // this async task comes from Executor
  auto& state = std::get_if<Node::DependentAsync>(&(node->_handle))->state;
  auto target = Node::AsyncState::UNFINISHED;
  // Transition UNFINISHED -> FINISHED; if another thread holds LOCKED
  // (adding a successor), spin until it releases back to UNFINISHED.
  while(!state.compare_exchange_weak(target, Node::AsyncState::FINISHED,
                                     std::memory_order_acq_rel,
                                     std::memory_order_relaxed)) {
    target = Node::AsyncState::UNFINISHED;
  }
  // spawn successors whenever their dependencies are resolved
  worker._cache = nullptr;
  for(size_t i=0; i<node->_successors.size(); ++i) {
    //if(auto s = node->_successors[i]; --(s->_join_counter) == 0) {
    if(auto s = node->_successors[i];
      s->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1
    ) {
      // Keep the most recent ready successor cached for this worker to run
      // next; any previously cached one is scheduled normally.
      if(worker._cache) {
        _schedule(worker, worker._cache);
      }
      worker._cache = s;
    }
  }
  // remove myself from the asyncs using extraction to avoid calling
  // ~Node inside the lock
  typename std::unordered_set<std::shared_ptr<Node>>::node_type extracted;
  {
    // No-op deleter: this shared_ptr is used only as a lookup key.
    std::shared_ptr<Node> ptr(node, [](Node*){});
    std::scoped_lock lock(_asyncs_mutex);
    extracted = _asyncs.extract(ptr);
    // assert(extracted.empty() == false);
  }
  _decrement_topology_and_notify();
}
} // end of namespace tf ----------------------------------------------------- | |
/** | |
@file critical.hpp | |
@brief critical include file | |
*/ | |
namespace tf { | |
// ---------------------------------------------------------------------------- | |
// CriticalSection | |
// ---------------------------------------------------------------------------- | |
/** | |
@class CriticalSection | |
@brief class to create a critical region of limited workers to run tasks | |
tf::CriticalSection is a wrapper over tf::Semaphore and is specialized for
limiting the maximum concurrency over a set of tasks. | |
A critical section starts with an initial count representing that limit. | |
When a task is added to the critical section, | |
the task acquires and releases the semaphore internal to the critical section. | |
This design avoids explicit call of tf::Task::acquire and tf::Task::release. | |
The following example creates a critical section of one worker and adds | |
the five tasks to the critical section. | |
@code{.cpp} | |
tf::Executor executor(8); // create an executor of 8 workers | |
tf::Taskflow taskflow; | |
// create a critical section of 1 worker | |
tf::CriticalSection critical_section(1); | |
tf::Task A = taskflow.emplace([](){ std::cout << "A" << std::endl; }); | |
tf::Task B = taskflow.emplace([](){ std::cout << "B" << std::endl; }); | |
tf::Task C = taskflow.emplace([](){ std::cout << "C" << std::endl; }); | |
tf::Task D = taskflow.emplace([](){ std::cout << "D" << std::endl; }); | |
tf::Task E = taskflow.emplace([](){ std::cout << "E" << std::endl; }); | |
critical_section.add(A, B, C, D, E); | |
executor.run(taskflow).wait(); | |
@endcode | |
*/ | |
class CriticalSection : public Semaphore {
  public:
  /**
  @brief constructs a critical region limiting concurrency to
         @c max_workers workers (defaults to one, i.e., mutual exclusion)
  */
  explicit CriticalSection(size_t max_workers = 1);
  /**
  @brief adds one or more tasks into the critical region; each task will
         acquire the internal semaphore before running and release it after
  */
  template <typename... Tasks>
  void add(Tasks...tasks);
};
// Constructor: delegates the concurrency limit to the underlying Semaphore.
inline CriticalSection::CriticalSection(size_t max_workers) :
  Semaphore {max_workers} {
}
// Registers each task with this critical region: every task acquires the
// semaphore (this CriticalSection) before it runs and releases it after,
// so callers need not invoke Task::acquire/release explicitly.
template <typename... Tasks>
void CriticalSection::add(Tasks... tasks) {
  (tasks.acquire(*this), ...);
  (tasks.release(*this), ...);
}
} // end of namespace tf. --------------------------------------------------- | |
/** | |
@dir taskflow | |
@brief root taskflow include dir | |
*/ | |
/** | |
@dir taskflow/core | |
@brief taskflow core include dir | |
*/ | |
/** | |
@dir taskflow/algorithm | |
@brief taskflow algorithms include dir | |
*/ | |
/** | |
@dir taskflow/cuda | |
@brief taskflow CUDA include dir | |
*/ | |
/** | |
@file taskflow/taskflow.hpp | |
@brief main taskflow include file | |
*/ | |
// TF_VERSION % 100 is the patch level
// TF_VERSION / 100 % 1000 is the minor version
// TF_VERSION / 100000 is the major version
// current version: 3.6.0
#define TF_VERSION 300600
// The component macros are parenthesized so they expand safely inside
// larger expressions (e.g. `100 / TF_MAJOR_VERSION` would otherwise parse
// as `100 / TF_VERSION / 100000` and evaluate to 0).
#define TF_MAJOR_VERSION (TF_VERSION/100000)
#define TF_MINOR_VERSION (TF_VERSION/100%1000)
#define TF_PATCH_VERSION (TF_VERSION%100)
/** | |
@brief taskflow namespace | |
*/ | |
namespace tf { | |
/** | |
@private | |
*/ | |
namespace detail { } | |
/** | |
@brief queries the version information in a string format @c major.minor.patch | |
Release notes are available here: https://taskflow.github.io/taskflow/Releases.html | |
*/ | |
// Returns the human-readable version string; must stay in sync with the
// TF_VERSION macro defined above.
constexpr const char* version() {
  return "3.6.0";
}
} // end of namespace tf ----------------------------------------------------- | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment