Created
June 13, 2017 06:39
-
-
Save BillyONeal/f4bf169063a42a305751a4feaca0d0f1 to your computer and use it in GitHub Desktop.
Poor Reverse Perf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// PARALLEL FUNCTION TEMPLATE reverse | |
template<class _BidIt> | |
struct _Reverse_partition | |
{ | |
_BidIt _First; | |
_BidIt _Stop_at; | |
_BidIt _Last; | |
}; | |
template<class _BidIt, | |
bool = is_base_of<random_access_iterator_tag, _Iter_cat_t<_BidIt>>::value> | |
struct _Static_reverse_partition_set | |
{ // reverse partition for random-access iterators | |
_BidIt _Start_at; | |
_BidIt _End_at; | |
size_t _Chunk_size; | |
size_t _Unchunked_items; | |
_Static_reverse_partition_set(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks) | |
: _Start_at(_First), | |
_End_at(_Last), | |
_Chunk_size(_Count / _Chunks), | |
_Unchunked_items(_Count % _Chunks) | |
{ // prepare a static partitioned reverse operation | |
// pre: _Count >= _Chunks | |
// pre: _Chunks >= 1 | |
_First += _Count; | |
_Last -= _Count; | |
} | |
_Reverse_partition<_BidIt> operator[](const size_t _This_chunk) const | |
{ // get the indicated chunk | |
const auto _Offsets = _Static_partition_offset<_Iter_diff_t<_BidIt>>( | |
_This_chunk, _Chunk_size, _Unchunked_items); | |
const auto _First = _Start_at + _Offsets.first; | |
return {_First, _First + _Offsets.second, _End_at - _Offsets.first}; | |
} | |
}; | |
template<class _BidIt> | |
struct _Static_reverse_partition_set<_BidIt, false> | |
{ // reverse partition for bidirectional iterators | |
vector<pair<_BidIt, _BidIt>> _Partitions; | |
_Static_reverse_partition_set(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks) | |
: _Partitions(_Do_partition(_First, _Last, _Count, _Chunks)) | |
{ // prepare a static partitioned reverse operation | |
// pre: _Count >= _Chunks | |
// pre: _Chunks >= 1 | |
} | |
_Reverse_partition<_BidIt> operator[](const size_t _This_chunk) const | |
{ // get the indicated chunk | |
return {_Partitions[_This_chunk].first, | |
_Partitions[_This_chunk + 1].first, _Partitions[_This_chunk].second}; | |
} | |
private: | |
static vector<pair<_BidIt, _BidIt>> _Do_partition(_BidIt& _First, _BidIt& _Last, | |
const size_t _Count, const size_t _Chunks) | |
{ // prepare a reverse operation on a bidirectional range | |
// pre: _Count >= _Chunks | |
// pre: _Chunks >= 1 | |
const size_t _Chunk_size = _Count / _Chunks; | |
const size_t _Unchunked_items = _Count % _Chunks; | |
vector<pair<_BidIt, _BidIt>> _Results(_Chunks + 1); | |
auto _Result = _Results.begin(); | |
*_Result = {_First, _Last}; | |
for (size_t _Idx = 0; _Idx < _Unchunked_items; ++_Idx) | |
{ // record bounds of chunks with an extra item | |
_STD advance(_First, _Chunk_size + 1); | |
_STD advance(_Last, -static_cast<ptrdiff_t>(_Chunk_size) - 1); // FIXME | |
*++_Result = {_First, _Last}; | |
} | |
for (size_t _Idx = _Unchunked_items; _Idx < _Chunks; ++_Idx) | |
{ // record bounds of chunks with no extra item | |
_STD advance(_First, _Chunk_size); | |
_STD advance(_First, -static_cast<ptrdiff_t>(_Chunk_size)); | |
*++_Result = {_First, _Last}; | |
} | |
return (_Results); | |
} | |
}; | |
template<class _BidIt> | |
struct _Static_partitioned_reverse | |
{ // reverse task scheduled on the system thread pool | |
const _Static_reverse_partition_set<_BidIt> _Partitions; | |
atomic<size_t> _Highest_chunk; | |
_Static_partitioned_reverse(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks) | |
: _Partitions(_First, _Last, _Count, _Chunks), | |
_Highest_chunk{0} | |
{ | |
} | |
static void __stdcall _Threadpool_callback(__std_PTP_CALLBACK_INSTANCE, void * _Context, | |
__std_PTP_WORK) _NOEXCEPT // enforces termination | |
{ // callback from the system thread pool to do work | |
auto * const _This = static_cast<_Static_partitioned_reverse *>(_Context); | |
auto _This_chunk = _This->_Partitions[_This->_Highest_chunk++]; | |
for (; _This_chunk._First != _This_chunk._Stop_at; ++_This_chunk._First) | |
{ | |
_STD iter_swap(_This_chunk._First, --_This_chunk._Last); | |
} | |
} | |
_Work_ptr _Create_work() | |
{ // registers *this with the system thread pool | |
_Work_ptr _Result{__std_CreateThreadpoolWork(&_Threadpool_callback, this, 0)}; | |
if (!_Result) | |
{ | |
_THROW(bad_alloc{}); | |
} | |
return (_Result); | |
} | |
}; | |
template<class _BidIt> | |
void _Reverse_unchecked(_Sequenced_policy_tag, _BidIt _First, _BidIt _Last) _NOEXCEPT // enforces termination | |
{ // reverse with termination | |
_Reverse_unchecked(_First, _Last); | |
} | |
template<class _BidIt> | |
void _Reverse_unchecked(_Parallel_policy_tag, _BidIt _First, _BidIt _Last) | |
{ // reverse in parallel | |
const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); | |
size_t _Count; | |
if (_Hw_threads <= 1 || (_Count = _STD distance(_First, _Last)) <= 3) | |
{ // don't parallelize on uniprocessor machines or at most one swap | |
_Reverse_unchecked(execution::seq, _First, _Last); | |
return; | |
} | |
const size_t _Swaps = _Count >> 1; // TRANSITION, VSO#433486 | |
const size_t _Threads = _Min_value(_Hw_threads, _Swaps); | |
const size_t _Foreground_work = _Swaps / _Threads + (_Swaps % _Threads != 0); | |
const size_t _Background_work = _Swaps - _Foreground_work; | |
const size_t _Background_chunks = _Threads - 1; | |
_Static_partitioned_reverse<_BidIt> _Operation{_First, _Last, _Background_work, _Background_chunks}; | |
const _Work_ptr _Work{_Operation._Create_work()}; | |
for (size_t _Idx = 1; _Idx < _Background_chunks; ++_Idx) | |
{ | |
__std_SubmitThreadpoolWork(_Work.get()); | |
} | |
_Reverse_unchecked(execution::seq, _First, _Last); | |
__std_WaitForThreadpoolWorkCallbacks(_Work.get(), false); | |
} | |
template<class _ExecutionPolicy, | |
class _BidIt, | |
_Enable_if_execution_policy_t<_ExecutionPolicy> = 0> inline | |
void reverse(_ExecutionPolicy&& _Exec, const _BidIt _First, const _BidIt _Last) | |
{ // reverse a range with supplied execution policy | |
_DEBUG_RANGE(_First, _Last); | |
_Reverse_unchecked(_STD forward<_ExecutionPolicy>(_Exec), _Unchecked(_First), _Unchecked(_Last)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment