@BillyONeal
Created June 13, 2017 06:39
Poor Reverse Perf
// PARALLEL FUNCTION TEMPLATE reverse
template<class _BidIt>
struct _Reverse_partition
{
    _BidIt _First;
    _BidIt _Stop_at;
    _BidIt _Last;
};
template<class _BidIt,
    bool = is_base_of<random_access_iterator_tag, _Iter_cat_t<_BidIt>>::value>
struct _Static_reverse_partition_set
{ // reverse partition for random-access iterators
    _BidIt _Start_at;
    _BidIt _End_at;
    size_t _Chunk_size;
    size_t _Unchunked_items;

    _Static_reverse_partition_set(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks)
        : _Start_at(_First),
        _End_at(_Last),
        _Chunk_size(_Count / _Chunks),
        _Unchunked_items(_Count % _Chunks)
    { // prepare a static partitioned reverse operation
        // pre: _Count >= _Chunks
        // pre: _Chunks >= 1
        _First += _Count; // reserve the first _Count elements for the background chunks
        _Last -= _Count; // and the last _Count elements; [_First, _Last) is left for the caller
    }

    _Reverse_partition<_BidIt> operator[](const size_t _This_chunk) const
    { // get the indicated chunk
        const auto _Offsets = _Static_partition_offset<_Iter_diff_t<_BidIt>>(
            _This_chunk, _Chunk_size, _Unchunked_items);
        const auto _First = _Start_at + _Offsets.first;
        return {_First, _First + _Offsets.second, _End_at - _Offsets.first};
    }
};
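
// Worked example of the static partitioning above (a sketch; the exact shape of
// _Static_partition_offset's result -- {start offset, chunk length} -- is inferred
// from its use in operator[]):
//   _Count == 11, _Chunks == 3  =>  _Chunk_size == 3, _Unchunked_items == 2,
//   so the chunk lengths are 4, 4, 3 at offsets 0, 4, 8.
//   operator[](1) then yields {_Start_at + 4, _Start_at + 8, _End_at - 4}: the
//   callback swaps the 4 elements starting at _Start_at + 4 with the 4 elements
//   immediately preceding _End_at - 4, walking the backward iterator down.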
template<class _BidIt>
struct _Static_reverse_partition_set<_BidIt, false>
{ // reverse partition for bidirectional iterators
    vector<pair<_BidIt, _BidIt>> _Partitions;

    _Static_reverse_partition_set(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks)
        : _Partitions(_Do_partition(_First, _Last, _Count, _Chunks))
    { // prepare a static partitioned reverse operation
        // pre: _Count >= _Chunks
        // pre: _Chunks >= 1
    }

    _Reverse_partition<_BidIt> operator[](const size_t _This_chunk) const
    { // get the indicated chunk
        return {_Partitions[_This_chunk].first,
            _Partitions[_This_chunk + 1].first, _Partitions[_This_chunk].second};
    }

private:
    static vector<pair<_BidIt, _BidIt>> _Do_partition(_BidIt& _First, _BidIt& _Last,
        const size_t _Count, const size_t _Chunks)
    { // prepare a reverse operation on a bidirectional range
        // pre: _Count >= _Chunks
        // pre: _Chunks >= 1
        const size_t _Chunk_size = _Count / _Chunks;
        const size_t _Unchunked_items = _Count % _Chunks;
        vector<pair<_BidIt, _BidIt>> _Results(_Chunks + 1);
        auto _Result = _Results.begin();
        *_Result = {_First, _Last};
        for (size_t _Idx = 0; _Idx < _Unchunked_items; ++_Idx)
        { // record bounds of chunks with an extra item
            _STD advance(_First, _Chunk_size + 1);
            _STD advance(_Last, -static_cast<ptrdiff_t>(_Chunk_size) - 1); // FIXME
            *++_Result = {_First, _Last};
        }
        for (size_t _Idx = _Unchunked_items; _Idx < _Chunks; ++_Idx)
        { // record bounds of chunks with no extra item
            _STD advance(_First, _Chunk_size);
            _STD advance(_Last, -static_cast<ptrdiff_t>(_Chunk_size)); // retreat _Last, not _First, by one chunk
            *++_Result = {_First, _Last};
        }

        return (_Results);
    }
};
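
// Worked example of _Do_partition (a sketch, using + and - loosely on the
// bidirectional iterators): with _Count == 10 and _Chunks == 3, _Chunk_size == 3
// and _Unchunked_items == 1, so the chunks hold 4, 3, 3 items. _Results records
// the running {forward, backward} boundaries:
//   {_First, _Last}, {_First+4, _Last-4}, {_First+7, _Last-7}, {_First+10, _Last-10}
// and operator[](0) yields {_First, _First+4, _Last}: swap the first 4 items with
// the last 4, decrementing the backward iterator before each swap. The caller's
// _First and _Last are left advanced/retreated by _Count, so the middle of the
// range remains for the foreground pass.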
template<class _BidIt>
struct _Static_partitioned_reverse
{ // reverse task scheduled on the system thread pool
    const _Static_reverse_partition_set<_BidIt> _Partitions;
    atomic<size_t> _Highest_chunk;

    _Static_partitioned_reverse(_BidIt& _First, _BidIt& _Last, const size_t _Count, const size_t _Chunks)
        : _Partitions(_First, _Last, _Count, _Chunks),
        _Highest_chunk{0}
    {
    }

    static void __stdcall _Threadpool_callback(__std_PTP_CALLBACK_INSTANCE, void * _Context,
        __std_PTP_WORK) _NOEXCEPT // enforces termination
    { // callback from the system thread pool to do work
        auto * const _This = static_cast<_Static_partitioned_reverse *>(_Context);
        auto _This_chunk = _This->_Partitions[_This->_Highest_chunk++];
        for (; _This_chunk._First != _This_chunk._Stop_at; ++_This_chunk._First)
        {
            _STD iter_swap(_This_chunk._First, --_This_chunk._Last);
        }
    }

    _Work_ptr _Create_work()
    { // registers *this with the system thread pool
        _Work_ptr _Result{__std_CreateThreadpoolWork(&_Threadpool_callback, this, 0)};
        if (!_Result)
        {
            _THROW(bad_alloc{});
        }

        return (_Result);
    }
};
template<class _BidIt>
void _Reverse_unchecked(_Sequenced_policy_tag, _BidIt _First, _BidIt _Last) _NOEXCEPT // enforces termination
{ // reverse with termination
    _Reverse_unchecked(_First, _Last);
}
template<class _BidIt>
void _Reverse_unchecked(_Parallel_policy_tag, _BidIt _First, _BidIt _Last)
{ // reverse in parallel
    const size_t _Hw_threads = __std_parallel_algorithms_hw_threads();
    size_t _Count;
    if (_Hw_threads <= 1 || (_Count = _STD distance(_First, _Last)) <= 3)
    { // don't parallelize on uniprocessor machines or when there is at most one swap
        _Reverse_unchecked(execution::seq, _First, _Last);
        return;
    }

    const size_t _Swaps = _Count >> 1; // TRANSITION, VSO#433486
    const size_t _Threads = _Min_value(_Hw_threads, _Swaps);
    const size_t _Foreground_work = _Swaps / _Threads + (_Swaps % _Threads != 0); // calling thread takes the ceiling share
    const size_t _Background_work = _Swaps - _Foreground_work;
    const size_t _Background_chunks = _Threads - 1;
    _Static_partitioned_reverse<_BidIt> _Operation{_First, _Last, _Background_work, _Background_chunks};
    const _Work_ptr _Work{_Operation._Create_work()};
    for (size_t _Idx = 0; _Idx < _Background_chunks; ++_Idx)
    { // submit one callback per background chunk
        __std_SubmitThreadpoolWork(_Work.get());
    }

    _Reverse_unchecked(execution::seq, _First, _Last); // do the foreground share on this thread
    __std_WaitForThreadpoolWorkCallbacks(_Work.get(), false);
}
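
// Worked example of the split above (a sketch): reversing 1,000,000 elements
// with 4 hardware threads gives _Swaps == 500,000, _Threads == 4,
// _Foreground_work == 125,000, _Background_work == 375,000, and
// _Background_chunks == 3, so each of the 3 thread-pool callbacks and the
// calling thread perform 125,000 swaps apiece.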
template<class _ExecutionPolicy,
    class _BidIt,
    _Enable_if_execution_policy_t<_ExecutionPolicy> = 0> inline
void reverse(_ExecutionPolicy&& _Exec, const _BidIt _First, const _BidIt _Last)
{ // reverse a range with supplied execution policy
    _DEBUG_RANGE(_First, _Last);
    _Reverse_unchecked(_STD forward<_ExecutionPolicy>(_Exec), _Unchecked(_First), _Unchecked(_Last));
}
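
// A minimal usage sketch, not part of the gist's code: assuming these internals
// ship behind the standard <algorithm>/<execution> headers, a call like the one
// below is what reaches the parallel overload above.

#include <algorithm>
#include <execution>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> v(1'000'000);
    std::iota(v.begin(), v.end(), 0); // 0, 1, 2, ...
    std::reverse(std::execution::par, v.begin(), v.end()); // dispatches on the execution policy
    return v.front() == 999'999 ? 0 : 1; // sanity check
}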