Skip to content

Instantly share code, notes, and snippets.

@gyakoo
Created October 18, 2016 22:37
Show Gist options
  • Save gyakoo/a0abf36c3c7abdbbbb298acfa994e60d to your computer and use it in GitHub Desktop.
Save gyakoo/a0abf36c3c7abdbbbb298acfa994e60d to your computer and use it in GitHub Desktop.
Profile 'min' search using sequential, parallel_for_each, combinable and atomic in a concurrent_vector
// The following test measures time to find the minimum value in a huge array using:
// sequential
// parallel_for_each
// parallel_for_each + combinable<>
// VS2015 Update 3
static char toClearCache[256 << 20]; //try to flush!?
static void clearCache()
{
for (int i = 0; i < ARRAYSIZE(toClearCache); ++i)
{
char b = toClearCache[i];
toClearCache[i] = 0xbaad + b;
}
}
template<typename T>
void UpdateMinimum(std::atomic<T>& minValue, T const& value) noexcept
{
T prev_value = minValue;
while (prev_value < value && !minValue.compare_exchange_weak(prev_value, value))
;
}
// DO THE TEST
{
struct MyStruct
{
void update()
{
valToCompare = std::hash<int>()(seed);
}
int valToCompare;
int seed;
};
// FILL THE VECTOR
static const size_t COUNT = 256 << 20;
concurrency::concurrent_vector<MyStruct> myVector;
myVector.resize(COUNT);
concurrency::parallel_for(size_t(0), COUNT, [&myVector](size_t i)
{
myVector[i].seed = i;
});
// PARALLEL MIN (combinable)
auto start = GetTickCount64();
concurrency::combinable<int> minValue([] {return INT_MAX; });
concurrency::parallel_for_each(myVector.begin(), myVector.end(), [&](auto& elm)
{
elm.update();
int& localMin = minValue.local();
localMin = std::min(localMin, elm.valToCompare);
});
int minParallelComb = minValue.combine([&](auto& a, auto& b) { return std::min(a, b); });
unsigned long long timeParallelComb = GetTickCount64() - start;
clearCache();
// PARALLEL MIN (normal, atomic)
start = GetTickCount64();
std::atomic<int> minParallelAtomic = INT_MAX;
concurrency::parallel_for_each(myVector.begin(), myVector.end(), [&](auto& elm)
{
elm.update();
UpdateMinimum(minParallelAtomic, elm.valToCompare);
});
unsigned long long timeParallelAtomic = GetTickCount64() - start;
clearCache();
// PARALLEL MIN (normal, NO atomic, WRONG)
start = GetTickCount64();
int minParallelWrong = INT_MAX;
concurrency::parallel_for_each(myVector.begin(), myVector.end(), [&](auto& elm)
{
elm.update();
minParallelWrong = std::min(minParallelWrong, elm.valToCompare);
});
unsigned long long timeParallelWrong = GetTickCount64() - start;
clearCache();
// SEQUENTIAL MIN
start = GetTickCount64();
int minSequential = INT_MAX;
for (auto& elm : myVector)
{
elm.update();
minSequential = std::min(minSequential, elm.valToCompare);
}
unsigned long long timeSequential= GetTickCount64() - start;
wchar_t str[256];
swprintf_s(str, L"%lld sequential\n%lld parallel comb\n%lld parallel (no atomic)\n%lld parallel (atomic)", timeSequential, timeParallelComb, timeParallelWrong, timeParallelAtomic);
OutputDebugStringW(str);
}
// Output is (in ms):
//
// 593 sequential
// 734 parallel comb
// 5500 parallel (no atomic)
// 312 parallel (atomic)
// Did you expect the same results?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment