Skip to content

Instantly share code, notes, and snippets.

@StephanDollberg
Last active December 16, 2015 09:19
Show Gist options
  • Save StephanDollberg/5412012 to your computer and use it in GitHub Desktop.
Save StephanDollberg/5412012 to your computer and use it in GitHub Desktop.
parallel_find using Intel TBB
#include <boost/range/irange.hpp>
#include <boost/range/algorithm.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/median.hpp>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/task.h>
#include <tbb/atomic.h>
#include <tbb/tick_count.h>
#include <algorithm>
#include <iostream>
#include <vector>
//Note: All thresholds are random
template<typename Iter, typename T>
struct find_task : tbb::task {
const T& value;
tbb::atomic<bool>& done_ptr;
Iter begin;
Iter end;
Iter& result_iter;
find_task(const T& search_val, tbb::atomic<bool>& done_flag, Iter b, Iter e, Iter& f_ptr)
: value(search_val), done_ptr(done_flag), begin(b), end(e), result_iter(f_ptr) {}
tbb::task* execute() {
if(done_ptr == true) {
return NULL;
}
if(std::distance(begin, end) < 10000) {
result_iter = std::find(begin, end, value);
}
else {
Iter middle = std::next(begin, std::distance(begin, end) / 2);
Iter left_result_iter = middle;
Iter right_result_iter = end;
find_task<Iter, T>& left
= *new(allocate_child())
find_task<Iter, T>(value, done_ptr, begin, middle, left_result_iter);
find_task<Iter, T>& right
= *new(allocate_child())
find_task<Iter, T>(value, done_ptr, middle, end, right_result_iter);
set_ref_count(3);
spawn(right);
spawn_and_wait_for_all(left);
if(left_result_iter != middle) {
result_iter = left_result_iter;
done_ptr = true;
}
else if(right_result_iter != end) {
result_iter = right_result_iter;
done_ptr = true;
}
}
return NULL;
}
};
// version 1 using task constructs
template<typename Iter, typename T>
Iter parallel_find(Iter begin, Iter end, const T& value) {
tbb::atomic<bool> done;
done = false;
Iter found_ptr(end);
find_task<Iter, T>& root
= *new(tbb::task::allocate_root())
find_task<Iter, T>(value, done, begin, end, found_ptr);
tbb::task::spawn_root_and_wait(root);
return found_ptr;
}
template<typename Iter, typename T>
struct parallel_find2_helper {
tbb::task_group_context& context;
tbb::atomic<std::size_t>& index;
const T& value;
Iter begin;
parallel_find2_helper(tbb::task_group_context& c, tbb::atomic<std::size_t>& i, const T& t, Iter b)
: context(c), index(i), value(t), begin(b) {}
void operator()(tbb::blocked_range<Iter> range) const {
Iter iter = std::find(range.begin(), range.end(), value);
if(iter != range.end()) {
index = std::distance(begin, iter);
context.cancel_group_execution();
}
}
};
// version 2 using parallel_for with task_group_context
template<typename Iter, typename T>
Iter parallel_find2(Iter begin, Iter end, const T& value) {
tbb::task_group_context context;
tbb::atomic<std::size_t> index;
index = std::numeric_limits<std::size_t>::max();
parallel_find2_helper<Iter, T> worker(context, index, value, begin);
tbb::parallel_for(tbb::blocked_range<Iter>(begin, end, 10000), worker, context);
std::size_t result_index = index.load();
return result_index == std::numeric_limits<std::size_t>::max() ? end : std::next(begin, result_index);
}
int main()
{
namespace ba = boost::accumulators;
typedef std::vector<int> container;
typedef container::iterator iter;
typedef ba::accumulator_set<double, ba::stats<ba::tag::mean, ba::tag::median> > ba_acc_set;
container vec(100000000, 0);
boost::copy(boost::irange<int>(0, vec.size()), vec.begin());
const int runs = 5;
const int search_val = vec.size() / 2 ;
std::cout << "version 1" << std::endl;
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
std::cout << "version 2" << std::endl;
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find2(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
std::cout << "serial" << std::endl;
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = std::find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment