Created
December 15, 2016 05:44
-
-
Save wangyangkobe/6e6a55f8d60d1c2299d1aab4845bf378 to your computer and use it in GitHub Desktop.
a parallel version of std::accumulate.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <thread> | |
#include <iostream> | |
#include <memory> | |
#include <vector> | |
#include <algorithm> | |
#include <functional> | |
#include <numeric> | |
using namespace std; | |
template<typename Iterator, typename T> | |
struct accumulate_block | |
{ | |
void operator() (Iterator first, Iterator last, T& result) | |
{ | |
result = std::accumulate(first, last, result); | |
} | |
}; | |
template<typename Iterator, typename T> | |
T parallel_accumulate(Iterator first, Iterator last, T& init) | |
{ | |
unsigned long const length = std::distance(first, last); | |
if(!length) | |
return init; | |
const unsigned long min_per_thread = 25; | |
const unsigned long max_threads = (length + min_per_thread - 1) / min_per_thread; | |
const unsigned long hardware_threads = std::thread::hardware_concurrency(); | |
const unsigned long num_threads = std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads); | |
const unsigned long block_size = length / num_threads; | |
std::vector<T> results(num_threads); | |
std::vector<std::thread> threads(num_threads - 1); | |
Iterator block_start = first; | |
for(unsigned long i = 0; i < (num_threads - 1); i++) | |
{ | |
Iterator block_end = block_start; | |
std::advance(block_end, block_size); | |
threads[i] = std::thread(accumulate_block<Iterator, T>(), block_start, block_end, std::ref(results[i])); | |
block_start = block_end; | |
} | |
accumulate_block<Iterator,T>()(block_start, last, results[num_threads-1]); | |
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join)); | |
return init = std::accumulate(results.begin(), results.end(), init); | |
} | |
int main() | |
{ | |
vector<int> vec(1000000); | |
for(int i = 0; i < 1000000; i++) | |
vec[i] = i + 1; | |
unsigned long result = 0; | |
cout<<parallel_accumulate(begin(vec), end(vec), result)<<endl; | |
cout<<result<<endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A simple implementation of a parallel version of std::accumulate.
It divides the work among the threads, with a minimum number of elements per thread in order to avoid the overhead of too many threads.