Skip to content

Instantly share code, notes, and snippets.

@goldshtn
Created October 23, 2013 15:18
Show Gist options
  • Save goldshtn/7120695 to your computer and use it in GitHub Desktop.
Save goldshtn/7120695 to your computer and use it in GitHub Desktop.
Benchmark for parallelizing work unevenly across multiple threads.
#include <chrono>
#include <thread>
#include <iostream>
#include <cmath>
#include <vector>
#include <functional>
#include <algorithm>
template <typename Fn>
double avg_time_ms(Fn&& fn, unsigned int repetitions) {
double total = 0.0;
for (unsigned int i = 0; i < repetitions; ++i) {
auto start = std::chrono::steady_clock::now();
fn();
total += std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - start).count() / 1000.0;
}
return total / repetitions;
}
bool is_prime(unsigned int n) {
if (n == 2) return true;
if (n % 2 == 0) return false;
#ifdef NAIVE_PRIMES
unsigned int root = n;
#else
unsigned int root = static_cast<unsigned int>(
std::sqrt(static_cast<double>(n)));
#endif
for (unsigned int x = 3; x <= root; x += 2) {
if (n % x == 0) return false;
}
return true;
}
unsigned int count_primes(unsigned int begin, unsigned int end) {
unsigned int count = 0;
for (unsigned int n = begin; n < end; ++n) {
if (is_prime(n)) ++count;
}
return count;
}
volatile unsigned int dont_optimize;
void parallelize_count(unsigned int nthreads, unsigned int begin, unsigned int end) {
std::vector<std::thread> threads;
unsigned int chunk_size = (end - begin) / nthreads;
for (unsigned int i = 0; i < nthreads; ++i) {
unsigned int chunk_begin = chunk_size*i;
unsigned int chunk_end = chunk_begin + chunk_size;
threads.emplace_back([=]() {
dont_optimize = count_primes(chunk_begin, chunk_end);
});
}
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
}
int main() {
unsigned int repetitions = 3;
#ifdef NAIVE_PRIMES
unsigned int begin = 2, end = 100002;
#else
unsigned int begin = 2, end = 8000002;
#endif
std::cout << "#\ttime(s)" << std::endl;
for (unsigned int nthreads = 1; nthreads <= 64; ++nthreads) {
std::cout << nthreads << "\t" << avg_time_ms([=]() {
parallelize_count(nthreads, begin, end);
}, repetitions) << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment