Skip to content

Instantly share code, notes, and snippets.

@jmoy
Last active November 16, 2021 05:42
Show Gist options
  • Save jmoy/decef29f5da86c44d4bc56de2f9eefea to your computer and use it in GitHub Desktop.
Save jmoy/decef29f5da86c44d4bc56de2f9eefea to your computer and use it in GitHub Desktop.
#include <vector>
#include <thread>
#include <mutex>
#include <numeric>
#include <functional>
#include <cassert>
using namespace std;
#include "calc_mean.h"
// Thread worker: adds up vec[start], ..., vec[end-1] into a private
// accumulator and then folds that partial sum into `res`.
//   start, end : half-open index range [start, end) to sum
//   vec        : pointer to the first element of the array being summed
//   res        : shared running total; only touched while `mx` is held
//   mx         : mutex guarding `res`
void summer(size_t start, size_t end,
            const double *vec,
            double &res, std::mutex &mx)
{
    double partial = 0.0;
    size_t idx = start;
    while (idx < end) {
        partial += vec[idx];
        ++idx;
    }
    // Take the lock only for the single shared update, after all the
    // summation work has been done on the private accumulator.
    const std::lock_guard<std::mutex> guard(mx);
    res += partial;
}
// Computes the arithmetic mean of `v`, summing contiguous chunks of the
// vector on separate threads (each chunk is handled by `summer`).
//   v        : values to average
//   nthreads : requested number of worker threads; values below 1 are
//              treated as 1, and never more threads than elements are used
// Returns sum(v)/v.size(). For an empty vector no thread is started and the
// result is the floating-point quotient 0.0/0 (NaN under IEEE arithmetic).
// If a thread cannot be started, the threads already running are joined and
// the exception is rethrown.
double calc_mean(const std::vector<double> &v,int nthreads){
    const size_t n = v.size();
    double sum = 0.0;
    if (n==0)
        return sum/n;   // nothing to sum; avoids touching v's storage

    size_t workers = nthreads<1 ? 1 : static_cast<size_t>(nthreads);
    if (workers>n)
        workers = n;
    // Ceiling division, so that the remainder is spread over the requested
    // workers instead of spawning an extra thread for a tiny tail chunk.
    const size_t chunksz = n/workers + (n%workers!=0 ? 1 : 0);

    std::mutex mx;
    std::vector<std::thread> pool;
    pool.reserve(workers);
    try {
        for (size_t i=0;i<n;){
            // Clamp the last chunk to the end of the vector.
            const size_t j = (n-i<chunksz) ? n : i+chunksz;
            pool.emplace_back(summer,i,j,v.data(),
                              std::ref(sum),std::ref(mx));
            i = j;
        }
    } catch (...) {
        // Destroying a joinable std::thread calls std::terminate, and the
        // workers reference `sum` and `mx` on this stack frame: join first.
        for (auto &t: pool)
            t.join();
        throw;
    }
    for (auto &t: pool)
        t.join();
    return sum/n;
}
#ifndef JMOY_CALC_MEAN_H
#define JMOY_CALC_MEAN_H
// The declaration below names std::vector, so include it here rather than
// relying on every includer to have pulled in <vector> first.
#include <vector>
// Returns the arithmetic mean of `v`. The sum is computed by splitting `v`
// into chunks that are added up on separate threads; `nthreads` controls how
// many chunks/threads are used.
double calc_mean(const std::vector<double> &v, int nthreads);
#endif
# Builds the `memlimited` benchmark executable from memlimited.cc and
# calc_mean.cc; it is rebuilt whenever either source or calc_mean.h changes.
# -ffast-math is what allows the summation loop to be vectorized, and
# -pthread is needed because calc_mean.cc uses std::thread.
memlimited: memlimited.cc calc_mean.cc calc_mean.h
clang++ -o memlimited -O3 --std=c++17 \
-ffast-math -march=native \
memlimited.cc calc_mean.cc -pthread
//Calculate the mean of a vector
//Use Makefile or flags therein to compile. -ffast-math is necessary to trigger vectorization
//Run as memlimited [n] [s]
// `n`: number of threads
// `s`: work with vector of size 2^s
//
//Try:
// n=1,4,10
// s=27,28,29
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>
using namespace std;
#include "calc_mean.h"
// Number of initial calc_mean calls made before timing starts; their
// results and durations are discarded (warm-up).
const int NBURNINS=10;
// Number of timed calc_mean calls; their durations are averaged to produce
// the reported speed.
const int NITERS=2;
// Benchmark driver. Usage: memlimited [nthreads] [s]
//   argv[1] : number of threads passed to calc_mean (must be >= 1)
//   argv[2] : size exponent s; the vector has 2^s elements (must be >= 1 and
//             small enough that 2^s elements fit in a vector<double>)
// Fills the vector with the repeating pattern -6..6, calls calc_mean
// NBURNINS times untimed, then NITERS times timed, and prints the average
// throughput in billions of elements per second.
// Returns 0 on success and 1 on a usage or argument error.
int main(int argc,char *argv[])
{
    if (argc!=3){
        std::cerr<<"Usage: memlimited [nthreads] [size]\n";
        return 1;
    }
    const int nthreads = std::atoi(argv[1]);
    // Keep the exponent signed until it has been validated: converting a
    // negative value to size_t would yield a huge number that passes a
    // "< 1" check and then makes the shift below undefined.
    const int log2_size = std::atoi(argv[2]);
    if (nthreads<1){
        std::cerr<<"Must specify at least one thread\n";
        return 1;
    }
    if (log2_size<1) {
        std::cerr<<"Must specify a positive size\n";
        return 1;
    }
    // Shifting by >= the bit width of size_t is undefined, and a vector
    // cannot hold more than max_size() elements anyway.
    const size_t max_elems = std::vector<double>().max_size();
    if (log2_size>=std::numeric_limits<size_t>::digits
        || (size_t{1}<<log2_size)>max_elems){
        std::cerr<<"Size exponent too large\n";
        return 1;
    }
    // Shift a size_t, not a long: long is only 32 bits on some platforms.
    const size_t N = size_t{1}<<log2_size;

    std::vector<double> v(N);
    for (size_t i=0;i<N;++i)
        v[i] = static_cast<double>(static_cast<long>(i%13)-6);

    // Warm-up runs: not timed.
    for (int it=0;it<NBURNINS;++it)
        calc_mean(v,nthreads);

    double total_runtime = 0.0;   // seconds, summed over the timed runs
    for (int it=0;it<NITERS;++it){
        const auto tbegin = std::chrono::steady_clock::now();
        calc_mean(v,nthreads);    // only the elapsed time is of interest
        const std::chrono::duration<double> elapsed =
            std::chrono::steady_clock::now()-tbegin;
        total_runtime += elapsed.count();
    }
    const double av_runtime = total_runtime/NITERS;
    std::cout<<"\nSpeed = "<< N*1e-9/av_runtime << " G elems/s\n";
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment