Last active
November 16, 2021 03:38
-
-
Save jmoy/2f33b952de19bae8a9723a90d5af54bb to your computer and use it in GitHub Desktop.
A memory bandwidth limited computation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
using namespace std; | |
#include "calc_mean.h" | |
/// Computes the arithmetic mean of the elements of `v`.
///
/// The summation is written as an OpenMP `parallel for simd` reduction; when
/// the file is compiled without OpenMP support the pragma has no effect and
/// the loop runs serially.
///
/// @param v values to average
/// @return the sum of all elements divided by v.size(), or 0.0 when v is
///         empty (a plain division would yield NaN from 0.0/0).
double calc_mean(const std::vector<double> &v){
  // Hoisted so the loop bound is a plain loop-invariant value.
  const std::size_t count = v.size();
  if (count == 0)
    return 0.0;

  double sum = 0.0;
  #pragma omp parallel for simd reduction(+:sum)
  for (std::size_t i = 0; i < count; ++i)
    sum += v[i];
  return sum / static_cast<double>(count);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef JMOY_CALC_MEAN_H
#define JMOY_CALC_MEAN_H

// Included here so the header compiles no matter what the including file
// has (or has not) included before it.
#include <vector>

/// Returns the arithmetic mean of the elements of the given vector.
double calc_mean(const std::vector<double> &);

#endif  // JMOY_CALC_MEAN_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the benchmark driver together with the mean kernel in one step.
# -fopenmp is needed for the OpenMP pragma in calc_mean.cc and for the
# omp_set_num_threads call in memlimited.cc.
memlimited: memlimited.cc calc_mean.cc calc_mean.h
	g++ -o memlimited -O3 -march=native -fopenmp memlimited.cc calc_mean.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
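//Simulate calculating means and variances (only the mean is computed here)
//Compile with clang++ -o memlimited -O3 -march=native -fopenmp memlimited.cc calc_mean.cc
//Run as memlimited [n] [s]
// where n is the number of threads, e.g. n=1,4,10
// and s is the base-2 logarithm of the vector length, e.g. s=27,28,29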
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>
using namespace std;
#include <omp.h>
#include "calc_mean.h"
// Number of untimed warm-up calls to calc_mean made before measuring.
constexpr int NBURNINS = 10;
// Number of timed calls to calc_mean whose durations are averaged.
constexpr int NITERS = 2;
int main(int argc,char *argv[]) | |
{ | |
if (argc!=3){ | |
cerr<<"Usage: memlimited [nthreads] [size]\n"; | |
return 1; | |
} | |
int nthreads = atoi(argv[1]); | |
size_t N = atoi(argv[2]); | |
if (nthreads<1){ | |
cerr<<"Must specify at least one thread\n"; | |
return 1; | |
} | |
if (N<1) { | |
cerr<<"Must specify a positive size\n"; | |
return 1; | |
} | |
N = 1L<<N; | |
omp_set_num_threads(nthreads); | |
vector<double> v(N); | |
for (size_t i=0;i<N;++i){ | |
v[i] = ((long)i%13)-6; | |
} | |
vector<double> runtimes(NITERS); | |
for (size_t it=0;it<NBURNINS+NITERS;++it){ | |
if (it<NBURNINS){ | |
calc_mean(v); | |
} else { | |
auto tbegin = chrono::steady_clock::now(); | |
calc_mean(v); | |
auto dur = chrono::steady_clock::now()-tbegin; | |
runtimes[it-NBURNINS] = | |
(double) dur.count()*decltype(dur)::period::num | |
/decltype(dur)::period::den; | |
} | |
} | |
double av_runtime=0.0; | |
for (auto d: runtimes) | |
av_runtime += d; | |
av_runtime /= NITERS; | |
cout<<"\nSpeed = "<< N*1e-9/av_runtime << "G elems/s\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment