#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
typedef void(runner_t)(double *data, double *res, int n, int nthreads);
/* Some heavy arithmetic per element, so there is enough work to parallelize. */
static inline double calc(double val)
{
    return log(val) * sqrt(val) * sin(val) * cos(val) * log10(val) * sinh(val) * cosh(val) * exp(val);
}
/* Time one invocation of `run` with gettimeofday() and print the wall-clock
 * duration together with this process's MPI rank. */
void timer(char *str, double *data, double *res, int n, int nthreads, runner_t run)
{
    struct timeval start, end;
    gettimeofday(&start, NULL);
    run(data, res, n, nthreads);
    gettimeofday(&end, NULL);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    double delta = ((end.tv_sec - start.tv_sec) * 1000000u + end.tv_usec - start.tv_usec) / 1.e6;
    printf("rank %d: %s elapsed %lf s\n", rank, str, delta);
}
/* Sequential baseline: a single thread processes the whole array. */
void seq(double *data, double *res, int n, int nthreads)
{
    for (int i = 0; i < n; i++) {
        res[i] = calc(data[i]);
    }
}
/* OpenMP version: the loop iterations are divided among `nthreads` threads. */
void omp(double *data, double *res, int n, int nthreads)
{
    omp_set_num_threads(nthreads);
#pragma omp parallel for
    for (int i = 0; i < n; i++) {
        res[i] = calc(data[i]);
    }
}
/* Per-thread work description for the pthreads version: process data[l, r). */
typedef struct context_t {
    double *data;
    double *res;
    int l;
    int r;
} context_t;

#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
/* Thread entry point: process the slice described by the context it receives. */
static void *worker(void *context)
{
    context_t *ctx = context;
    for (int i = ctx->l; i < ctx->r; i++) {
        ctx->res[i] = calc(ctx->data[i]);
    }
    return NULL;
}

/* pthreads version: split the array into `nthreads` contiguous chunks.
 * The chunk size is rounded up so the trailing elements are not dropped
 * when n is not divisible by nthreads. */
void pthreads(double *data, double *res, int n, int nthreads)
{
    pthread_t threads[nthreads];
    context_t ctxs[nthreads];
    int chunk = (n + nthreads - 1) / nthreads;

    for (int i = 0; i < nthreads; i++) {
        ctxs[i] = (context_t) {
            .data = data,
            .res = res,
            .l = chunk * i,
            .r = MIN(chunk * i + chunk, n),
        };
        pthread_create(threads + i, NULL, worker, ctxs + i);
    }
    for (int i = 0; i < nthreads; i++)
        pthread_join(threads[i], NULL);
}
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* Every rank runs the full workload independently: the point is to
     * compare the sequential, OpenMP and pthreads runners on each node. */
    int n = 1e7;
    srand(time(NULL));
    double *data = calloc(n, sizeof(double));
    double *res = calloc(n, sizeof(double));
    for (int i = 0; i < n; i++) {
        data[i] = (double)rand() / (double)RAND_MAX;
    }

    timer("seq", data, res, n, 4, seq);
    timer("openMP", data, res, n, 12, omp);
    timer("pthreads", data, res, n, 12, pthreads);

    free(data);
    free(res);
    MPI_Finalize();
    return 0;
}
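Note that the program initializes MPI but never divides the work between ranks: every rank processes the full array on its own, so the ranks only provide repeated, concurrent measurements of the same single-node comparison. For reference, a minimal sketch of how the array could instead be partitioned across ranks, assuming it lives in the same file and reuses the omp() runner; the even-division assumption and the MPI_Scatter/MPI_Gather-based mpi_split() helper are illustrative additions, not part of the gist:

/*
 * Hypothetical variant (not in the gist): scatter one slice per rank,
 * compute only that slice locally, then gather the results on rank 0.
 * Assumes n is divisible by the number of ranks for brevity.
 */
void mpi_split(double *data, double *res, int n, int nthreads)
{
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int local_n = n / size;                         /* per-rank slice length */
    double *local_data = calloc(local_n, sizeof(double));
    double *local_res = calloc(local_n, sizeof(double));

    /* Distribute the input from rank 0, compute locally, collect on rank 0. */
    MPI_Scatter(data, local_n, MPI_DOUBLE, local_data, local_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    omp(local_data, local_res, local_n, nthreads);
    MPI_Gather(local_res, local_n, MPI_DOUBLE, res, local_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    free(local_data);
    free(local_res);
}

With such a split, the timings would measure the distributed run rather than per-rank repeats of the same work.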
CFLAGS = -fopenmp \
	-pthread \
	-lm \
	-O3 \
	-ggdb \
	-std=gnu99

mpi:
	mpicc mpi_run.c -o mpi_run $(CFLAGS)

clean:
	rm -f mpi_run
rank 0: seq elapsed 1.447793 s
rank 1: seq elapsed 1.682763 s
rank 3: seq elapsed 1.687242 s
rank 2: seq elapsed 1.692448 s
rank 0: openMP elapsed 1.240302 s
rank 1: openMP elapsed 1.411996 s
rank 2: openMP elapsed 1.422169 s
rank 3: openMP elapsed 1.409507 s
rank 0: pthreads elapsed 1.225619 s
rank 1: pthreads elapsed 1.419391 s
rank 2: pthreads elapsed 1.426155 s
rank 3: pthreads elapsed 1.415929 s
rank 0: seq elapsed 0.351817 s
rank 1: seq elapsed 0.346876 s
rank 2: seq elapsed 0.354236 s
rank 3: seq elapsed 0.364078 s
rank 0: openMP elapsed 0.292070 s
rank 1: openMP elapsed 0.291127 s
rank 2: openMP elapsed 0.294245 s
rank 3: openMP elapsed 0.310225 s
rank 0: pthreads elapsed 0.294949 s
rank 1: pthreads elapsed 0.293086 s
rank 2: pthreads elapsed 0.297634 s
rank 3: pthreads elapsed 0.307837 s
rank 2: seq elapsed 0.367306 s
rank 3: seq elapsed 0.362873 s
rank 1: seq elapsed 0.366119 s
rank 0: seq elapsed 0.385088 s
rank 0: openMP elapsed 0.075559 s
rank 2: openMP elapsed 0.099787 s
rank 3: openMP elapsed 0.097896 s
rank 1: openMP elapsed 0.099792 s
rank 2: pthreads elapsed 0.064278 s
rank 0: pthreads elapsed 0.093605 s
rank 3: pthreads elapsed 0.088347 s
rank 1: pthreads elapsed 0.094478 s
sbatch -N4 --ntasks-per-node=1 ompi ./mpi_run