@mkuron
Last active May 23, 2016 08:19
CUDA + MPI
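This gist pairs a short MPI program that reports the CUDA devices visible to each rank with a Makefile that builds it through nvcc, pulling the compile and link flags from the Open MPI compiler wrapper.

cuda_mpi.cu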
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <cuda.h>
#include <mpi.h>
#define MAX_NODES 100
#define BUFF_LEN 256
static void HandleError( cudaError_t err,
                         const char *file,
                         int line ) {
    if (err != cudaSuccess) {
        printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
                file, line );
        exit( EXIT_FAILURE );
    }
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
// Append the count and names of the CUDA devices visible to this
// process to buff.
void enumCudaDevices(char *buff)
{
    char tmpBuff[BUFF_LEN];
    int i, devCount;

    cudaError_t ret = cudaGetDeviceCount(&devCount);
    if (ret == cudaErrorNoDevice)
        devCount = 0;  // a node without GPUs is not an error here
    else
        HANDLE_ERROR(ret);
    sprintf(tmpBuff, " %3d", devCount);
    strncat(buff, tmpBuff, BUFF_LEN - strlen(buff) - 1);

    for (i = 0; i < devCount; i++)
    {
        cudaDeviceProp devProp;
        HANDLE_ERROR(cudaGetDeviceProperties(&devProp, i));
        sprintf(tmpBuff, " %d:%s", i, devProp.name);
        strncat(buff, tmpBuff, BUFF_LEN - strlen(buff) - 1);
    }
}
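/*
 * Each rank contributes one line of the form (hypothetical output;
 * hostname and device names depend on the cluster):
 *
 *   node01            0   2 0:Tesla K20m 1:Tesla K20m
 *
 * i.e. hostname, rank, device count, then "index:name" per device,
 * following the sprintf formats above and in main().
 */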
int main(int argc, char *argv[])
{
    int i, myrank, numprocs;
    char pName[MPI_MAX_PROCESSOR_NAME],
         buff[BUFF_LEN];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Get_processor_name(pName, &i);  // i receives the name length

    sprintf(buff, "%-15s %3d", pName, myrank);

    // Find local CUDA devices
    enumCudaDevices(buff);

    // Collect and print the list of CUDA devices from all MPI processes.
    // NOTE: assumes numprocs <= MAX_NODES.
    if (myrank == 0)
    {
        char devList[MAX_NODES][BUFF_LEN];
        MPI_Gather(buff, BUFF_LEN, MPI_CHAR,
                   devList, BUFF_LEN, MPI_CHAR,
                   0, MPI_COMM_WORLD);
        for (i = 0; i < numprocs; i++)
            printf("%s\n", devList[i]);
    }
    else
        MPI_Gather(buff, BUFF_LEN, MPI_CHAR,
                   NULL, 0, MPI_CHAR,  // recv arguments ignored on non-root ranks
                   0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
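A natural next step, not part of the original gist, is to bind each rank to one of the GPUs it just enumerated. Below is a minimal sketch that round-robins ranks over the visible devices; a real setup would use a node-local rank (e.g. via MPI_Comm_split_type with MPI_COMM_TYPE_SHARED) rather than the global rank.

// Sketch only: assign each MPI rank a CUDA device by rank modulo device count.
#include <stdio.h>
#include <cuda_runtime.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int rank, devCount = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (cudaGetDeviceCount(&devCount) != cudaSuccess)
        devCount = 0;  // treat "no device" like zero devices
    if (devCount > 0) {
        int dev = rank % devCount;  // round-robin over local devices
        cudaSetDevice(dev);
        printf("rank %d uses device %d\n", rank, dev);
    }
    MPI_Finalize();
    return 0;
}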
Makefile

MPI_VARIANT=-openmpi-mp
NVCCFLAGS=-gencode arch=compute_30,code=compute_30 -m64
CPPFLAGS=$(shell mpicc$(MPI_VARIANT) --showme:compile)
LDFLAGS=$(shell mpicc$(MPI_VARIANT) --showme:link)
NVCC=/usr/local/cuda/bin/nvcc
MPIEXEC=mpiexec$(MPI_VARIANT)

# Literal comma and space, which cannot appear directly as $(subst ...)
# arguments.
comma:=,
space:=
space +=

all: cuda_mpi

run: cuda_mpi
	./$<

run3: cuda_mpi
	$(MPIEXEC) -np 3 $<

clean:
	$(RM) cuda_mpi

# Build a .cu file with nvcc, translating the MPI wrapper's -Wl, link
# options into nvcc's -Xlinker syntax.
%: %.cu
	$(NVCC) $(CPPFLAGS) $(NVCCFLAGS) $(subst -Wl$(comma),-Xlinker$(space),$(LDFLAGS)) -o $@ $<
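For illustration (hypothetical wrapper output): if mpicc --showme:link printed

    -L/opt/local/lib -Wl,-rpath -Wl,/opt/local/lib -lmpi

the subst would hand nvcc

    -L/opt/local/lib -Xlinker -rpath -Xlinker /opt/local/lib -lmpi

since nvcc may reject gcc's -Wl, prefix but forwards -Xlinker arguments to the host linker.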