@apivovarov
Last active June 7, 2023 23:11
Simple program to test whether nvcc/CUDA work
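To build and run it (the same commands appear in the comments at the top of the source), make sure the CUDA toolkit's nvcc is on your PATH, then:

nvcc cuda_check.cu -o cuda_check -lcuda
./cuda_check

The program prints one block of device properties (name, compute capability, core counts, clocks, memory) per detected GPU.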
// To compile: nvcc cuda_check.cu -o cuda_check -lcuda
// To run:     ./cuda_check
// If nvcc needs an older host g++, point it there first, e.g.:
//   export NVCC_PREPEND_FLAGS='-ccbin /usr/local/gcc-11/bin/g++-11'
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <stdio.h>
/* Outputs some information on CUDA-enabled devices on your computer,
 * including compute capability and current memory usage.
 *
 * On Linux, compile with: nvcc -o cuda_check cuda_check.cu -lcuda
 * On Windows, compile with: nvcc -o cuda_check.exe cuda_check.cu -lcuda
 *
 * Authors: Thomas Unterthiner, Jan Schlüter
 */
int ConvertSMVer2Cores(int major, int minor) {
  // Returns the number of CUDA cores per multiprocessor for a given
  // Compute Capability version. There is no way to retrieve that via
  // the API, so it needs to be hard-coded.
  // See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's CUDA Samples.
  switch ((major << 4) + minor) {
    case 0x10:
      return 8; // Tesla
    case 0x11:
      return 8;
    case 0x12:
      return 8;
    case 0x13:
      return 8;
    case 0x20:
      return 32; // Fermi
    case 0x21:
      return 48;
    case 0x30:
      return 192; // Kepler
    case 0x32:
      return 192;
    case 0x35:
      return 192;
    case 0x37:
      return 192;
    case 0x50:
      return 128; // Maxwell
    case 0x52:
      return 128;
    case 0x53:
      return 128;
    case 0x60:
      return 64; // Pascal
    case 0x61:
      return 128;
    case 0x62:
      return 128;
    case 0x70:
      return 64; // Volta
    case 0x72:
      return 64; // Xavier
    case 0x75:
      return 64; // Turing
    default:
      return 0;
  }
}
int main() {
  const char *errStr;
  int nGpus;
  int i;
  char name[100];
  int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate;
  size_t freeMem;
  size_t totalMem;
  CUresult result;
  CUdevice device;
  CUcontext context;

  // Initialize the CUDA driver API.
  result = cuInit(0);
  if (result != CUDA_SUCCESS) {
    cuGetErrorString(result, &errStr);
    printf("cuInit failed with error code %d: %s\n", result, errStr);
    return 1;
  }
  result = cuDeviceGetCount(&nGpus);
  if (result != CUDA_SUCCESS) {
    cuGetErrorString(result, &errStr);
    printf("cuDeviceGetCount failed with error code %d: %s\n", result, errStr);
    return 1;
  }
  printf("Found %d device(s).\n", nGpus);
  for (i = 0; i < nGpus; i++) {
    cuDeviceGet(&device, i);
    printf("Device: %d\n", i);
    if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) {
      printf(" Name: %s\n", &name[0]);
    }
    if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) ==
        CUDA_SUCCESS) {
      printf(" Compute Capability: %d.%d\n", cc_major, cc_minor);
    }
    if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
                             device) == CUDA_SUCCESS) {
      printf(" Multiprocessors: %d\n", cores);
      if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) ==
          CUDA_SUCCESS) {
        cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor);
        if (cuda_cores > 0) {
          printf(" CUDA Cores: %d\n", cuda_cores);
        } else {
          printf(" CUDA Cores: unknown\n");
        }
      }
      if (cuDeviceGetAttribute(
              &threads_per_core,
              CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
              device) == CUDA_SUCCESS) {
        printf(" Concurrent threads: %d\n", cores * threads_per_core);
      }
    }
    if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
                             device) == CUDA_SUCCESS) {
      printf(" GPU clock: %g MHz\n", clockrate / 1000.);
    }
    if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
                             device) == CUDA_SUCCESS) {
      printf(" Memory clock: %g MHz\n", clockrate / 1000.);
    }
    // A context is required before cuMemGetInfo can report memory usage.
    cuCtxCreate(&context, 0, device);
    result = cuMemGetInfo(&freeMem, &totalMem);
    if (result == CUDA_SUCCESS) {
      printf(" Total Memory: %zu MiB\n Free Memory: %zu MiB\n",
             totalMem / (1024 * 1024), freeMem / (1024 * 1024));
    } else {
      cuGetErrorString(result, &errStr);
      printf(" cuMemGetInfo failed with error code %d: %s\n", result, errStr);
    }
    cuCtxDetach(context);
  }
  return 0;
}
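For comparison, below is a minimal sketch of the same sanity check written against the CUDA runtime API instead of the driver API. It is not part of the gist above; the file name runtime_check.cu and the output format are illustrative. It needs no -lcuda, since nvcc links the runtime library by default (compile with nvcc runtime_check.cu -o runtime_check).

// runtime_check.cu -- hypothetical runtime-API variant of the check above.
#include <cuda_runtime_api.h>
#include <stdio.h>

int main() {
  int nGpus = 0;
  cudaError_t err = cudaGetDeviceCount(&nGpus);
  if (err != cudaSuccess) {
    printf("cudaGetDeviceCount failed with error code %d: %s\n", err,
           cudaGetErrorString(err));
    return 1;
  }
  printf("Found %d device(s).\n", nGpus);
  for (int i = 0; i < nGpus; i++) {
    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, i) != cudaSuccess) continue;
    printf("Device: %d\n", i);
    printf(" Name: %s\n", prop.name);
    printf(" Compute Capability: %d.%d\n", prop.major, prop.minor);
    printf(" Multiprocessors: %d\n", prop.multiProcessorCount);
    size_t freeMem = 0, totalMem = 0;
    // Select the device so cudaMemGetInfo reports its memory, not device 0's.
    if (cudaSetDevice(i) == cudaSuccess &&
        cudaMemGetInfo(&freeMem, &totalMem) == cudaSuccess) {
      printf(" Total Memory: %zu MiB\n Free Memory: %zu MiB\n",
             totalMem / (1024 * 1024), freeMem / (1024 * 1024));
    }
  }
  return 0;
}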