Last active
February 23, 2025 16:00
-
-
Save f0k/0d6431e3faa60bffc788f8b4daa029b1 to your computer and use it in GitHub Desktop.
Simple program to test whether nvcc/CUDA work
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <cuda.h> | |
#include <cuda_runtime_api.h> | |
/* Outputs some information on CUDA-enabled devices on your computer,
 * including compute capability and current memory usage.
 *
 * On Linux, compile with: nvcc -o cuda_check cuda_check.c -lcuda
 * On Windows, compile with: nvcc -o cuda_check.exe cuda_check.c -lcuda
 *
 * Authors: Thomas Unterthiner, Jan Schlüter
 */
/* Returns the number of CUDA cores per multiprocessor for a given
 * Compute Capability version, or 0 if the version is not in the table.
 *
 * There is no way to retrieve that via the API, so it needs to be
 * hard-coded. See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's
 * CUDA Samples.
 *
 * major: major compute capability number (e.g. 8 for 8.6)
 * minor: minor compute capability number (e.g. 6 for 8.6)
 */
int ConvertSMVer2Cores(int major, int minor)
{
    // The lookup key packs the version as 0xMm (one hex digit each for the
    // minor part). Reject values that would not fit: a minor above 0xf would
    // alias another architecture, and shifting a negative major is undefined.
    if (major < 0 || minor < 0 || minor > 0xf) {
        return 0;
    }

    switch ((major << 4) + minor) {
        case 0x10: return 8;    // Tesla
        case 0x11: return 8;
        case 0x12: return 8;
        case 0x13: return 8;
        case 0x20: return 32;   // Fermi
        case 0x21: return 48;
        case 0x30: return 192;  // Kepler
        case 0x32: return 192;
        case 0x35: return 192;
        case 0x37: return 192;
        case 0x50: return 128;  // Maxwell
        case 0x52: return 128;
        case 0x53: return 128;
        case 0x60: return 64;   // Pascal
        case 0x61: return 128;
        case 0x62: return 128;
        case 0x70: return 64;   // Volta
        case 0x72: return 64;   // Xavier
        case 0x75: return 64;   // Turing
        case 0x80: return 64;   // Ampere
        case 0x86: return 128;
        case 0x87: return 128;
        case 0x89: return 128;  // Ada
        case 0x90: return 128;  // Hopper
        case 0xa0: return 128;  // Blackwell
        case 0xa1: return 128;
        case 0xc0: return 128;
        default: return 0;      // unknown architecture
    }
}
int main() | |
{ | |
int nGpus; | |
int i; | |
char name[100]; | |
int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate; | |
size_t freeMem; | |
size_t totalMem; | |
CUresult result; | |
CUdevice device; | |
CUcontext context; | |
result = cuInit(0); | |
if (result != CUDA_SUCCESS) { | |
printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
return 1; | |
} | |
result = cuDeviceGetCount(&nGpus); | |
if (result != CUDA_SUCCESS) { | |
printf("cuDeviceGetCount failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
return 1; | |
} | |
printf("Found %d device(s).\n", nGpus); | |
for (i = 0; i < nGpus; i++) { | |
cuDeviceGet(&device, i); | |
printf("Device: %d\n", i); | |
if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) { | |
printf(" Name: %s\n", &name[0]); | |
} | |
if ((cuDeviceGetAttribute(&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device) == CUDA_SUCCESS) && | |
(cuDeviceGetAttribute(&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device) == CUDA_SUCCESS)) { | |
printf(" Compute Capability: %d.%d\n", cc_major, cc_minor); | |
} | |
else { | |
cc_major = cc_minor = 0; | |
} | |
if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device) == CUDA_SUCCESS) { | |
printf(" Multiprocessors: %d\n", cores); | |
if (cc_major && cc_minor) { | |
cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor); | |
if (cuda_cores > 0) { | |
printf(" CUDA Cores: %d\n", cuda_cores); | |
} | |
else { | |
printf(" CUDA Cores: unknown\n"); | |
} | |
} | |
if (cuDeviceGetAttribute(&threads_per_core, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device) == CUDA_SUCCESS) { | |
printf(" Concurrent threads: %d\n", cores*threads_per_core); | |
} | |
} | |
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device) == CUDA_SUCCESS) { | |
printf(" GPU clock: %g MHz\n", clockrate/1000.); | |
} | |
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device) == CUDA_SUCCESS) { | |
printf(" Memory clock: %g MHz\n", clockrate/1000.); | |
} | |
cuCtxCreate(&context, 0, device); | |
result = cuMemGetInfo(&freeMem, &totalMem); | |
if (result == CUDA_SUCCESS ) { | |
printf(" Total Memory: %ld MiB\n Free Memory: %ld MiB\n", totalMem / ( 1024 * 1024 ), freeMem / ( 1024 * 1024 )); | |
} else { | |
printf(" cMemGetInfo failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
} | |
cuCtxDestroy(context); | |
} | |
return 0; | |
} |
Thanks @igormorgado, I've updated the gist accordingly (and also added some missing architectures to ConvertSMVer2Cores).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To compile use:
To fix the deprecation warning, make the following changes:
Where you read
Replace with:
And where you read
Replace with:
Here