-
-
Save f0k/0d6431e3faa60bffc788f8b4daa029b1 to your computer and use it in GitHub Desktop.
| #include <stdio.h> | |
| #include <cuda.h> | |
| #include <cuda_runtime_api.h> | |
| /* Outputs some information on CUDA-enabled devices on your computer, | |
| * including compute capability and current memory usage. | |
| * | |
| * On Linux, compile with: nvcc -o cuda_check cuda_check.c -lcuda | |
| * On Windows, compile with: nvcc -o cuda_check.exe cuda_check.c -lcuda | |
| * | |
| * Authors: Thomas Unterthiner, Jan Schlüter | |
| */ | |
/* Returns the number of CUDA cores per multiprocessor for a given
 * Compute Capability version, or 0 if the version is not in the table.
 *
 * There is no way to retrieve that via the API, so it needs to be
 * hard-coded. See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's
 * CUDA Samples.
 *
 * major: Compute Capability major version (e.g. 8 for 8.6)
 * minor: Compute Capability minor version (e.g. 6 for 8.6)
 */
int ConvertSMVer2Cores(int major, int minor)
{
    // The version is packed as 0xMm (one hex digit each for major and
    // minor) so the case labels read like the version number itself.
    switch ((major << 4) + minor) {
        case 0x10: return 8;    // Tesla
        case 0x11: return 8;
        case 0x12: return 8;
        case 0x13: return 8;
        case 0x20: return 32;   // Fermi
        case 0x21: return 48;
        case 0x30: return 192;  // Kepler
        case 0x32: return 192;
        case 0x35: return 192;
        case 0x37: return 192;
        case 0x50: return 128;  // Maxwell
        case 0x52: return 128;
        case 0x53: return 128;
        case 0x60: return 64;   // Pascal
        case 0x61: return 128;
        case 0x62: return 128;
        case 0x70: return 64;   // Volta
        case 0x72: return 64;   // Xavier
        case 0x75: return 64;   // Turing
        case 0x80: return 64;   // Ampere
        case 0x86: return 128;
        case 0x87: return 128;
        case 0x89: return 128;  // Ada
        case 0x90: return 128;  // Hopper (was mistyped as 129)
        default: return 0;      // unknown architecture
    }
}
| int main() | |
| { | |
| int nGpus; | |
| int i; | |
| char name[100]; | |
| int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate; | |
| size_t freeMem; | |
| size_t totalMem; | |
| CUresult result; | |
| CUdevice device; | |
| CUcontext context; | |
| result = cuInit(0); | |
| if (result != CUDA_SUCCESS) { | |
| printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
| return 1; | |
| } | |
| result = cuDeviceGetCount(&nGpus); | |
| if (result != CUDA_SUCCESS) { | |
| printf("cuDeviceGetCount failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
| return 1; | |
| } | |
| printf("Found %d device(s).\n", nGpus); | |
| for (i = 0; i < nGpus; i++) { | |
| cuDeviceGet(&device, i); | |
| printf("Device: %d\n", i); | |
| if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) { | |
| printf(" Name: %s\n", &name[0]); | |
| } | |
| if ((cuDeviceGetAttribute(&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device) == CUDA_SUCCESS) && | |
| (cuDeviceGetAttribute(&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device) == CUDA_SUCCESS)) { | |
| printf(" Compute Capability: %d.%d\n", cc_major, cc_minor); | |
| } | |
| else { | |
| cc_major = cc_minor = 0; | |
| } | |
| if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device) == CUDA_SUCCESS) { | |
| printf(" Multiprocessors: %d\n", cores); | |
| if (cc_major && cc_minor) { | |
| cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor); | |
| if (cuda_cores > 0) { | |
| printf(" CUDA Cores: %d\n", cuda_cores); | |
| } | |
| else { | |
| printf(" CUDA Cores: unknown\n"); | |
| } | |
| } | |
| if (cuDeviceGetAttribute(&threads_per_core, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device) == CUDA_SUCCESS) { | |
| printf(" Concurrent threads: %d\n", cores*threads_per_core); | |
| } | |
| } | |
| if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device) == CUDA_SUCCESS) { | |
| printf(" GPU clock: %g MHz\n", clockrate/1000.); | |
| } | |
| if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device) == CUDA_SUCCESS) { | |
| printf(" Memory clock: %g MHz\n", clockrate/1000.); | |
| } | |
| cuCtxCreate(&context, 0, device); | |
| result = cuMemGetInfo(&freeMem, &totalMem); | |
| if (result == CUDA_SUCCESS ) { | |
| printf(" Total Memory: %ld MiB\n Free Memory: %ld MiB\n", totalMem / ( 1024 * 1024 ), freeMem / ( 1024 * 1024 )); | |
| } else { | |
| printf(" cMemGetInfo failed with error code %d: %s\n", result, cudaGetErrorString(result)); | |
| } | |
| cuCtxDestroy(context); | |
| } | |
| return 0; | |
| } |
# Replace
printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result));
# With
const char* errStr;
cuGetErrorString(result, &errStr);
printf("cuInit failed with error code %d: %s\n", result, errStr);
Fix other two cudaGetErrorString too.
Fixed cuda_check.cu
jag@Aigen:~$ nvtop
No GPU to monitor.
@apivovarov: Thanks, nice catch! cuInit is from the driver API and returns a CUresult, while cudaGetErrorString is from the runtime API and expects a cudaError, so the code is mixing the two.
In any case, there are two ways to compile this code:
- The one mentioned in the beginning of the file is to store it as `cuda_check.c` and compile it with `nvcc -o cuda_check cuda_check.c -lcuda`. This gives a deprecation warning on `cuDeviceComputeCapability` (as also seen by @zhmlcg), but still works.
- The one attempted by @prabathbr and @apivovarov is to store the file as `cuda_check.cu` and compile it with `nvcc -o cuda_check cuda_check.cu`. This one does not work.
I guess the code should be fixed to consistently use only the driver API or only the runtime API, but it still works (and is backwards-compatible down to CUDA 3 or so).
To compile use:
nvcc -o cuda_check cuda_check.c -lcuda
To fix the deprecation warning, make the following changes:
Where you read
if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
replace it with:
if ((cuDeviceGetAttribute(&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device) == CUDA_SUCCESS) &&
(cuDeviceGetAttribute(&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device) == CUDA_SUCCESS)) {
And where you read
cuCtxDetach(context);
replace it with:
cuCtxDestroy(context);
Here
➤ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:16:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0
Thanks @igormorgado, I've updated the gist accordingly (and also added some missing architectures to ConvertSMVer2Cores).
with CUDA 11.5...