Last active
November 18, 2024 16:06
-
-
Save qfgaohao/0a285941c38cceb186fcaa464b349320 to your computer and use it in GitHub Desktop.
List GPU Specs. The code is modified from the Udacity Parallel Computing Course.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
/**
 * Print the hardware properties of every CUDA device visible to the runtime.
 *
 * For each device this writes the name, compute capability, clock rates,
 * per-SM resources, cache/global memory sizes, and the block/grid launch
 * limits reported in cudaDeviceProp to stdout.
 *
 * Errors are reported on stderr:
 *  - if the device count cannot be obtained, nothing else is printed;
 *  - if the properties of one device cannot be read, that device is skipped
 *    (so an uninitialised cudaDeviceProp is never printed) and enumeration
 *    continues with the next one.
 */
void deviceQuery ()
{
    int nDevices = 0;
    cudaError_t ierr = cudaGetDeviceCount(&nDevices);
    if (ierr != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(ierr));
        return;
    }
    if (nDevices == 0) {
        fprintf(stderr, "No CUDA devices found.\n");
        return;
    }

    for (int i = 0; i < nDevices; ++i)
    {
        cudaDeviceProp prop;
        ierr = cudaGetDeviceProperties(&prop, i);
        if (ierr != cudaSuccess) {
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d: %s\n",
                    i, cudaGetErrorString(ierr));
            continue;
        }

        printf("Device number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Compute capability: %d.%d\n\n", prop.major, prop.minor);

        printf(" Clock Rate: %d kHz\n", prop.clockRate);
        printf(" Total SMs: %d \n", prop.multiProcessorCount);
        // size_t fields use %zu: %lu is only 32 bits wide on 64-bit Windows.
        printf(" Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
        printf(" Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
        printf(" Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
        printf(" L2 Cache Size: %d bytes\n", prop.l2CacheSize);
        printf(" Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
        printf(" Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

        printf(" Max threads per block: %d\n", prop.maxThreadsPerBlock);
        printf(" Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
        printf(" Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
        printf(" Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

        printf(" Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
        printf(" Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
        printf(" Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

        printf(" Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
        printf(" Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
        printf(" Warp size: %d\n\n", prop.warpSize);
    }
}
// Program entry point: print the specs of all visible CUDA devices, then exit
// with status 0.
int main()
{
    deviceQuery();
    return 0;
}
Device number: 0
Device name: Tesla V100-SXM2-32GB
Compute capability: 7.0
Clock Rate: 1530000 kHz
Total SMs: 80
Shared Memory Per SM: 98304 bytes
Registers Per SM: 65536 32-bit
Max threads per SM: 2048
L2 Cache Size: 6291456 bytes
Total Global Memory: 34089730048 bytes
Memory Clock Rate: 877000 kHz
Max threads per block: 1024
Max threads in X-dimension of block: 1024
Max threads in Y-dimension of block: 1024
Max threads in Z-dimension of block: 64
Max blocks in X-dimension of grid: 2147483647
Max blocks in Y-dimension of grid: 65535
Max blocks in Z-dimension of grid: 65535
Shared Memory Per Block: 49152 bytes
Registers Per Block: 65536 32-bit
Warp size: 32
They really have come a long way
To run this on Windows:
winget install Microsoft.VC++2015-2022Redist-x64
winget install Nvidia.CUDA
nvcc device_properties.cu -o device_properties -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.31.31103\bin\Hostx64\x64"
device_properties.exe
My results:
Device number: 0
Device name: GeForce GTX 650
Compute capability: 3.0
Clock Rate: 0 kHz
Total SMs: 2
Shared Memory Per SM: 49152 bytes
Registers Per SM: 65536 32-bit
Max threads per SM: 2048
L2 Cache Size: 262144 bytes
Total Global Memory: 1073741824 bytes
Memory Clock Rate: 0 kHz
Max threads per block: 1024
Max threads in X-dimension of block: 1024
Max threads in Y-dimension of block: 1024
Max threads in Z-dimension of block: 64
Max blocks in X-dimension of grid: 2147483647
Max blocks in Y-dimension of grid: 65535
Max blocks in Z-dimension of grid: 65535
Shared Memory Per Block: 49152 bytes
Registers Per Block: 65536 32-bit
Warp size: 32
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Device number: 0
Device name: GRID K520
Compute capability: 3.0
Clock Rate: 797000 kHz
Total SMs: 8
Shared Memory Per SM: 49152 bytes
Registers Per SM: 65536 32-bit
Max threads per SM: 2048
L2 Cache Size: 524288 bytes
Total Global Memory: 4232577024 bytes
Memory Clock Rate: 2500000 kHz
Max threads per block: 1024
Max threads in X-dimension of block: 1024
Max threads in Y-dimension of block: 1024
Max threads in Z-dimension of block: 64
Max blocks in X-dimension of grid: 2147483647
Max blocks in Y-dimension of grid: 65535
Max blocks in Z-dimension of grid: 65535
Shared Memory Per Block: 49152 bytes
Registers Per Block: 65536 32-bit
Warp size: 32