Last active
November 18, 2024 16:06
-
-
Save qfgaohao/0a285941c38cceb186fcaa464b349320 to your computer and use it in GitHub Desktop.
List GPU Specs. The code is modified from the Udacity Parallel Computing Course.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
/*
 * Enumerate every CUDA device visible to the runtime and print a summary of
 * its hardware limits (SM count, memory sizes, block/grid limits, ...) to
 * stdout.
 *
 * Errors from the CUDA runtime are reported on stderr:
 *   - if the device count cannot be obtained, nothing else is printed;
 *   - if the properties of one device cannot be read, that device is skipped
 *     and enumeration continues with the next one.
 */
void deviceQuery ()
{
    int nDevices = 0;

    cudaError_t ierr = cudaGetDeviceCount(&nDevices);
    if (ierr != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(ierr));
        return;
    }

    for (int i = 0; i < nDevices; ++i)
    {
        cudaDeviceProp prop;

        ierr = cudaGetDeviceProperties(&prop, i);
        if (ierr != cudaSuccess) {
            /* prop is not valid here; printing it would show garbage. */
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d: %s\n",
                    i, cudaGetErrorString(ierr));
            continue;
        }

        printf("Device number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Compute capability: %d.%d\n\n", prop.major, prop.minor);

        /* NOTE(review): clockRate / memoryClockRate are reported as deprecated in
           recent CUDA toolkits; cudaDeviceGetAttribute is presumably the
           replacement -- confirm against the toolkit version in use. */
        printf(" Clock Rate: %d kHz\n", prop.clockRate);
        printf(" Total SMs: %d \n", prop.multiProcessorCount);
        /* size_t fields use %zu: %lu is only 32 bits wide on 64-bit Windows. */
        printf(" Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
        printf(" Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
        printf(" Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
        printf(" L2 Cache Size: %d bytes\n", prop.l2CacheSize);
        printf(" Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
        printf(" Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

        printf(" Max threads per block: %d\n", prop.maxThreadsPerBlock);
        printf(" Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
        printf(" Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
        printf(" Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

        printf(" Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
        printf(" Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
        printf(" Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

        printf(" Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
        printf(" Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
        printf(" Warp size: %d\n\n", prop.warpSize);
    }
}
/* Program entry point: print the specs of all CUDA devices, then exit. */
int main()
{
    deviceQuery();
    return 0;
}
To run this on Windows:
winget install Microsoft.VC++2015-2022Redist-x64
winget install Nvidia.CUDA
nvcc device_properties.cu -o device_properties -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.31.31103\bin\Hostx64\x64"
device_properties.exe
My results:
Device number: 0
Device name: GeForce GTX 650
Compute capability: 3.0
Clock Rate: 0 kHz
Total SMs: 2
Shared Memory Per SM: 49152 bytes
Registers Per SM: 65536 32-bit
Max threads per SM: 2048
L2 Cache Size: 262144 bytes
Total Global Memory: 1073741824 bytes
Memory Clock Rate: 0 kHz
Max threads per block: 1024
Max threads in X-dimension of block: 1024
Max threads in Y-dimension of block: 1024
Max threads in Z-dimension of block: 64
Max blocks in X-dimension of grid: 2147483647
Max blocks in Y-dimension of grid: 65535
Max blocks in Z-dimension of grid: 65535
Shared Memory Per Block: 49152 bytes
Registers Per Block: 65536 32-bit
Warp size: 32
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
They really have come a long way