// Skip to content
//
// Instantly share code, notes, and snippets.
//
// @malfet
// Created October 6, 2025 21:59
// Show Gist options
//   • Save malfet/cc813faf85052f4b8e11c4efeda56c85 to your computer and use it in GitHub Desktop.
// Save malfet/cc813faf85052f4b8e11c4efeda56c85 to your computer and use it in GitHub Desktop.
#include <stdio.h>
// Debug kernel: every thread that executes it prints its own x index within
// the block using device-side printf. Takes no arguments and writes no memory.
__global__ void print() {
  const int tid = static_cast<int>(threadIdx.x);
  printf("Hello World of CUDA threadIdx.x=%d\n", tid);
}
// Empty kernel: takes no arguments and performs no work. main() launches it
// repeatedly, so any time spent is launch/scheduling overhead only.
__global__ void noop()
{
}
/**
 * Queries device 0, prints its basic properties, then enqueues 16384 launches
 * of the empty `noop` kernel and waits for them to finish.
 *
 * If any command-line argument is supplied (argc > 1), the device is asked to
 * use cudaDeviceScheduleYield before the launches are issued.
 *
 * Returns 0 (cudaSuccess) on success, otherwise the failing cudaError_t value.
 */
int main(int argc, const char *argv[]) {
  cudaDeviceProp prop;
  auto rc = cudaGetDeviceProperties(&prop, 0);
  // Validate the query BEFORE touching `prop`: on failure the struct is
  // uninitialized and printing it would emit garbage.
  if (rc != cudaSuccess) {
    printf("cudaGetDeviceProperties()=%d (%s)\n", rc, cudaGetErrorString(rc));
    return rc;
  }
  printf("Running on %s sm%d.%d multiProcessorCount = %d maxBlocksPerMultiProcessor = %d maxThreadsPerBlock = %d\n",
         prop.name, prop.major, prop.minor, prop.multiProcessorCount,
         prop.maxBlocksPerMultiProcessor, prop.maxThreadsPerBlock);

  if (argc > 1) {
    rc = cudaSetDeviceFlags(cudaDeviceScheduleYield);
    if (rc != cudaSuccess) {
      // Best effort: report the failure and keep going with default flags.
      printf("cudaSetDeviceFlags()=%d (%s)\n", rc, cudaGetErrorString(rc));
      // Clear the recorded error so it is not mistaken for a launch failure
      // by the cudaGetLastError() check below.
      (void)cudaGetLastError();
    }
  }

  // A block may not exceed the device's maxThreadsPerBlock; a larger request
  // makes every launch fail with an invalid-configuration error, so clamp the
  // requested size to what the device reports.
  const int requestedThreads = 16384;
  const int threadsPerBlock = requestedThreads < prop.maxThreadsPerBlock
                                  ? requestedThreads
                                  : prop.maxThreadsPerBlock;
  const int numBlocks = prop.multiProcessorCount * 10;
  for (auto cnt = 0; cnt < 16384; ++cnt) {
    noop<<<numBlocks, threadsPerBlock>>>();
  }

  // Kernel launches do not return a status; launch-configuration errors are
  // only visible through cudaGetLastError().
  rc = cudaGetLastError();
  if (rc != cudaSuccess) {
    printf("noop<<<%d, %d>>>()=%d (%s)\n", numBlocks, threadsPerBlock, rc,
           cudaGetErrorString(rc));
    return rc;
  }

  // Wait for all enqueued kernels; asynchronous execution errors surface here.
  rc = cudaDeviceSynchronize();
  if (rc != cudaSuccess) {
    printf("cudaDeviceSynchronize()=%d (%s)\n", rc, cudaGetErrorString(rc));
  }
  return rc;
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment