Created
April 24, 2025 11:05
-
-
Save youkaichao/648852317f7445c6c2243f6595875d6b to your computer and use it in GitHub Desktop.
test1.cu
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cuda_runtime.h> | |
#include <cuda.h> | |
// Kernel that deliberately performs an illegal memory access.
//
// Every thread first stalls for roughly one second so that an asynchronous
// launch has time to return to the host before the fault happens. Thread 0
// then writes through a hard-coded wild pointer (0x100).
//
// Parameters:
//   data - unused; present only so the launch passes a pointer argument.
//   size - unused; present only so the launch passes a scalar argument.
//
// Requires SM70+ (__nanosleep).
__global__ void illegalWildPointerKernel(int* data, int size) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // A single __nanosleep call is capped at about 1 ms, so asking for
    // 1'000'000'000 ns in one call does not sleep for a second. Issue
    // 1000 sleeps of ~1 ms each to get a delay on the order of one second.
    for (int i = 0; i < 1000; ++i) {
        __nanosleep(1000000U);  // ~1 ms per call
    }

    int* wild_pointer = (int*)0x100;
    if (idx == 0) {
        wild_pointer[0] = 42;  // Illegal access
    }
}
// Launches illegalWildPointerKernel through cudaLaunchKernelEx and prints the
// status returned by the launch itself, by the following
// cudaDeviceSynchronize(), and by cudaGetLastError(), so the three can be
// compared with and without CUDA_LAUNCH_BLOCKING=1.
//
// Returns 1 if the device buffer cannot be set up (the experiment would be
// meaningless), otherwise 0 regardless of the kernel fault, which is expected.
int main() {
    int* d_data = nullptr;
    int size = 10;

    // Check the setup calls so that a pre-existing failure is not mistaken
    // for the launch/sync results this program is meant to demonstrate.
    cudaError_t setupErr = cudaMalloc(&d_data, size * sizeof(int));
    if (setupErr != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(setupErr) << std::endl;
        return 1;
    }
    setupErr = cudaMemset(d_data, 0, size * sizeof(int));
    if (setupErr != cudaSuccess) {
        std::cerr << "cudaMemset failed: " << cudaGetErrorString(setupErr) << std::endl;
        cudaFree(d_data);
        return 1;
    }

    // Set up the launch parameters: one block of one thread on the default
    // stream, with 100 bytes of dynamic shared memory requested.
    cudaLaunchConfig_t config{};
    config.gridDim = dim3(1);
    config.blockDim = dim3(1);
    config.dynamicSmemBytes = 100;
    config.stream = 0;

    // Launch the kernel with cudaLaunchKernelEx (CUDA 12+)
    cudaError_t result = cudaLaunchKernelEx(&config,
                                            illegalWildPointerKernel,
                                            d_data, size);
    // Capture all three statuses before printing anything.
    cudaError_t syncErr = cudaDeviceSynchronize();
    cudaError_t lastErr = cudaGetLastError();

    std::cout << "cudaLaunchKernelEx result: " << cudaGetErrorString(result) << std::endl;
    std::cout << "cudaDeviceSynchronize error: " << cudaGetErrorString(syncErr) << std::endl;
    std::cout << "cudaGetLastError: " << cudaGetErrorString(lastErr) << std::endl;

    // Return value intentionally ignored: after the deliberate kernel fault
    // this call is expected to fail as well.
    cudaFree(d_data);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
compile with
nvcc -arch=sm_70 -std=c++17 test1.cu -o test
run with
./test
or run with
CUDA_LAUNCH_BLOCKING=1 ./test
conclusion:
cudaLaunchKernelEx respects CUDA_LAUNCH_BLOCKING: when CUDA_LAUNCH_BLOCKING=1 is set, it returns the error code only after kernel execution finishes.