youkaichao · April 24, 2025 11:05 · youkaichao · Apr 24, 2025
diff --git a/test1.cu b/test1.cu
 #include <iostream>
 #include <cuda_runtime.h>
 #include <cuda.h>

 // Define the kernel with illegal memory access
 // Intentionally faulting kernel used to observe how an illegal memory access
 // is reported back to the host.
 //   data, size : accepted as launch arguments but never read or written here.
 // Every thread first calls __nanosleep (requires SM70+); afterwards the thread
 // whose flat 1D index is 0 stores 42 through the hard-coded address 0x100.
 __global__ void illegalWildPointerKernel(int* data, int size) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Requests a 1e9 ns delay so the fault happens well after the launch call
    // has returned. NOTE(review): the PTX nanosleep documentation lists a
    // maximum sleep duration, so the real delay may be far below one second.
    __nanosleep(1000000000ULL);

    // Only the first thread performs the bad store.
    if (tid != 0) {
        return;
    }

    int* const bogus = reinterpret_cast<int*>(0x100);
    *bogus = 42;  // Illegal access: 0x100 is not an address this program allocated
 }

 // Host driver for the reproducer: launches illegalWildPointerKernel once through
 // cudaLaunchKernelEx and prints the status returned by the launch itself, by
 // cudaDeviceSynchronize, and by cudaGetLastError.
 // Returns 1 when the device buffer cannot be allocated or cleared, 0 otherwise.
 int main() {
    int* d_data = nullptr;
    int size = 10;
    const size_t bytes = size * sizeof(int);

    // Check the setup calls: if they fail, the statuses printed below would
    // describe the setup problem rather than the kernel fault under study.
    cudaError_t setupErr = cudaMalloc(&d_data, bytes);
    if (setupErr != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(setupErr) << std::endl;
        return 1;
    }

    setupErr = cudaMemset(d_data, 0, bytes);
    if (setupErr != cudaSuccess) {
        std::cerr << "cudaMemset failed: " << cudaGetErrorString(setupErr) << std::endl;
        cudaFree(d_data);
        return 1;
    }

    // Set up the launch parameters: one block of one thread, 100 bytes of
    // dynamic shared memory, on stream 0.
    cudaLaunchConfig_t config{};
    config.gridDim = dim3(1);
    config.blockDim = dim3(1);
    config.dynamicSmemBytes = 100;
    config.stream = 0;

    // Launch the kernel with cudaLaunchKernelEx (CUDA 12+)
    cudaError_t result = cudaLaunchKernelEx(&config,
                                         illegalWildPointerKernel,
                                         d_data, size);

    // Wait for the kernel to finish so an execution-time fault can surface,
    // then read (and clear) the runtime's last recorded error.
    cudaError_t syncErr = cudaDeviceSynchronize();
    cudaError_t lastErr = cudaGetLastError();

    std::cout << "cudaLaunchKernelEx result: " << cudaGetErrorString(result) << std::endl;
    std::cout << "cudaDeviceSynchronize error: " << cudaGetErrorString(syncErr) << std::endl;
    std::cout << "cudaGetLastError: " << cudaGetErrorString(lastErr) << std::endl;

    // The result of cudaFree is deliberately not checked: after a faulting
    // kernel this call is itself likely to report an error.
    // NOTE(review): the exit status stays 0 as in the original, presumably
    // because the fault is the expected outcome of this reproducer.
    cudaFree(d_data);
    return 0;
 }
	#include <iostream>
	#include <cuda_runtime.h>
	#include <cuda.h>

	// Define the kernel with illegal memory access
	// Intentionally faulting kernel used to observe how an illegal memory access
	// is reported back to the host.
	//   data, size : accepted as launch arguments but never read or written here.
	// Every thread first calls __nanosleep (requires SM70+); afterwards the thread
	// whose flat 1D index is 0 stores 42 through the hard-coded address 0x100.
	__global__ void illegalWildPointerKernel(int* data, int size) {
	    const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	    // Requests a 1e9 ns delay so the fault happens well after the launch call
	    // has returned. NOTE(review): the PTX nanosleep documentation lists a
	    // maximum sleep duration, so the real delay may be far below one second.
	    __nanosleep(1000000000ULL);

	    // Only the first thread performs the bad store.
	    if (tid != 0) {
	        return;
	    }

	    int* const bogus = reinterpret_cast<int*>(0x100);
	    *bogus = 42; // Illegal access: 0x100 is not an address this program allocated
	}

	// Host driver for the reproducer: launches illegalWildPointerKernel once through
	// cudaLaunchKernelEx and prints the status returned by the launch itself, by
	// cudaDeviceSynchronize, and by cudaGetLastError.
	// Returns 1 when the device buffer cannot be allocated or cleared, 0 otherwise.
	int main() {
	    int* d_data = nullptr;
	    int size = 10;
	    const size_t bytes = size * sizeof(int);

	    // Check the setup calls: if they fail, the statuses printed below would
	    // describe the setup problem rather than the kernel fault under study.
	    cudaError_t setupErr = cudaMalloc(&d_data, bytes);
	    if (setupErr != cudaSuccess) {
	        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(setupErr) << std::endl;
	        return 1;
	    }

	    setupErr = cudaMemset(d_data, 0, bytes);
	    if (setupErr != cudaSuccess) {
	        std::cerr << "cudaMemset failed: " << cudaGetErrorString(setupErr) << std::endl;
	        cudaFree(d_data);
	        return 1;
	    }

	    // Set up the launch parameters: one block of one thread, 100 bytes of
	    // dynamic shared memory, on stream 0.
	    cudaLaunchConfig_t config{};
	    config.gridDim = dim3(1);
	    config.blockDim = dim3(1);
	    config.dynamicSmemBytes = 100;
	    config.stream = 0;

	    // Launch the kernel with cudaLaunchKernelEx (CUDA 12+)
	    cudaError_t result = cudaLaunchKernelEx(&config,
	                                            illegalWildPointerKernel,
	                                            d_data, size);

	    // Wait for the kernel to finish so an execution-time fault can surface,
	    // then read (and clear) the runtime's last recorded error.
	    cudaError_t syncErr = cudaDeviceSynchronize();
	    cudaError_t lastErr = cudaGetLastError();

	    std::cout << "cudaLaunchKernelEx result: " << cudaGetErrorString(result) << std::endl;
	    std::cout << "cudaDeviceSynchronize error: " << cudaGetErrorString(syncErr) << std::endl;
	    std::cout << "cudaGetLastError: " << cudaGetErrorString(lastErr) << std::endl;

	    // The result of cudaFree is deliberately not checked: after a faulting
	    // kernel this call is itself likely to report an error.
	    // NOTE(review): the exit status stays 0 as in the original, presumably
	    // because the fault is the expected outcome of this reproducer.
	    cudaFree(d_data);
	    return 0;
	}