youkaichao · June 4, 2024 04:15
diff --git a/wait.cu b/wait.cu
 #include <cstdio>
 #include <iostream>
 #include <cuda_runtime.h>

 // Spins on the device until *flag is observed to be true, then prints a
 // message. There is no thread-index guard, so every launched thread runs
 // the same loop.
 //
 // flag: pointer the kernel polls; it is volatile-qualified so that each
 //       iteration performs a real load instead of reusing a cached value.
 __global__ void waitKernel(volatile bool *flag) {
    for (;;) {
        // Volatile read: re-fetched on every pass through the loop.
        const bool released = *flag;
        if (released) {
            break;
        }
        // System-scope memory fence issued once per unsuccessful poll.
        __threadfence_system();
    }
    printf("will finish!\n");
 }

 // Prints a diagnostic to stderr when a CUDA runtime call did not return
 // cudaSuccess. Returns true on success so callers can bail out on false.
 static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
 }

 // Launches waitKernel on a managed-memory flag, waits for the user to press
 // enter, then sets the flag from the host so the kernel can leave its loop.
 //
 // Returns 0 on success and 1 if any CUDA call fails or the device cannot
 // share managed memory with the host while a kernel is running.
 int main() {
    int device = 0;
    if (!cudaOk(cudaGetDevice(&device), "cudaGetDevice")) {
        return 1;
    }

    // The host writes the flag while the kernel is still running. That is
    // only permitted when the device reports concurrent managed access;
    // otherwise the host write would fault, so refuse to start instead.
    int concurrentManagedAccess = 0;
    if (!cudaOk(cudaDeviceGetAttribute(&concurrentManagedAccess,
                                       cudaDevAttrConcurrentManagedAccess,
                                       device),
                "cudaDeviceGetAttribute")) {
        return 1;
    }
    if (!concurrentManagedAccess) {
        std::cerr << "Device does not support concurrent managed access; "
                     "the host cannot update the flag while the kernel is running."
                  << std::endl;
        return 1;
    }

    // Allocate managed memory
    bool *d_flag = 0;
    if (!cudaOk(cudaMallocManaged(&d_flag, sizeof(bool)), "cudaMallocManaged")) {
        return 1;
    }

    // Initialize flag to false
    *d_flag = false;

    // Launch the kernel; launch-configuration errors are only reported
    // through cudaGetLastError(), not by the launch statement itself.
    waitKernel<<<1, 1>>>((volatile bool *)d_flag);
    if (!cudaOk(cudaGetLastError(), "waitKernel launch")) {
        cudaFree(d_flag);
        return 1;
    }

    // Change the flag on the host after some condition or delay
    std::cout << "Kernel is waiting. Press enter to continue..." << std::endl;
    std::cin.get();

    // Set the flag to true to stop the GPU busy-wait loop
    *d_flag = true;

    // Wait for the kernel to finish; errors raised during kernel execution
    // surface here.
    const bool synced = cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    // Free the managed memory even if synchronization reported an error.
    const bool freed = cudaOk(cudaFree(d_flag), "cudaFree");

    return (synced && freed) ? 0 : 1;
 }
	#include <cstdio>
	#include <iostream>
	#include <cuda_runtime.h>

	// Spins on the device until *flag is observed to be true, then prints a
	// message. There is no thread-index guard, so every launched thread runs
	// the same loop.
	//
	// flag: pointer the kernel polls; it is volatile-qualified so that each
	//       iteration performs a real load instead of reusing a cached value.
	__global__ void waitKernel(volatile bool *flag) {
	    for (;;) {
	        // Volatile read: re-fetched on every pass through the loop.
	        const bool released = *flag;
	        if (released) {
	            break;
	        }
	        // System-scope memory fence issued once per unsuccessful poll.
	        __threadfence_system();
	    }
	    printf("will finish!\n");
	}

	// Prints a diagnostic to stderr when a CUDA runtime call did not return
	// cudaSuccess. Returns true on success so callers can bail out on false.
	static bool cudaOk(cudaError_t status, const char *what) {
	    if (status == cudaSuccess) {
	        return true;
	    }
	    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
	    return false;
	}

	// Launches waitKernel on a managed-memory flag, waits for the user to press
	// enter, then sets the flag from the host so the kernel can leave its loop.
	//
	// Returns 0 on success and 1 if any CUDA call fails or the device cannot
	// share managed memory with the host while a kernel is running.
	int main() {
	    int device = 0;
	    if (!cudaOk(cudaGetDevice(&device), "cudaGetDevice")) {
	        return 1;
	    }

	    // The host writes the flag while the kernel is still running. That is
	    // only permitted when the device reports concurrent managed access;
	    // otherwise the host write would fault, so refuse to start instead.
	    int concurrentManagedAccess = 0;
	    if (!cudaOk(cudaDeviceGetAttribute(&concurrentManagedAccess,
	                                       cudaDevAttrConcurrentManagedAccess,
	                                       device),
	                "cudaDeviceGetAttribute")) {
	        return 1;
	    }
	    if (!concurrentManagedAccess) {
	        std::cerr << "Device does not support concurrent managed access; "
	                     "the host cannot update the flag while the kernel is running."
	                  << std::endl;
	        return 1;
	    }

	    // Allocate managed memory
	    bool *d_flag = 0;
	    if (!cudaOk(cudaMallocManaged(&d_flag, sizeof(bool)), "cudaMallocManaged")) {
	        return 1;
	    }

	    // Initialize flag to false
	    *d_flag = false;

	    // Launch the kernel; launch-configuration errors are only reported
	    // through cudaGetLastError(), not by the launch statement itself.
	    waitKernel<<<1, 1>>>((volatile bool *)d_flag);
	    if (!cudaOk(cudaGetLastError(), "waitKernel launch")) {
	        cudaFree(d_flag);
	        return 1;
	    }

	    // Change the flag on the host after some condition or delay
	    std::cout << "Kernel is waiting. Press enter to continue..." << std::endl;
	    std::cin.get();

	    // Set the flag to true to stop the GPU busy-wait loop
	    *d_flag = true;

	    // Wait for the kernel to finish; errors raised during kernel execution
	    // surface here.
	    const bool synced = cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

	    // Free the managed memory even if synchronization reported an error.
	    const bool freed = cudaOk(cudaFree(d_flag), "cudaFree");

	    return (synced && freed) ? 0 : 1;
	}