Created
June 4, 2024 04:15
-
-
Save youkaichao/d96433deea23fbbcfce479ba0c1f73eb to your computer and use it in GitHub Desktop.
Wait kernel: the GPU spins until the CPU signals it to continue.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <cuda_runtime.h>
// Spin-wait kernel: blocks on the GPU until the host sets *flag to true,
// then prints a message and exits.
//
// Launch layout: every launched thread polls the same flag and prints once;
//                the caller in this file launches it as <<<1, 1>>>.
// Shared memory: none.
// Precondition:  `flag` must point to memory that the host is able to write
//                while this kernel is still running; otherwise the loop
//                never terminates.
__global__ void waitKernel(volatile bool *flag) {
    // `volatile` forces a fresh load of *flag on every iteration; without it
    // the compiler could hoist the read out of the loop and spin forever.
    while (!*flag) {
        // Busy-wait: nothing to do until the host flips the flag.
    }

    // One fence after the flag has been observed (instead of one per polling
    // iteration): it orders the flag read before any later memory accesses
    // with respect to the host, which is what a consumer of data published
    // before the flag would need.
    __threadfence_system();

    printf("will finish!\n");
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo driver: launches waitKernel on a managed-memory flag, waits for the
// user to press enter, then sets the flag so the kernel can finish.
// Returns 0 on success and EXIT_FAILURE if any CUDA call fails or the device
// cannot share managed memory with the host while a kernel is running.
int main() {
    int device = 0;
    if (!cudaSucceeded(cudaGetDevice(&device), "cudaGetDevice")) {
        return EXIT_FAILURE;
    }

    // The host writes the flag while the kernel is still running. That is
    // only permitted when the device supports concurrent managed access;
    // without it the host store below would fault.
    int concurrentManagedAccess = 0;
    if (!cudaSucceeded(cudaDeviceGetAttribute(&concurrentManagedAccess,
                                              cudaDevAttrConcurrentManagedAccess,
                                              device),
                       "cudaDeviceGetAttribute")) {
        return EXIT_FAILURE;
    }
    if (concurrentManagedAccess == 0) {
        std::cerr << "This device does not support concurrent host access to "
                     "managed memory while a kernel is running."
                  << std::endl;
        return EXIT_FAILURE;
    }

    // Allocate the flag in managed memory so host and device share one pointer.
    bool *d_flag = nullptr;
    if (!cudaSucceeded(cudaMallocManaged(&d_flag, sizeof(bool)), "cudaMallocManaged")) {
        return EXIT_FAILURE;
    }

    // Access the flag through a volatile pointer on the host as well, so the
    // stores are emitted exactly where they are written.
    volatile bool *flag = d_flag;
    *flag = false;

    // Launch the kernel; it spins until the flag becomes true.
    waitKernel<<<1, 1>>>(flag);
    // A launch returns no status of its own; a bad launch surfaces here.
    if (!cudaSucceeded(cudaGetLastError(), "waitKernel launch")) {
        cudaFree(d_flag);
        return EXIT_FAILURE;
    }

    // Let the user decide when the GPU may continue.
    std::cout << "Kernel is waiting. Press enter to continue..." << std::endl;
    std::cin.get();

    // Release the GPU busy-wait loop.
    *flag = true;

    // Wait for the kernel to finish; this is also where errors raised during
    // kernel execution are reported.
    const bool finished = cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    // Free the managed memory regardless of how the kernel ended.
    const bool released = cudaSucceeded(cudaFree(d_flag), "cudaFree");

    return (finished && released) ? 0 : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment