Created
January 23, 2021 20:18
-
-
Save maleadt/d84401fe402fe86f90f018c3a4747b78 to your computer and use it in GitHub Desktop.
Stream-ordered memory allocator + device reset = launch failure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
// Evaluate a CUDA driver API call and abort the program if it did not succeed.
// The do/while(0) wrapper makes `check(x);` a single statement, so it stays
// correct inside an unbraced if/else.
#define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

// Report a failed driver API call and terminate the process.
//
//   code  result returned by the driver call
//   file  source file of the call site (supplied by check())
//   line  source line of the call site (supplied by check())
//
// For any result other than CUDA_SUCCESS this prints the error name and the
// call site to stderr and passes the numeric result to exit(). It returns
// normally only when code is CUDA_SUCCESS.
static inline void _check(CUresult code, const char *file, int line)
{
    if (code != CUDA_SUCCESS)
    {
        // cuGetErrorName() stores NULL when it does not recognize the code;
        // handing NULL to "%s" is undefined, so substitute a placeholder.
        const char *name = NULL;
        if (cuGetErrorName(code, &name) != CUDA_SUCCESS || name == NULL)
            name = "unknown error";
        fprintf(stderr,"ERROR %s at %s:%d\n", name, file, line);
        exit(code);
    }
}
int main() { | |
check(cuInit(0)); | |
CUdevice dev; | |
check(cuDeviceGet(&dev, 0)); | |
CUcontext ctx; | |
check(cuDevicePrimaryCtxRetain(&ctx, dev)); | |
check(cuCtxSetCurrent(ctx)); | |
CUstream stream; | |
check(cuStreamCreate(&stream, 0)); | |
CUdeviceptr ptr; | |
check(cuMemAllocAsync(&ptr, 8, stream)); | |
// reset | |
check(cuDevicePrimaryCtxReset_v2(dev)); | |
check(cuDevicePrimaryCtxRetain(&ctx, dev)); | |
check(cuCtxSetCurrent(ctx)); | |
check(cuStreamCreate(&stream, 0)); | |
check(cuMemAllocAsync(&ptr, 8, stream)); | |
void* cpu_ptr = malloc(8); | |
check(cuMemcpyHtoDAsync_v2(ptr, cpu_ptr, 8, stream)); | |
check(cuStreamSynchronize(stream)); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// Evaluate a CUDA runtime API call and abort the program if it did not succeed.
// The do/while(0) wrapper makes `check(x);` a single statement, so it stays
// correct inside an unbraced if/else.
#define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

// Report a failed runtime API call and terminate the process.
//
//   code  result returned by the runtime call
//   file  source file of the call site (supplied by check())
//   line  source line of the call site (supplied by check())
//
// For any result other than cudaSuccess this prints the error name and the
// call site to stderr and passes the numeric result to exit(). It returns
// normally only when code is cudaSuccess.
static inline void _check(cudaError_t code, const char *file, int line)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"ERROR %s at %s:%d\n", cudaGetErrorName(code), file, line);
    exit(code);
}
// Runtime API reproducer: run a stream-ordered allocation plus an asynchronous
// host-to-device copy, reset the device, then run the same sequence again.
//
// Returns 0 when every call succeeds. Any failing runtime call terminates the
// process through check(); a failed host allocation returns 1.
int main() {
    const size_t nbytes = 8;
    cudaStream_t stream;
    void *cpu, *gpu;

    // Zero-initialized so the copies below never read indeterminate bytes.
    cpu = calloc(1, nbytes);
    if (cpu == NULL) {
        fprintf(stderr, "ERROR host allocation failed at %s:%d\n", __FILE__, __LINE__);
        return 1;
    }

    // First pass, before the reset.
    check(cudaStreamCreate(&stream));
    check(cudaMallocAsync(&gpu, nbytes, stream));
    printf("ptr: 0x%p\n", gpu);
    check(cudaMemcpyAsync(gpu, cpu, nbytes, cudaMemcpyHostToDevice, stream));
    check(cudaStreamSynchronize(stream));
    check(cudaStreamDestroy(stream));

    // NOTE(review): the device allocation is not freed before the reset. That
    // looks deliberate for this reproducer, so the sequence of runtime calls
    // is left exactly as it was.
    check(cudaDeviceReset());

    // Second pass, after the reset.
    check(cudaStreamCreate(&stream));
    check(cudaMallocAsync(&gpu, nbytes, stream));
    printf("ptr: 0x%p\n", gpu);
    check(cudaMemcpyAsync(gpu, cpu, nbytes, cudaMemcpyHostToDevice, stream));
    check(cudaStreamSynchronize(stream));
    check(cudaStreamDestroy(stream));

    // Both copies have been synchronized, so the host buffer is no longer in use.
    free(cpu);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment