maleadt · January 23, 2021 20:18
diff --git a/demo.c b/demo.c
 #include <stdio.h>
 #include <cuda.h>

 /*
  * check(expr): evaluate a CUDA driver API call and terminate the program if
  * it did not return CUDA_SUCCESS. The do/while(0) wrapper makes `check(x);`
  * a single statement, so it stays valid inside an unbraced if/else.
  */
 #define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

 /*
  * Report a failed driver API call and exit.
  *
  * code: result returned by the driver API call.
  * file: source file of the call site (supplied by the check macro).
  * line: source line of the call site (supplied by the check macro).
  *
  * Returns normally when code is CUDA_SUCCESS. Otherwise prints
  * "ERROR <name> at <file>:<line>" to stderr and exits with the numeric
  * error code as the process status.
  *
  * `static` matters in a C translation unit: a plain `inline` definition
  * provides no external definition, so an unoptimized build may fail to link.
  */
 static inline void _check(CUresult code, const char *file, int line)
 {
     if (code == CUDA_SUCCESS)
         return;

     /* cuGetErrorName stores NULL in *name for an unrecognized code; passing
        NULL to %s is undefined behavior, so fall back to a placeholder. */
     const char *name = NULL;
     if (cuGetErrorName(code, &name) != CUDA_SUCCESS || name == NULL)
         name = "UNKNOWN";

     fprintf(stderr,"ERROR %s at %s:%d\n", name, file, line);
     exit(code);
 }

 /*
  * Creates a stream and an 8-byte stream-ordered allocation in the device's
  * primary context, resets that primary context, and then repeats the setup
  * followed by an asynchronous host-to-device copy and a stream synchronize.
  * Every driver call is wrapped in check(), so the first failing call reports
  * its error and exits. Returns 0 when every call succeeds.
  */
 int main() {
     CUdevice device;
     CUcontext primary;
     CUstream queue;
     CUdeviceptr dev_buf;
     void *host_buf;

     /* Phase 1: initialize the driver and make device 0's primary context current. */
     check(cuInit(0));
     check(cuDeviceGet(&device, 0));
     check(cuDevicePrimaryCtxRetain(&primary, device));
     check(cuCtxSetCurrent(primary));

     /* Allocate 8 bytes on a new stream; this allocation is never freed. */
     check(cuStreamCreate(&queue, 0));
     check(cuMemAllocAsync(&dev_buf, 8, queue));

     /* Reset the primary context while the stream and allocation above still exist. */
     check(cuDevicePrimaryCtxReset_v2(device));

     /* Phase 2: same setup again; the handles from phase 1 are overwritten. */
     check(cuDevicePrimaryCtxRetain(&primary, device));
     check(cuCtxSetCurrent(primary));
     check(cuStreamCreate(&queue, 0));
     check(cuMemAllocAsync(&dev_buf, 8, queue));

     /* The host buffer is never written; its 8 bytes only serve as the copy source. */
     host_buf = malloc(8);
     check(cuMemcpyHtoDAsync_v2(dev_buf, host_buf, 8, queue));
     check(cuStreamSynchronize(queue));

     return 0;
 }
diff --git a/demo.cu b/demo.cu
 #include <stdio.h>

 /*
  * check(expr): evaluate a CUDA runtime API call and terminate the program if
  * it did not return cudaSuccess. The do/while(0) wrapper makes `check(x);`
  * a single statement, so it stays valid inside an unbraced if/else.
  */
 #define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

 /*
  * Report a failed runtime API call and exit.
  *
  * code: result returned by the runtime API call.
  * file: source file of the call site (supplied by the check macro).
  * line: source line of the call site (supplied by the check macro).
  *
  * Returns normally when code is cudaSuccess. Otherwise prints
  * "ERROR <name> at <file>:<line>" to stderr and exits with the numeric
  * error code as the process status.
  */
 inline void _check(cudaError_t code, const char *file, int line)
 {
     if (code == cudaSuccess)
         return;

     fprintf(stderr,"ERROR %s at %s:%d\n", cudaGetErrorName(code), file, line);
     exit(code);
 }

 /*
  * Runs the same create-stream / allocate / copy / synchronize / destroy-stream
  * sequence twice, with a cudaDeviceReset() between the two rounds. Every
  * runtime call is wrapped in check(), so the first failing call reports its
  * error and exits. Returns 0 when every call succeeds.
  */
 int main() {
     /* 8-byte host source buffer; its contents are never written. */
     void *host_buf = malloc(8);
     void *dev_buf;
     cudaStream_t queue;

     /* Round 1: 8-byte stream-ordered allocation plus a host-to-device copy. */
     check(cudaStreamCreate(&queue));
     check(cudaMallocAsync(&dev_buf, 8, queue));
     printf("ptr: 0x%p\n", dev_buf);
     check(cudaMemcpyAsync(dev_buf, host_buf, 8, cudaMemcpyHostToDevice, queue));
     check(cudaStreamSynchronize(queue));
     check(cudaStreamDestroy(queue));

     /* The device allocation from round 1 is not freed before this reset. */
     check(cudaDeviceReset());

     /* Round 2: identical sequence after the reset, reusing the same variables. */
     check(cudaStreamCreate(&queue));
     check(cudaMallocAsync(&dev_buf, 8, queue));
     printf("ptr: 0x%p\n", dev_buf);
     check(cudaMemcpyAsync(dev_buf, host_buf, 8, cudaMemcpyHostToDevice, queue));
     check(cudaStreamSynchronize(queue));
     check(cudaStreamDestroy(queue));

     return 0;
 }
	#include <stdio.h>
	#include <cuda.h>

	/*
	 * check(expr): evaluate a CUDA driver API call and terminate the program if
	 * it did not return CUDA_SUCCESS. The do/while(0) wrapper makes `check(x);`
	 * a single statement, so it stays valid inside an unbraced if/else.
	 */
	#define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

	/*
	 * Report a failed driver API call and exit.
	 *
	 * code: result returned by the driver API call.
	 * file: source file of the call site (supplied by the check macro).
	 * line: source line of the call site (supplied by the check macro).
	 *
	 * Returns normally when code is CUDA_SUCCESS. Otherwise prints
	 * "ERROR <name> at <file>:<line>" to stderr and exits with the numeric
	 * error code as the process status.
	 *
	 * `static` matters in a C translation unit: a plain `inline` definition
	 * provides no external definition, so an unoptimized build may fail to link.
	 */
	static inline void _check(CUresult code, const char *file, int line)
	{
	    if (code == CUDA_SUCCESS)
	        return;

	    /* cuGetErrorName stores NULL in *name for an unrecognized code; passing
	       NULL to %s is undefined behavior, so fall back to a placeholder. */
	    const char *name = NULL;
	    if (cuGetErrorName(code, &name) != CUDA_SUCCESS || name == NULL)
	        name = "UNKNOWN";

	    fprintf(stderr,"ERROR %s at %s:%d\n", name, file, line);
	    exit(code);
	}

	/*
	 * Creates a stream and an 8-byte stream-ordered allocation in the device's
	 * primary context, resets that primary context, and then repeats the setup
	 * followed by an asynchronous host-to-device copy and a stream synchronize.
	 * Every driver call is wrapped in check(), so the first failing call reports
	 * its error and exits. Returns 0 when every call succeeds.
	 */
	int main() {
	    CUdevice device;
	    CUcontext primary;
	    CUstream queue;
	    CUdeviceptr dev_buf;
	    void *host_buf;

	    /* Phase 1: initialize the driver and make device 0's primary context current. */
	    check(cuInit(0));
	    check(cuDeviceGet(&device, 0));
	    check(cuDevicePrimaryCtxRetain(&primary, device));
	    check(cuCtxSetCurrent(primary));

	    /* Allocate 8 bytes on a new stream; this allocation is never freed. */
	    check(cuStreamCreate(&queue, 0));
	    check(cuMemAllocAsync(&dev_buf, 8, queue));

	    /* Reset the primary context while the stream and allocation above still exist. */
	    check(cuDevicePrimaryCtxReset_v2(device));

	    /* Phase 2: same setup again; the handles from phase 1 are overwritten. */
	    check(cuDevicePrimaryCtxRetain(&primary, device));
	    check(cuCtxSetCurrent(primary));
	    check(cuStreamCreate(&queue, 0));
	    check(cuMemAllocAsync(&dev_buf, 8, queue));

	    /* The host buffer is never written; its 8 bytes only serve as the copy source. */
	    host_buf = malloc(8);
	    check(cuMemcpyHtoDAsync_v2(dev_buf, host_buf, 8, queue));
	    check(cuStreamSynchronize(queue));

	    return 0;
	}
	#include <stdio.h>

	/*
	 * check(expr): evaluate a CUDA runtime API call and terminate the program if
	 * it did not return cudaSuccess. The do/while(0) wrapper makes `check(x);`
	 * a single statement, so it stays valid inside an unbraced if/else.
	 */
	#define check(ans) do { _check((ans), __FILE__, __LINE__); } while (0)

	/*
	 * Report a failed runtime API call and exit.
	 *
	 * code: result returned by the runtime API call.
	 * file: source file of the call site (supplied by the check macro).
	 * line: source line of the call site (supplied by the check macro).
	 *
	 * Returns normally when code is cudaSuccess. Otherwise prints
	 * "ERROR <name> at <file>:<line>" to stderr and exits with the numeric
	 * error code as the process status.
	 */
	inline void _check(cudaError_t code, const char *file, int line)
	{
	    if (code == cudaSuccess)
	        return;

	    fprintf(stderr,"ERROR %s at %s:%d\n", cudaGetErrorName(code), file, line);
	    exit(code);
	}

	/*
	 * Runs the same create-stream / allocate / copy / synchronize / destroy-stream
	 * sequence twice, with a cudaDeviceReset() between the two rounds. Every
	 * runtime call is wrapped in check(), so the first failing call reports its
	 * error and exits. Returns 0 when every call succeeds.
	 */
	int main() {
	    cudaStream_t stream;
	    /* Both buffers must be pointers: malloc() returns void*, and
	       cudaMallocAsync() writes the device address through a void**. */
	    void *cpu, *gpu;

	    /* 8-byte host source buffer; its contents are never written. */
	    cpu = malloc(8);

	    /* Round 1: 8-byte stream-ordered allocation plus a host-to-device copy. */
	    check(cudaStreamCreate(&stream));
	    check(cudaMallocAsync(&gpu, 8, stream));
	    printf("ptr: 0x%p\n", gpu);
	    check(cudaMemcpyAsync(gpu, cpu, 8, cudaMemcpyHostToDevice, stream));
	    check(cudaStreamSynchronize(stream));
	    check(cudaStreamDestroy(stream));

	    /* The device allocation from round 1 is not freed before this reset. */
	    check(cudaDeviceReset());

	    /* Round 2: identical sequence after the reset, reusing the same variables. */
	    check(cudaStreamCreate(&stream));
	    check(cudaMallocAsync(&gpu, 8, stream));
	    printf("ptr: 0x%p\n", gpu);
	    check(cudaMemcpyAsync(gpu, cpu, 8, cudaMemcpyHostToDevice, stream));
	    check(cudaStreamSynchronize(stream));
	    check(cudaStreamDestroy(stream));

	    return 0;
	}