Created
November 8, 2012 03:57
Revisions
-
fasiha created this gist
Nov 8, 2012. There are no files selected for viewing
#!/usr/bin/env python

"""
Compare performance of using host-registered pinned and unpinned host memory,
with more than one block for larger arrays, and with unpinned tried first.

The script runs the same element-wise "add one" kernel M times on an N x N
array of doubles, first through pageable host memory (copied to and from the
device by ``drv.InOut``) and then through page-locked, device-mapped host
memory that the kernel accesses directly. Each run is checked against the
expected result and the mean wall-clock time per run is printed.
"""

import numpy as np

import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule

from time import time

# Kernel: one thread per element; threads past the end of the array do nothing.
increment_mod = SourceModule("""
__global__ void increment(double *a, int N)
{
    int idx = threadIdx.x + blockIdx.x*blockDim.x;
    if (idx < N)
        a[idx] = a[idx]+1;
}
""")
increment = increment_mod.get_function("increment")

# Original author's note on N = 23: "breaks. Works if <= 22".
# N*N = 529 is the first size that needs more than one 512-thread block.
# NOTE(review): the pinned loop below now synchronizes before reading the
# results; that missing synchronization is the presumed cause of the
# breakage -- confirm on real hardware.
N = 23
M = 3  # number of timed repetitions per memory type

BLOCK_SIZE = 512  # threads per block
# Enough blocks to cover all N*N elements (integer ceiling division).
grid = ((N * N + BLOCK_SIZE - 1) // BLOCK_SIZE, 1, 1)

# Time use of pageable host memory:
x = np.empty((N, N), np.float64)
times = np.empty(M)
for i in range(M):
    x[:, :] = np.random.rand(N, N)
    x_orig = x.copy()
    start = time()
    # drv.InOut copies x to the device before the launch and back afterwards,
    # so the measured time includes both transfers.
    increment(drv.InOut(x), np.int32(x.size),
              block=(BLOCK_SIZE, 1, 1), grid=grid)
    times[i] = time() - start
    assert np.allclose(x_orig + 1, x)

print("Average kernel execution time with pageable memory: %3.7f" % np.mean(times))

# Time use of pinned host memory:
x = drv.aligned_empty((N, N), dtype=np.float64, order='C')
x = drv.register_host_memory(x, flags=drv.mem_host_register_flags.DEVICEMAP)
# Device-side address of the mapped host buffer, passed to the kernel as a
# raw pointer.
x_gpu_ptr = np.intp(x.base.get_device_pointer())
times = np.empty(M)
for i in range(M):
    x[:, :] = np.random.rand(N, N)
    x_orig = x.copy()
    start = time()
    increment(x_gpu_ptr, np.int32(x.size),
              block=(BLOCK_SIZE, 1, 1), grid=grid)
    # The launch returns before the kernel has finished and, unlike the
    # InOut path, there is no implicit copy back to wait on. Synchronize so
    # that the timing covers the kernel and the host sees completed writes.
    drv.Context.synchronize()
    times[i] = time() - start
    assert np.allclose(x_orig + 1, x)

print("Average kernel execution time with pinned memory: %3.7f" % np.mean(times))