Skip to content

Instantly share code, notes, and snippets.

@fasiha
Created November 8, 2012 03:57

Revisions

  1. fasiha created this gist Nov 8, 2012.
    56 changes: 56 additions & 0 deletions test_pinned_register_v2.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,56 @@
    #!/usr/bin/env python

    """
    Compare performance of using host-registered pinned and unpinned host memory,
    with more than one block for larger arrays, and with unpinned tried first.
    """

    import numpy as np

    import pycuda.autoinit
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    from time import time

    # CUDA kernel: each thread derives a global index from its block and
    # thread coordinates and adds 1 to a[idx]. The idx < N guard stops the
    # surplus threads of the last block from touching anything beyond the
    # first N elements.
    increment_mod = SourceModule("""
    __global__ void increment(double *a, int N)
    {
    int idx = threadIdx.x + blockIdx.x*blockDim.x;
    if (idx < N)
    a[idx] = a[idx]+1;
    }
    """)
    # Host-side handle used to launch the compiled kernel.
    increment = increment_mod.get_function("increment")

    # Side length of the N x N test array. 23*23 = 529 elements exceed the 512
    # threads of a single block, so the launches below need two blocks; any
    # N <= 22 (at most 484 elements) fits in one block.
    N = 23 # breaks. Works if <= 22
    # Number of timed repetitions averaged for each memory type.
    M = 3

    # Time use of pageable host memory:
    # drv.InOut copies x to the device before the launch and back to the host
    # afterwards, so the measured interval covers both transfers as well as
    # the kernel, and x is up to date by the time the call returns.
    x = np.empty((N, N), np.float64)

    threads_per_block = 512
    # Integer ceiling division: enough blocks to give every element a thread.
    num_blocks = (x.size + threads_per_block - 1) // threads_per_block

    times = np.empty(M)
    for i in range(M):  # range/print() forms below run on Python 2 and 3
        x[:, :] = np.random.rand(N, N)
        x_orig = x.copy()
        start = time()
        increment(drv.InOut(x), np.uint32(x.size),
                  block=(threads_per_block, 1, 1), grid=(num_blocks, 1, 1))
        times[i] = time() - start
        # Every element must have been incremented exactly once.
        assert np.allclose(x_orig + 1, x)

    print("Average kernel execution time with pageable memory: %3.7f" % np.mean(times))

    # Time use of pinned host memory:
    # The page-aligned buffer is registered (page-locked) with the DEVICEMAP
    # flag and the kernel is handed the corresponding device pointer, so it
    # operates on the host buffer directly and no explicit copies are issued.
    x = drv.aligned_empty((N, N), dtype=np.float64, order='C')
    x = drv.register_host_memory(x, flags=drv.mem_host_register_flags.DEVICEMAP)
    x_gpu_ptr = np.intp(x.base.get_device_pointer())

    threads_per_block = 512
    # Integer ceiling division: enough blocks to give every element a thread.
    num_blocks = (x.size + threads_per_block - 1) // threads_per_block

    times = np.empty(M)
    for i in range(M):  # range/print() forms below run on Python 2 and 3
        x[:, :] = np.random.rand(N, N)
        x_orig = x.copy()
        start = time()
        increment(x_gpu_ptr, np.uint32(x.size),
                  block=(threads_per_block, 1, 1), grid=(num_blocks, 1, 1))
        # With a raw device pointer there is no copy-back to wait on, so the
        # launch returns before the kernel has finished. Block here; otherwise
        # the timer only captures launch overhead and the check below can read
        # x while the kernel is still writing to it.
        drv.Context.synchronize()
        times[i] = time() - start
        # Every element must have been incremented exactly once.
        assert np.allclose(x_orig + 1, x)
    print("Average kernel execution time with pinned memory: %3.7f" % np.mean(times))