Skip to content

Instantly share code, notes, and snippets.

@maedoc
Created April 21, 2019 21:29
Show Gist options
  • Select an option

  • Save maedoc/2414fb534fa7ce7561071ce87113987e to your computer and use it in GitHub Desktop.

Select an option

Save maedoc/2414fb534fa7ce7561071ce87113987e to your computer and use it in GitHub Desktop.
recurrence analysis with Numba & CUDA
"""Implements recurrence analysis with CUDA.
"""
import os, glob
# not needed if using conda
os.environ['NUMBAPRO_NVVM'], = glob.glob('c:/program files/nvidia*/cuda/*/nvvm/bin/nvvm64*.dll')
os.environ['NUMBAPRO_LIBDEVICE'], = glob.glob('c:/program files/nvidia*/cuda/*/nvvm/libdevice')
import math
import numpy as np
from numba import cuda
import numba as nb
cu32 = lambda a: np.ascontiguousarray(a.astype(np.uint32))
@cuda.jit
def _sseij(Y, I, J, O):
# strides
sty = cuda.blockDim.x
sbx = sty * cuda.blockDim.y
sby = sbx * cuda.gridDim.x
sbz = sby * cuda.gridDim.y
# this thread's index
t = (cuda.threadIdx.x
+ cuda.threadIdx.y * sty
+ cuda.blockIdx.x * sbx
+ cuda.blockIdx.y * sby
+ cuda.blockIdx.z * sbz)
if t < I.size:
i = I[t]
j = J[t]
x = nb.float32(0.0)
for k in range(Y.shape[0]):
x += (Y[k, i] - Y[k, j])**2
O[t] = math.sqrt(x)
def ra(Y):
"""Perform recurrence analysis for series of matrices
Y size (n_mat, m, n).
"""
Y = Y.astype('f')
nw, ns, nf = Y.shape
# simd friendly form
Y_ = np.ascontiguousarray(
Y.reshape((nw, -1)).T)
assert Y_.strides == (nw*4, 4) # (ns*nf, nw)
i, j = map(cu32, np.tril_indices(nw))
o = np.zeros((i.size, ), 'f')
ntt = o.size
ngz = (ntt // (32**4)) + 1
grid, block = (32, 32, ngz), (32, 32)
assert ntt < (np.prod(grid)*np.prod(block))
print('hold on, launching...')
_sseij[grid, block](Y_, i, j, o)
print('kernel done')
# compute dense output
O = np.zeros((nw, nw), 'f')
O[i, j] = o
O += O.T
return O
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment