Skip to content

Instantly share code, notes, and snippets.

@benanne
Created April 10, 2014 19:25
Show Gist options
  • Select an option

  • Save benanne/10414443 to your computer and use it in GitHub Desktop.

Select an option

Save benanne/10414443 to your computer and use it in GitHub Desktop.
CuFFT op for Theano based on scikits.cuda.fft
import numpy as np
import theano
import theano.misc.pycuda_init
import theano.sandbox.cuda as cuda
from theano.misc.pycuda_utils import to_gpuarray, to_cudandarray
from scikits.cuda import fft
class CuFFTOp(cuda.GpuOp):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, inp):
inp = cuda.basic_ops.gpu_contiguous(
cuda.basic_ops.as_cuda_ndarray_variable(inp))
assert inp.dtype == "float32"
otype = cuda.CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1)) # add one extra dim for real/imag
return theano.Apply(self, [inp], [otype()])
def make_thunk(self, node, storage_map, _, _2):
inputs = [ storage_map[v] for v in node.inputs]
outputs = [ storage_map[v] for v in node.outputs]
# plan_input_shape = None
# plan = None
def thunk():
input_shape = inputs[0][0].shape
output_shape = (input_shape[0], input_shape[1], input_shape[2] // 2 + 1, 2) # extra dimension with length 2 for real/imag
z = outputs[0]
# only allocate if there is no previous allocation of the right size.
if z[0] is None or z[0].shape != output_shape:
z[0] = cuda.CudaNdarray.zeros(output_shape)
input_pycuda = to_gpuarray(inputs[0][0])
# I thought we'd need to change the type on output_pycuda so it is complex64,
# but as it turns out scikits.cuda.fft doesn't really care either way and
# treats the array as if it is complex64 anyway.
output_pycuda = to_gpuarray(z[0])
# # only initialise plan if necessary
# if plan is None or plan_input_shape != input_shape:
# plan_input_shape = input_shape
# plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])
plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])
fft.fft(input_pycuda, output_pycuda, plan)
thunk.inputs = inputs
thunk.outputs = outputs
thunk.lazy = False
return thunk
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment