Created
April 10, 2014 19:25
-
-
Save benanne/10414443 to your computer and use it in GitHub Desktop.
CuFFT op for Theano based on scikits.cuda.fft
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import theano | |
| import theano.misc.pycuda_init | |
| import theano.sandbox.cuda as cuda | |
| from theano.misc.pycuda_utils import to_gpuarray, to_cudandarray | |
| from scikits.cuda import fft | |
| class CuFFTOp(cuda.GpuOp): | |
| def __eq__(self, other): | |
| return type(self) == type(other) | |
| def __hash__(self): | |
| return hash(type(self)) | |
| def __str__(self): | |
| return self.__class__.__name__ | |
| def make_node(self, inp): | |
| inp = cuda.basic_ops.gpu_contiguous( | |
| cuda.basic_ops.as_cuda_ndarray_variable(inp)) | |
| assert inp.dtype == "float32" | |
| otype = cuda.CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1)) # add one extra dim for real/imag | |
| return theano.Apply(self, [inp], [otype()]) | |
| def make_thunk(self, node, storage_map, _, _2): | |
| inputs = [ storage_map[v] for v in node.inputs] | |
| outputs = [ storage_map[v] for v in node.outputs] | |
| # plan_input_shape = None | |
| # plan = None | |
| def thunk(): | |
| input_shape = inputs[0][0].shape | |
| output_shape = (input_shape[0], input_shape[1], input_shape[2] // 2 + 1, 2) # extra dimension with length 2 for real/imag | |
| z = outputs[0] | |
| # only allocate if there is no previous allocation of the right size. | |
| if z[0] is None or z[0].shape != output_shape: | |
| z[0] = cuda.CudaNdarray.zeros(output_shape) | |
| input_pycuda = to_gpuarray(inputs[0][0]) | |
| # I thought we'd need to change the type on output_pycuda so it is complex64, | |
| # but as it turns out scikits.cuda.fft doesn't really care either way and | |
| # treats the array as if it is complex64 anyway. | |
| output_pycuda = to_gpuarray(z[0]) | |
| # # only initialise plan if necessary | |
| # if plan is None or plan_input_shape != input_shape: | |
| # plan_input_shape = input_shape | |
| # plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0]) | |
| plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0]) | |
| fft.fft(input_pycuda, output_pycuda, plan) | |
| thunk.inputs = inputs | |
| thunk.outputs = outputs | |
| thunk.lazy = False | |
| return thunk |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment