benanne · April 10, 2014 19:25
diff --git a/cufftop.py b/cufftop.py
 import numpy as np 
 import theano

 import theano.misc.pycuda_init
 import theano.sandbox.cuda as cuda
 from theano.misc.pycuda_utils import to_gpuarray, to_cudandarray

 from scikits.cuda import fft


 class CuFFTOp(cuda.GpuOp):
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, inp):
        inp = cuda.basic_ops.gpu_contiguous(
           cuda.basic_ops.as_cuda_ndarray_variable(inp))

        assert inp.dtype == "float32"

        otype = cuda.CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1)) # add one extra dim for real/imag

        return theano.Apply(self, [inp], [otype()])

    def make_thunk(self, node, storage_map, _, _2):
        inputs = [ storage_map[v] for v in node.inputs]
        outputs = [ storage_map[v] for v in node.outputs]

        # plan_input_shape = None
        # plan = None

        def thunk():
            input_shape = inputs[0][0].shape
            output_shape = (input_shape[0], input_shape[1], input_shape[2] // 2 + 1, 2) # extra dimension with length 2 for real/imag

            z = outputs[0]

            # only allocate if there is no previous allocation of the right size.
            if z[0] is None or z[0].shape != output_shape:
                z[0] = cuda.CudaNdarray.zeros(output_shape)

            input_pycuda = to_gpuarray(inputs[0][0])
            # I thought we'd need to change the type on output_pycuda so it is complex64,
            # but as it turns out scikits.cuda.fft doesn't really care either way and
            # treats the array as if it is complex64 anyway.
            output_pycuda = to_gpuarray(z[0])

            # # only initialise plan if necessary
            # if plan is None or plan_input_shape != input_shape:
            #     plan_input_shape = input_shape
            #     plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])
            plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])

            fft.fft(input_pycuda, output_pycuda, plan)


        thunk.inputs = inputs
        thunk.outputs = outputs
        thunk.lazy = False

        return thunk
	import numpy as np
	import theano

	import theano.misc.pycuda_init
	import theano.sandbox.cuda as cuda
	from theano.misc.pycuda_utils import to_gpuarray, to_cudandarray

	from scikits.cuda import fft


	class CuFFTOp(cuda.GpuOp):
	def __eq__(self, other):
	return type(self) == type(other)

	def __hash__(self):
	return hash(type(self))

	def __str__(self):
	return self.__class__.__name__

	def make_node(self, inp):
	inp = cuda.basic_ops.gpu_contiguous(
	cuda.basic_ops.as_cuda_ndarray_variable(inp))

	assert inp.dtype == "float32"

	otype = cuda.CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1)) # add one extra dim for real/imag

	return theano.Apply(self, [inp], [otype()])

	def make_thunk(self, node, storage_map, _, _2):
	inputs = [ storage_map[v] for v in node.inputs]
	outputs = [ storage_map[v] for v in node.outputs]

	# plan_input_shape = None
	# plan = None

	def thunk():
	input_shape = inputs[0][0].shape
	output_shape = (input_shape[0], input_shape[1], input_shape[2] // 2 + 1, 2) # extra dimension with length 2 for real/imag

	z = outputs[0]

	# only allocate if there is no previous allocation of the right size.
	if z[0] is None or z[0].shape != output_shape:
	z[0] = cuda.CudaNdarray.zeros(output_shape)

	input_pycuda = to_gpuarray(inputs[0][0])
	# I thought we'd need to change the type on output_pycuda so it is complex64,
	# but as it turns out scikits.cuda.fft doesn't really care either way and
	# treats the array as if it is complex64 anyway.
	output_pycuda = to_gpuarray(z[0])

	# # only initialise plan if necessary
	# if plan is None or plan_input_shape != input_shape:
	# plan_input_shape = input_shape
	# plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])
	plan = fft.Plan((input_shape[1], input_shape[2]), np.float32, np.complex64, batch=input_shape[0])

	fft.fft(input_pycuda, output_pycuda, plan)


	thunk.inputs = inputs
	thunk.outputs = outputs
	thunk.lazy = False

	return thunk
No results found