Last active
December 12, 2015 07:49
-
-
Save larsoner/4739735 to your computer and use it in GitHub Desktop.
CUDA DC component mismatch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduce a DC-component mismatch in FFT-based resampling: resample a
# short random vector four ways -- scipy.signal.resample, an equivalent
# pure-Python FFT implementation, pyFFTW, and scikits.cuda -- and show
# that all CPU paths agree while the CUDA path does not (see the asserts
# at the bottom, which encode the bug being reported).
import numpy as np
from scipy.signal import resample
import pycuda.autoinit
from pycuda import gpuarray
from scikits.cuda import fft as cudafft
from scipy.fftpack import fft, ifft
try:
    import pyfftw
except Exception:
    have_fftw = False
else:
    have_fftw = True
import pylab as pl

old_len = 12
new_len = 6
double = True
dtype_r = np.float64 if double else np.float32
dtype_c = np.complex128 if double else np.complex64

# make a repeatable random vector
x = np.random.RandomState(0).randn(old_len).astype(dtype_r)

# use scipy to resample it
y_sp = resample(x, new_len)

# equivalent pure-Python implementation: keep the lowest-frequency FFT
# bins of x, invert, and rescale by the length ratio
N = int(np.minimum(new_len, old_len))
sl_1 = slice((N + 1) // 2)           # // keeps slice bounds integral
sl_2 = slice(-(N - 1) // 2, None)
x_fft = fft(x).ravel()
y_fft = np.zeros(new_len, np.complex128)
y_fft[sl_1] = x_fft[sl_1]
y_fft[sl_2] = x_fft[sl_2]
y_py = np.real(ifft(y_fft, overwrite_x=True)).ravel()
y_py *= (float(new_len) / old_len)

if have_fftw:
    # use pyFFTW: real-to-complex forward, complex-to-real backward.
    # x_fft and y_fft are overlapping views into one buffer, so the
    # forward transform's low-frequency bins are implicitly the input
    # of the backward transform -- no explicit copy is needed.
    fftw_len_x = old_len // 2 + 1    # rfft output length
    fftw_len_y = new_len // 2 + 1
    xy_fft = np.zeros(max(fftw_len_x, fftw_len_y), dtype_c)
    x_fft = xy_fft[:fftw_len_x]
    y_fft = xy_fft[:fftw_len_y]
    y_fw = np.zeros(new_len, dtype_r)
    y_fft.fill(0)
    fft_plan = pyfftw.FFTW(x, x_fft)
    ifft_plan = pyfftw.FFTW(y_fft, y_fw, direction='FFTW_BACKWARD')
    fft_plan.execute()
    ifft_plan.execute()
    y_fw /= float(old_len)
else:
    # no pyFFTW available; alias so the comparisons below still run
    y_fw = y_py

# use (scikits.)CUDA for the same forward/backward round trip
fft_plan = cudafft.Plan(old_len, dtype_r, dtype_c)
ifft_plan = cudafft.Plan(new_len, dtype_c, dtype_r)
# this allocation is bigger than needed, but results didn't change when smaller
xy_fft_gpu = gpuarray.to_gpu(np.zeros(max(old_len, new_len), dtype_c))
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(np.zeros(new_len, dtype_r))
cudafft.fft(x_gpu, xy_fft_gpu, fft_plan)
cudafft.ifft(xy_fft_gpu, y_gpu, ifft_plan, scale=False)
y_gpu = y_gpu.get() / float(old_len)

# plot the spectral difference between the CUDA and scipy results
pl.clf()
assert len(y_sp) == len(y_py) == len(y_fw) == len(y_gpu)
d = np.abs(fft(y_gpu) - fft(y_sp))
pl.plot(d)
print(d)
pl.show()

# note that scipy's implementation matches the python version
assert np.allclose(y_sp, y_py)
assert np.allclose(y_sp, y_fw)
# ...but the CUDA result deliberately does NOT match -- this is the bug
assert not np.allclose(y_sp, y_gpu)
print([np.mean(xx) for xx in [y_py, y_sp, y_fw, y_gpu]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment