Skip to content

Instantly share code, notes, and snippets.

@larsoner
Last active December 12, 2015 07:49
Show Gist options
  • Save larsoner/4739735 to your computer and use it in GitHub Desktop.
Save larsoner/4739735 to your computer and use it in GitHub Desktop.
CUDA DC component mismatch
import numpy as np
from scipy.signal import resample
import pycuda.autoinit
from pycuda import gpuarray
from scikits.cuda import fft as cudafft
from scipy.fftpack import fft, ifft
try:
import pyfftw
except Exception:
have_fftw = False
else:
have_fftw = True
import pylab as pl
old_len = 12
new_len = 6
double = True
dtype_r = np.float64 if double is True else np.float32
dtype_c = np.complex128 if double is True else np.complex64
# make a repeatable random vector
x = np.random.RandomState(0).randn(old_len).astype(dtype_r)
# use scipy to resample it
y_sp = resample(x, new_len)
# use equivalent python implementation
N = int(np.minimum(new_len, old_len))
sl_1 = slice((N + 1) / 2)
sl_2 = slice(-(N - 1) / 2, None)
x_fft = fft(x).ravel()
y_fft = np.zeros(new_len, np.complex128)
y_fft[sl_1] = x_fft[sl_1]
y_fft[sl_2] = x_fft[sl_2]
y_py = np.real(ifft(y_fft, overwrite_x=True)).ravel()
y_py *= (float(new_len) / old_len)
if have_fftw:
# use fftw
# use fftw
fftw_len_x = int((old_len - (old_len % 2)) / 2 + 1)
fftw_len_y = int((new_len - (new_len % 2)) / 2 + 1)
xy_fft = np.zeros(max(fftw_len_x, fftw_len_y), dtype_c)
x_fft = xy_fft[:fftw_len_x]
y_fft = xy_fft[:fftw_len_y]
#y_fft = np.zeros((1, fftw_len_y), dtype_c)
y_fw = np.zeros(new_len, dtype_r)
y_fft.fill(0)
fft_plan = pyfftw.FFTW(x, x_fft)
ifft_plan = pyfftw.FFTW(y_fft, y_fw, direction='FFTW_BACKWARD')
fft_plan.execute()
#y_fft[0, sl_1] = x_fft[0, sl_1]
ifft_plan.execute()
y_fw /= float(old_len)
else:
y_fw = y_py
# use (scikits.)CUDA
fft_plan = cudafft.Plan(old_len, dtype_r, dtype_c)
ifft_plan = cudafft.Plan(new_len, dtype_c, dtype_r)
# this allocation is bigger than needed, but results didn't change when smaller
xy_fft_gpu = gpuarray.to_gpu(np.zeros(max(old_len, new_len), dtype_c))
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(np.zeros(new_len, dtype_r))
cudafft.fft(x_gpu, xy_fft_gpu, fft_plan)
cudafft.ifft(xy_fft_gpu, y_gpu, ifft_plan, scale=False)
y_gpu = y_gpu.get() / float(old_len)
pl.clf()
assert len(y_sp) == len(y_py) == len(y_fw) == len(y_gpu)
d = np.abs(fft(y_gpu) - fft(y_sp))
pl.plot(d)
print d
pl.show()
# note that scipy's implementation matches the python version
assert np.allclose(y_sp, y_py)
assert np.allclose(y_sp, y_fw)
assert not np.allclose(y_sp, y_gpu)
print [np.mean(xx) for xx in [y_py, y_sp, y_fw, y_gpu]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment