Last active
December 12, 2015 07:49
-
-
Save larsoner/4739735 to your computer and use it in GitHub Desktop.
CUDA DC component mismatch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduce a DC-component mismatch in FFT-based resampling: resample a
# short random vector four ways -- scipy.signal.resample, an equivalent
# pure-Python FFT implementation, pyFFTW, and scikits.cuda -- and show
# that all CPU paths agree while the CUDA path does not (see the asserts
# at the bottom, which encode the bug being reported).
import numpy as np
from scipy.signal import resample
import pycuda.autoinit
from pycuda import gpuarray
from scikits.cuda import fft as cudafft
from scipy.fftpack import fft, ifft
try:
    import pyfftw
except Exception:
    have_fftw = False
else:
    have_fftw = True
import pylab as pl

old_len = 12
new_len = 6
double = True
dtype_r = np.float64 if double else np.float32
dtype_c = np.complex128 if double else np.complex64

# make a repeatable random vector
x = np.random.RandomState(0).randn(old_len).astype(dtype_r)

# use scipy to resample it
y_sp = resample(x, new_len)

# equivalent pure-Python implementation: keep the lowest-frequency FFT
# bins of x, invert, and rescale by the length ratio
N = int(np.minimum(new_len, old_len))
sl_1 = slice((N + 1) // 2)           # // keeps slice bounds integral
sl_2 = slice(-(N - 1) // 2, None)
x_fft = fft(x).ravel()
y_fft = np.zeros(new_len, np.complex128)
y_fft[sl_1] = x_fft[sl_1]
y_fft[sl_2] = x_fft[sl_2]
y_py = np.real(ifft(y_fft, overwrite_x=True)).ravel()
y_py *= (float(new_len) / old_len)

if have_fftw:
    # use pyFFTW: real-to-complex forward, complex-to-real backward.
    # x_fft and y_fft are overlapping views into one buffer, so the
    # forward transform's low-frequency bins are implicitly the input
    # of the backward transform -- no explicit copy is needed.
    fftw_len_x = old_len // 2 + 1    # rfft output length
    fftw_len_y = new_len // 2 + 1
    xy_fft = np.zeros(max(fftw_len_x, fftw_len_y), dtype_c)
    x_fft = xy_fft[:fftw_len_x]
    y_fft = xy_fft[:fftw_len_y]
    y_fw = np.zeros(new_len, dtype_r)
    y_fft.fill(0)
    fft_plan = pyfftw.FFTW(x, x_fft)
    ifft_plan = pyfftw.FFTW(y_fft, y_fw, direction='FFTW_BACKWARD')
    fft_plan.execute()
    ifft_plan.execute()
    y_fw /= float(old_len)
else:
    # no pyFFTW available; alias so the comparisons below still run
    y_fw = y_py

# use (scikits.)CUDA for the same forward/backward round trip
fft_plan = cudafft.Plan(old_len, dtype_r, dtype_c)
ifft_plan = cudafft.Plan(new_len, dtype_c, dtype_r)
# this allocation is bigger than needed, but results didn't change when smaller
xy_fft_gpu = gpuarray.to_gpu(np.zeros(max(old_len, new_len), dtype_c))
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(np.zeros(new_len, dtype_r))
cudafft.fft(x_gpu, xy_fft_gpu, fft_plan)
cudafft.ifft(xy_fft_gpu, y_gpu, ifft_plan, scale=False)
y_gpu = y_gpu.get() / float(old_len)

# plot the spectral difference between the CUDA and scipy results
pl.clf()
assert len(y_sp) == len(y_py) == len(y_fw) == len(y_gpu)
d = np.abs(fft(y_gpu) - fft(y_sp))
pl.plot(d)
print(d)
pl.show()

# note that scipy's implementation matches the python version
assert np.allclose(y_sp, y_py)
assert np.allclose(y_sp, y_fw)
# ...but the CUDA result deliberately does NOT match -- this is the bug
assert not np.allclose(y_sp, y_gpu)
print([np.mean(xx) for xx in [y_py, y_sp, y_fw, y_gpu]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment