A script to reproduce a strange error with cuDNN conv gradinput in Theano
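To reproduce, run the script below as a plain Python script on a machine with a CUDA-capable GPU, Theano's gpuarray backend (pygpu) and cuDNN installed (e.g. with THEANO_FLAGS=device=cuda set); an optional first command-line argument selects the dtype, which defaults to 'float16'.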
from __future__ import absolute_import, print_function, division
import sys, math
import numpy as np
import theano
import theano.tests.unittest_tools as utt
from theano.gpuarray.basic_ops import infer_context_name, as_gpuarray_variable, gpu_contiguous, GpuAllocEmpty
from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.corr import CorrMM_gradInputs
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.opt import Assert
from theano.tensor.utils import hash_from_ndarray
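# Build a cuDNN gradient-of-inputs op. Following cuDNN's alpha/beta blending
# convention, the op computes: output = alpha * gradinput(kerns, topgrad) + beta * output.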
def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mode='valid', subsample=(1, 1),
                  dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
                          conv_mode=conv_mode, precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for non-zero beta) does not have the expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
def _next_ten_exponent(val):
    # Return the exponent of the next power of ten that follows val.
    # val should be a non-negative integer.
    # Examples:
    #   for 0 to 9, returns 1 (=> 10**1 == 10)
    #   for 10 to 99, returns 2 (=> 10**2 == 100)
    ten_exponent = 1
    while val // 10 > 0:
        ten_exponent += 1
        val //= 10
    return ten_exponent
def scale_numpy_arrays_inplace(A, B, alpha):
    scale_factor = 1
    # Scale down A and B simultaneously if alpha is not 1.
    if alpha != 1:
        scale_factor *= alpha
    # Normalize A and B simultaneously so that all their values fall into the interval [0, 1).
    max_a = math.floor(abs(A.max()))
    max_b = math.floor(abs(B.max()))
    if max_a or max_b:
        m_a = _next_ten_exponent(max_a)
        m_b = _next_ten_exponent(max_b)
        max_m = max(m_a, m_b)
        scale_factor *= 10 ** max_m
    if scale_factor != 1:
        A /= scale_factor
        B /= scale_factor
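# Worked example with hypothetical values: for alpha=2, A.max()=42.7 and B.max()=3.1,
# max_a=42 and max_b=3, so _next_ten_exponent returns 2 and 1 respectively;
# both arrays are then divided in place by scale_factor = 2 * 10**2 = 200.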
def array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator=np.random.random):
    out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode, subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return allocator(out_shp).astype(dtype)
def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters
    if beta == 0:
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator)
    # Scale down the input values to limit absolute errors in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10
    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)
    # Compile a Theano function for the cuDNN implementation.
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta, out=inputs_val,
                           border_mode=border_mode, subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)
    f = theano.function([], grad_i, mode=mode_with_gpu)
    # If conv_mode is 'conv', the reference implementation should use
    # filters flipped along the width, height and time axes.
    if conv_mode == 'conv':
        if filters.ndim == 5:
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        flipped_filters = filters
    # Compile a Theano function for the reference (CPU) implementation.
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation)(ref_cast(flipped_filters),
                                                             ref_cast(topgrad),
                                                             inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")
    # Compare the results of the two implementations.
    res_ref = f_ref()
    res = np.asarray(f())
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        cpu_res = alpha * res_ref + beta * inputs_val
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])
algo = 'deterministic'
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
parameters = (
    (2, 3, 300, 5),  # inputs_shape
    (2, 3, 40, 4),   # filters_shape
    (1, 1),          # subsample
    (1, 1),          # dilation
    (1, 1),          # border_mode (padding)
    'conv',          # conv_mode
    2,               # alpha
    -3,              # beta
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
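# The same configuration is run twice below with a constant allocator (np.ones),
# presumably so that the printed hashes can be compared across identical runs
# of the 'deterministic' algorithm.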
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)