"""A script to reproduce a strange error with cudnn conv gradinput in Theano."""
from __future__ import absolute_import, print_function, division
import sys, math
import numpy as np
import theano
import theano.tests.unittest_tools as utt
from theano.gpuarray.basic_ops import infer_context_name, as_gpuarray_variable, gpu_contiguous, GpuAllocEmpty
from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.corr import CorrMM_gradInputs
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.opt import Assert
from theano.tensor.utils import hash_from_ndarray


def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None,
                  border_mode='valid', subsample=(1, 1), dilation=(1, 1),
                  conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          dilation=dilation, conv_mode=conv_mode,
                          precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for non-zero beta) does not have the expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
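
# Note on the graph built by dnn_gradinput: with beta == 0 the output buffer is
# a fresh GpuAllocEmpty, so the op simply writes alpha * conv_grad_input into it;
# with beta != 0 the user-provided `out` is shape-checked against img_shp and
# blended as alpha * conv_grad_input + beta * out (the usual cuDNN alpha/beta
# convention, and what the reference computation below reproduces on CPU).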


def _next_ten_exponent(val):
    # Return the exponent of the smallest power of ten strictly greater than val.
    # val should be a non-negative integer.
    # Examples:
    # for 0 to 9, returns 1 (=> 10**1 == 10)
    # for 10 to 99, returns 2 (=> 10**2 == 100)
    ten_exponent = 1
    while val // 10 > 0:
        ten_exponent += 1
        val //= 10
    return ten_exponent
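
# A few cheap, deterministic checks illustrating the examples above:
assert _next_ten_exponent(0) == 1
assert _next_ten_exponent(9) == 1
assert _next_ten_exponent(10) == 2
assert _next_ten_exponent(300) == 3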


def scale_numpy_arrays_inplace(A, B, alpha):
    scale_factor = 1
    # Scale A and B down simultaneously if alpha is not 1.
    if alpha != 1:
        scale_factor *= alpha
    # Normalize A and B simultaneously so that all their values fall into
    # the interval [0, 1).
    max_a = math.floor(abs(A.max()))
    max_b = math.floor(abs(B.max()))
    if max_a or max_b:
        m_a = _next_ten_exponent(max_a)
        m_b = _next_ten_exponent(max_b)
        max_m = max(m_a, m_b)
        scale_factor *= 10 ** max_m
    if scale_factor != 1:
        A /= scale_factor
        B /= scale_factor
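
# Minimal illustration of the scaling (hypothetical values, unrelated to the
# repro below): with max |A| == 250, max |B| == 3 and alpha == 2, both arrays
# are divided by 2 * 10 ** 3, which maps their entries into [0, 1).
_demo_a = np.array([250.0, 1.0])
_demo_b = np.array([3.0, 0.5])
scale_numpy_arrays_inplace(_demo_a, _demo_b, 2)
assert _demo_a[0] == 250.0 / 2000 and _demo_b[0] == 3.0 / 2000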


def array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample,
                           dilation, dtype, allocator=np.random.random):
    out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode, subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return allocator(out_shp).astype(dtype)
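
# For the shapes used at the bottom of this script ((2, 3, 300, 5) inputs,
# (2, 3, 40, 4) filters, padding (1, 1), subsample (1, 1), dilation (1, 1)),
# this allocates an array of shape (2, 2, 263, 4): batch 2, 2 output channels,
# height 300 + 2 - 40 + 1 = 263, width 5 + 2 - 4 + 1 = 4.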


def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters
    if beta == 0:
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                         subsample, dilation, dtype, allocator)
    # Scale the values down to keep absolute errors small in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10
    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)
    # Compile a Theano function for the cuDNN implementation.
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta,
                           out=inputs_val, border_mode=border_mode,
                           subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)
    f = theano.function([], grad_i, mode=mode_with_gpu)
    # If conv_mode is 'conv', the reference implementation should use
    # filters flipped along the height, width (and time, for 5D) axes.
    if conv_mode == 'conv':
        if filters.ndim == 5:
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        flipped_filters = filters
    # Compile a Theano function for the reference (CPU) implementation.
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation
                                   )(ref_cast(flipped_filters),
                                     ref_cast(topgrad),
                                     inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")
    # Compare the results of the two implementations.
    res_ref = f_ref()
    res = np.asarray(f())
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        cpu_res = alpha * res_ref + beta * inputs_val
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    # Scale both results down before the comparison.
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])


algo = 'deterministic'
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
parameters = (
    (2, 3, 300, 5),  # inputs_shape
    (2, 3, 40, 4),   # filters_shape
    (1, 1),          # subsample
    (1, 1),          # dilation
    (1, 1),          # border_mode (padding)
    'conv',          # conv_mode
    2,               # alpha
    -3               # beta
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
# Run the same test twice with constant inputs (np.ones) so that the printed
# hashes can be compared across runs.
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)
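
# Usage sketch (assumes a cuDNN-enabled gpuarray backend; the file name is
# hypothetical):
#   THEANO_FLAGS=device=cuda python repro_dnn_gradinput.py [float16|float32|float64]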