A script to reproduce a strange error with cuDNN conv gradinput in Theano
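To reproduce, run the script below as a plain Python script on a machine with a CUDA-capable GPU, Theano's gpuarray backend (pygpu) and cuDNN installed (e.g. with THEANO_FLAGS=device=cuda set); an optional first command-line argument selects the dtype, which defaults to 'float16'.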
from __future__ import absolute_import, print_function, division
import sys, math
import numpy as np
import theano
import theano.tests.unittest_tools as utt
from theano.gpuarray.basic_ops import infer_context_name, as_gpuarray_variable, gpu_contiguous, GpuAllocEmpty
from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.corr import CorrMM_gradInputs
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.opt import Assert
from theano.tensor.utils import hash_from_ndarray
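# Build a cuDNN gradient-of-inputs op. Following cuDNN's alpha/beta blending
# convention, the op computes: output = alpha * gradinput(kerns, topgrad) + beta * output.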
def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mode='valid', subsample=(1, 1),
                  dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
                          conv_mode=conv_mode, precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for non-zero beta) does not have the expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
def _next_ten_exponent(val):
    # Return the exponent of the next power of ten that follows val.
    # val should be a non-negative integer.
    # Examples:
    #   for 0 to 9, returns 1 (=> 10**1 == 10)
    #   for 10 to 99, returns 2 (=> 10**2 == 100)
    ten_exponent = 1
    while val // 10 > 0:
        ten_exponent += 1
        val //= 10
    return ten_exponent
def scale_numpy_arrays_inplace(A, B, alpha):
    scale_factor = 1
    # Scale down A and B simultaneously if alpha is not 1.
    if alpha != 1:
        scale_factor *= alpha
    # Normalize A and B simultaneously so that all their values fall into the interval [0, 1).
    max_a = math.floor(abs(A.max()))
    max_b = math.floor(abs(B.max()))
    if max_a or max_b:
        m_a = _next_ten_exponent(max_a)
        m_b = _next_ten_exponent(max_b)
        max_m = max(m_a, m_b)
        scale_factor *= 10 ** max_m
    if scale_factor != 1:
        A /= scale_factor
        B /= scale_factor
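# Worked example with hypothetical values: for alpha=2, A.max()=42.7 and B.max()=3.1,
# max_a=42 and max_b=3, so _next_ten_exponent returns 2 and 1 respectively;
# both arrays are then divided in place by scale_factor = 2 * 10**2 = 200.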
def array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator=np.random.random):
    out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode, subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return allocator(out_shp).astype(dtype)
def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters
    if beta == 0:
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator)
    # Scale down the input values to limit absolute errors in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10
    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)
    # Compile a Theano function for the cuDNN implementation.
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta, out=inputs_val,
                           border_mode=border_mode, subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)
    f = theano.function([], grad_i, mode=mode_with_gpu)
    # If conv_mode is 'conv', the reference implementation should use
    # filters flipped along the width, height and time axes.
    if conv_mode == 'conv':
        if filters.ndim == 5:
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        flipped_filters = filters
    # Compile a Theano function for the reference (CPU) implementation.
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation)(ref_cast(flipped_filters),
                                                             ref_cast(topgrad),
                                                             inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")
    # Compare the results of the two implementations.
    res_ref = f_ref()
    res = np.asarray(f())
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        cpu_res = alpha * res_ref + beta * inputs_val
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])
algo = 'deterministic'
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
parameters = (
    (2, 3, 300, 5),  # inputs_shape
    (2, 3, 40, 4),   # filters_shape
    (1, 1),          # subsample
    (1, 1),          # dilation
    (1, 1),          # border_mode (padding)
    'conv',          # conv_mode
    2,               # alpha
    -3,              # beta
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
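# The same configuration is run twice below with a constant allocator (np.ones),
# presumably so that the printed hashes can be compared across identical runs
# of the 'deterministic' algorithm.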
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)