@jeffdonahue
Last active March 24, 2017 05:24
test_sbce_stable.py:

import numpy as np
import theano
import theano.tensor as T
low = 'float32'
high = 'float64'
dtype_low, dtype_high = [T.TensorType(f, (False,)) for f in [low, high]]
pred32, target32 = dtype_low('p'), dtype_low('t')
pred64, target64 = dtype_high('p'), dtype_high('t')
def get_funcs(pred, target):
    # L1: naive composition of sigmoid and binary cross-entropy;
    # L2: the fused sigmoid_binary_crossentropy op under test.
    L1 = T.nnet.binary_crossentropy(T.nnet.sigmoid(pred), target)
    L2 = T.nnet.sigmoid_binary_crossentropy(pred, target)
    g1, g2 = [theano.grad(L.sum(), [pred, target]) for L in [L1, L2]]
    f1, f2 = [theano.function([pred, target], [L] + g)
              for L, g in [(L1, g1), (L2, g2)]]
    return f1, f2
f1_32, f2_32 = get_funcs(pred32, target32)
f1_64, f2_64 = get_funcs(pred64, target64)
np.random.seed(0)
def random_data(n=1000*1000): return np.random.randn(n).astype(low)
px32 = random_data()  # raw (pre-sigmoid) float32 predictions
lx32 = 1 / (1 + np.exp(random_data()))  # float32 targets in (0, 1)
px64, lx64 = [a.astype(high) for a in (px32, lx32)]
result_f1_32, result_f2_32 = f1_32(px32, lx32), f2_32(px32, lx32)
result_f1_64, result_f2_64 = f1_64(px64, lx64), f2_64(px64, lx64)
def mse(a, b): return ((a - b) ** 2).mean()
names = 'L', 'grad wrt pred', 'grad wrt target'
for gold_name, gold_result in [('BCE', result_f1_64), ('SBCE', result_f2_64)]:
    print 'Using {} as gold standard'.format(gold_name)
    for name, gold, bce, sbce in \
            zip(names, gold_result, result_f1_32, result_f2_32):
        err_bce = mse(gold, bce)
        err_sbce = mse(gold, sbce)
        improvement = (err_bce - err_sbce) / err_bce
        print '\t{}: BCE error = {}; SBCE error = {}; improvement = {}%'.format(
            name, err_bce, err_sbce, 100 * improvement)
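
The stability gap comes from how the fused op can be evaluated. As a rough illustration (a minimal NumPy sketch of the usual rewrite, not necessarily the exact form Theano's sigmoid_binary_crossentropy uses), the composed loss -t*log(sigmoid(p)) - (1-t)*log(1-sigmoid(p)) simplifies to max(p, 0) - p*t + log(1 + exp(-|p|)), which never overflows exp() and never takes log(0):

import numpy as np

def fused_sigmoid_bce(pred, target):
    # Stable form: max(p, 0) - p*t + log1p(exp(-|p|))
    return np.maximum(pred, 0) - pred * target + np.log1p(np.exp(-np.abs(pred)))

def naive_sigmoid_bce(pred, target):
    # Composed form: sigmoid() saturates to exactly 0 or 1 for large |p|,
    # so the subsequent log() produces -inf.
    s = 1.0 / (1.0 + np.exp(-pred))
    return -(target * np.log(s) + (1 - target) * np.log(1 - s))

p = np.array([-100.0, 0.0, 100.0], dtype=np.float32)
t = np.array([1.0, 0.5, 0.0], dtype=np.float32)
print(fused_sigmoid_bce(p, t))  # finite everywhere: ~[100., 0.693, 100.]
print(naive_sigmoid_bce(p, t))  # inf (and overflow warnings) where sigmoid saturated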
jeffdonahue commented Mar 23, 2017

$ export THEANO_FLAGS=mode=FAST_RUN
$ python test_sbce_stable.py
Using BCE as gold standard
        L: BCE error = 1.15314951847e-15; SBCE error = 9.38517905342e-16; improvement = 18.6126438673%
        grad wrt pred: BCE error = 3.91702318772e-16; SBCE error = 2.21139921076e-16; improvement = 43.5438825664%
        grad wrt target: BCE error = 1.19102431604e-15; SBCE error = 4.99293794371e-33; improvement = 100.0%
Using SBCE as gold standard
        L: BCE error = 1.15314951847e-15; SBCE error = 9.38517905345e-16; improvement = 18.6126438672%
        grad wrt pred: BCE error = 3.91702318773e-16; SBCE error = 2.21139921076e-16; improvement = 43.5438825665%
        grad wrt target: BCE error = 1.19102431604e-15; SBCE error = 0.0; improvement = 100.0%
$ export THEANO_FLAGS=mode=FAST_COMPILE
$ python test_sbce_stable.py
Using BCE as gold standard
        L: BCE error = 2.30376330515e-15; SBCE error = 9.3851790534e-16; improvement = 59.261530764%
        grad wrt pred: BCE error = 5.60852399173e-16; SBCE error = 2.21139921078e-16; improvement = 60.5707452792%
        grad wrt target: BCE error = 8.05222354176e-15; SBCE error = 6.49619374989e-32; improvement = 100.0%
Using SBCE as gold standard
        L: BCE error = 2.30376330522e-15; SBCE error = 9.38517905345e-16; improvement = 59.2615307649%
        grad wrt pred: BCE error = 5.60852399169e-16; SBCE error = 2.21139921076e-16; improvement = 60.5707452792%
        grad wrt target: BCE error = 8.0522235419e-15; SBCE error = 0.0; improvement = 100.0%
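
As a quick sanity check on the "improvement" column (plain arithmetic, using the float32 MSEs printed in the first "L" row above), the percentage is just the relative reduction in error against the float64 gold standard:

err_bce, err_sbce = 1.15314951847e-15, 9.38517905342e-16
print(100 * (err_bce - err_sbce) / err_bce)  # ~18.61, matching the first row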
