MXNet: manual gradient computation vs. automatic computation through a graph loss symbol
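This gist compares ways of obtaining the same parameter gradients from MXNet's symbolic Module API for a small two-layer network (fc1 with 20 hidden units, fc2 with 1 output, both bias-free): feeding a hand-derived output gradient to backward(out_grads=...) versus building the loss as a symbol and letting MakeLoss drive the backward pass. For the squared-error case, the identity being exercised is d/dY sum((Y - t)^2) = 2 * (Y - t), which network1 supplies manually while network2 and network3 obtain it automatically; network4 and network5 do the same for the loss sum(-Y), whose output gradient is the constant -1.

As a quick sanity check on that identity (a minimal NumPy sketch, separate from the gist file below and assuming nothing beyond NumPy):

import numpy as np

# Central finite differences: d/dy_i sum((y - t)^2) should equal 2 * (y_i - t_i).
rng = np.random.RandomState(0)
y = rng.rand(10, 1)
t = rng.rand(10, 1)
eps = 1e-6
num_grad = np.zeros_like(y)
for i in range(y.shape[0]):
    y_hi, y_lo = y.copy(), y.copy()
    y_hi[i] += eps
    y_lo[i] -= eps
    num_grad[i] = (np.sum((y_hi - t) ** 2) - np.sum((y_lo - t) ** 2)) / (2 * eps)
assert np.allclose(num_grad, 2 * (y - t), atol=1e-5)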
import mxnet as mx
import numpy as np
import mxnet.ndarray as nd
# Hard-coded weights for the two FC layers so every network starts from identical parameters
fc1_weight = [[0.58294852, 0.42608512, 0.96363545, 0.24708573],
              [0.4417113, 0.10523346, 0.80098576, 0.46849809],
              [0.08320606, 0.02893325, 0.55576215, 0.52159727],
              [0.675476, 0.84439869, 0.1482909, 0.26626008],
              [0.52650772, 0.07457639, 0.44785518, 0.71549555],
              [0.52523266, 0.19377938, 0.02757852, 0.97459566],
              [0.92856014, 0.68095418, 0.26694229, 0.68441936],
              [0.77398185, 0.99398437, 0.35615027, 0.82315567],
              [0.42741064, 0.05671271, 0.67730508, 0.00568796],
              [0.289452, 0.90104051, 0.01030181, 0.69920418],
              [0.29214084, 0.38468911, 0.988573, 0.48585752],
              [0.27455112, 0.27176394, 0.54156989, 0.84917456],
              [0.57979248, 0.73031199, 0.88747021, 0.59988834],
              [0.59616888, 0.93295241, 0.01882551, 0.53378031],
              [0.61438601, 0.4328007, 0.87544448, 0.25117127],
              [0.99582249, 0.90169104, 0.95259686, 0.28756852],
              [0.34949697, 0.58722187, 0.56623397, 0.36667131],
              [0.42461918, 0.93779852, 0.85555812, 0.08616323],
              [0.54120875, 0.74651195, 0.76785371, 0.14003219],
              [0.04704561, 0.42374879, 0.34553214, 0.39982563]]
fc2_weight = [[0.93034432, 0.97610128, 0.73023039, 0.66139541, 0.20614977, 0.71942276,
               0.45235542, 0.97821716, 0.43852386, 0.41171249, 0.99458696, 0.15474394,
               0.9290056, 0.42087649, 0.12136203, 0.24829071, 0.60336717, 0.42120195,
               0.96704948, 0.03118107]]
input = [[0.0883304, 0.57117354, 0.85744546, 0.10524275],
         [0.73636623, 0.77042306, 0.49162071, 0.77236564],
         [0.89761976, 0.67380134, 0.93971695, 0.01783999],
         [0.229045, 0.93465984, 0.03231421, 0.83666546],
         [0.64807769, 0.66681773, 0.29663677, 0.69582481],
         [0.67980967, 0.64803839, 0.75006107, 0.13230413],
         [0.10044154, 0.99758292, 0.41892161, 0.09607127],
         [0.85938775, 0.40909924, 0.58771238, 0.0030594],
         [0.15055842, 0.03993676, 0.48678655, 0.38202192],
         [0.40731432, 0.57124535, 0.99926446, 0.11714415]]
target = [[0.40862913],
          [0.46507598],
          [0.73679818],
          [0.754124],
          [0.32920501],
          [0.6278464],
          [0.80039361],
          [0.52620516],
          [0.5376822],
          [0.34798077]]
# Move data to the GPU and share one set of initial parameters across all networks
target = nd.array(target, ctx=mx.gpu(0))
input = nd.array(input, ctx=mx.gpu(0))
init_params = {
    'fc1_weight': nd.array(fc1_weight, ctx=mx.gpu(0)),
    'fc2_weight': nd.array(fc2_weight, ctx=mx.gpu(0))
}
def network1(input, target):
    """Manual gradient: forward the raw fc2 output, then hand backward()
    the analytic output gradient d/dY sum((Y - target)^2) = 2 * (Y - target)."""
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    model = mx.mod.Module(fc2, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None, inputs_need_grad=True, grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    Y = model.get_outputs()[0]
    model.backward(out_grads=[2 * (Y - target)])
    grad_dict = model._exec_group.execs[0].grad_dict  # gradients from the module's internal executor
    return grad_dict['fc1_weight'].asnumpy()
def network2(input, target):
    """Automatic gradient: build the scalar loss sum((fc2 - target)^2) as a
    symbol, wrap it in MakeLoss, and let backward() differentiate the graph."""
    data = mx.symbol.Variable('data')
    target_sym = mx.symbol.Variable('target')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    loss = mx.symbol.sum(mx.symbol.square(fc2 - target_sym))
    loss = mx.symbol.MakeLoss(loss)
    out = mx.symbol.BlockGrad(fc2)  # expose fc2 as an output without backpropagating through it
    loss = mx.symbol.Group([loss, out])
    model = mx.mod.Module(loss, data_names=('data', 'target'),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4)), ('target', (10, 1))], label_shapes=None, inputs_need_grad=True,
               grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input, target], label=None), is_train=True)
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network3(input, target):
    """Like network2, but the loss symbol stays element-wise; MakeLoss
    backpropagates a gradient of 1 per element, which is equivalent to
    differentiating the summed loss."""
    data = mx.symbol.Variable('data')
    target_sym = mx.symbol.Variable('target')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    loss = mx.symbol.square(fc2 - target_sym)
    loss = mx.symbol.MakeLoss(loss)
    model = mx.mod.Module(loss, data_names=('data', 'target'),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4)), ('target', (10, 1))], label_shapes=None, inputs_need_grad=True,
               grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input, target], label=None), is_train=True)
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network4(input):
    """Automatic gradient for the trivial loss sum(-fc2)."""
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    loss = mx.symbol.MakeLoss(mx.symbol.sum(-fc2))
    model = mx.mod.Module(loss, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None, inputs_need_grad=True,
               grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network5(input):
    """Manual counterpart of network4: the gradient of sum(-fc2) with
    respect to fc2 is the constant -1 for every output element."""
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    model = mx.mod.Module(fc2, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None, inputs_need_grad=True,
               grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    gradient = -1 * nd.ones((10, 1), ctx=mx.gpu(0))
    model.backward(out_grads=[gradient])
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
# Each difference should be a 20x4 matrix of (near-)zeros if the manual
# and automatic gradient computations agree.
print(network1(input, target) - network2(input, target))
print(network2(input, target) - network3(input, target))
print(network4(input) - network5(input))
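If the manual and symbolic paths agree, all three printed fc1_weight gradient differences should be zero up to floating-point round-off. Note that the script assumes a GPU is available (every array and Module is placed on mx.gpu(0)); on a CPU-only machine, swapping mx.gpu(0) for mx.cpu() throughout should reproduce the same comparison.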