MXNet gluon.nn.BatchNorm issue report
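The script below builds the same Gluon network twice, once with and once without BatchNorm layers in its embeddings tail, runs a single training step followed by an inference pass on both the CPU and the GPU context, and collects the outputs in a pandas DataFrame so they can be compared across contexts.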
import os
import gluoncv
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
import pandas as pd
from mxnet import init, autograd
from mxnet.gluon import nn

WITH_RESNET = True
N_EMBEDDINGS = 128
N_CLASSES = 10

np.random.seed(42)
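# Disable cuDNN convolution autotuning, so that the chosen algorithms (and with them
# the numerical results) are repeatable between runs.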
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

class MyNet(nn.HybridBlock):
    """ Network with a feature extractor, a custom tail (embeddings layer) and an output layer.
    Set the global variable WITH_RESNET to True to use a ResNet50 as feature extractor, or to
    False for a single conv block.
    """

    def __init__(self, with_batchnorm, with_resnet=False):
        super(MyNet, self).__init__()
        with self.name_scope():
            # Choose a large feature extractor...
            if with_resnet:
                backbone = gluoncv.model_zoo.get_model("resnet50_v1", pretrained=False)
                self.features = backbone.features
            # ...or a custom small one.
            else:
                self.features = nn.HybridSequential()
                self.features.add(nn.Conv2D(3, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), use_bias=False))
                self.features.add(nn.BatchNorm())
                self.features.add(nn.Activation("relu"))
                self.features.add(nn.AvgPool2D(pool_size=(2, 2), strides=(2, 2)))
            self.tail = nn.HybridSequential()
            self.tail.add(nn.Flatten())
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.tail.add(nn.Dense(N_EMBEDDINGS, weight_initializer=init.Normal(0.01)))
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.output = nn.Dense(N_CLASSES, weight_initializer=init.Normal(0.01))

    def hybrid_forward(self, F, x, *args, **kwargs):
        x = self.features(x)
        x = self.tail(x)
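        # Only return the class predictions while autograd is recording (i.e. during
        # training); at inference time the tail output serves as the embedding.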
        if autograd.is_recording():
            x = self.output(x)
        return x

def get_net(with_batchnorm, input_shape):
    """ Build the network and initialize the parameters.

    PARAMETERS
    ----------
    with_batchnorm: bool
        Adds BatchNorm layers to the tail of the network, if set to True.
    input_shape: tuple of int
        Shape of the dummy input used for the one-time initializing forward pass.

    RETURNS
    -------
    MyNet
    """
    net = MyNet(with_batchnorm, WITH_RESNET)
    net.initialize()
    # Since parameters are lazily initialized, do a one-time forward pass to complete
    # initialization. Use autograd.record to reach the last layer (net.output) as well.
    x = nd.zeros(shape=input_shape)
    with autograd.record():
        _ = net(x)
    return net

def check_params(net, mx_ctx):
    """ Check that all parameters have been initialized. If one has not, the call to
    param.data() will crash the program.

    PARAMETERS
    ----------
    net: MyNet
        Object of MyNet.
    mx_ctx: list of mxnet.context.Context
        MXNet compute contexts.
    """
    for ctx in mx_ctx:
        for name, param in net.collect_params().items():
            _ = param.data(ctx)

if __name__ == '__main__':
    ctx_test_list = [mx.cpu(), mx.gpu(0)]
    input_shape_list = [(1, 3, 224, 224), (2, 3, 224, 224)]
    res_list = []
    loss_fn = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    # Loop over 2 different networks, one with BN layers in net.tail and one without.
    for input_shape in input_shape_list:
        x = nd.random.randn(*input_shape)
        y = nd.random.randint(0, N_CLASSES, shape=(input_shape[0],))
        for with_batchnorm in [True, False]:
            # Create the network and initialize it on the CPU context.
            net = get_net(with_batchnorm, input_shape)
            y_out_list = []
            y_embeddings_list = []
            # Test on CPU and GPU context. The results should be the same.
            for ctx in ctx_test_list:
                res = {}
                res["ctx"] = str(ctx)
                res["with_batchnorm"] = with_batchnorm
                # Copy the input data and network parameters to the context of choice.
                x = x.as_in_context(ctx)
                y = y.as_in_context(ctx)
                net.collect_params().reset_ctx([ctx])
                check_params(net, [ctx])
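                # Note: a fresh Trainer, and with it fresh optimizer state, is created
                # for every context.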
                trainer = mx.gluon.Trainer(net.collect_params(), "sgd",
                                           {"learning_rate": 0.01, "momentum": 0.9, "wd": 0.0005})
                # Simulate a forward training step, computing the class predictions.
                with autograd.record():
                    y_out = net(x)
                    loss = loss_fn(y_out, y)
                loss.backward()
                trainer.step(batch_size=input_shape[0])
                # Simulate a forward validation step, computing the embeddings.
                y_embeddings = net(x)
                # Do some validation comparison with embeddings here, e.g.:
                # distance = np.sum(np.square(y_embeddings - y_embeddings_val), axis=1)
                # acc = ...
                # res["x"] = x.asnumpy()
                res["input_shape"] = input_shape
                res["y_out"] = y_out.asnumpy()
                res["y_embeddings"] = y_embeddings.asnumpy()
                res_list.append(res)

    df = pd.DataFrame(res_list)
    pd.set_option('display.max_rows', 50)
    pd.set_option('display.max_columns', 50)
    pd.set_option('display.width', 1000)
    print(df)
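To reduce the printed DataFrame to one number per configuration, the CPU and GPU outputs can also be compared numerically. Below is a minimal sketch, assuming the res_list layout produced above, where the inner loop appends a CPU entry directly followed by a GPU entry for every input_shape/with_batchnorm combination; the helper compare_contexts is hypothetical and not part of the original report:

import numpy as np

def compare_contexts(res_list):
    # res_list alternates a CPU and a GPU entry per (input_shape, with_batchnorm)
    # combination, so consecutive pairs belong to the same configuration.
    for res_cpu, res_gpu in zip(res_list[0::2], res_list[1::2]):
        diff = np.abs(res_cpu["y_embeddings"] - res_gpu["y_embeddings"]).max()
        print(res_cpu["input_shape"], res_cpu["with_batchnorm"],
              "max |cpu - gpu| embedding difference:", diff)

compare_contexts(res_list)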