Skip to content

Instantly share code, notes, and snippets.

@BarclayII
Created June 3, 2017 22:08
Show Gist options
  • Save BarclayII/03f11523741509ff0638462b33a8f141 to your computer and use it in GitHub Desktop.
Save BarclayII/03f11523741509ff0638462b33a8f141 to your computer and use it in GitHub Desktop.
MXNet bug?
import argparse
import mxnet as mx
from mxnet import nn
from mxnet.contrib import autograd
import numpy as np
import time
import os
ngpu = 1
nz = 100
ngf = 64
ndf = 64
nc = 3
ctx = mx.gpu()
resize = 64
data_shape = (3, 64, 64)
batch_size = 64
if not os.path.isdir("data/"):
os.system("mkdir data/")
if (not os.path.exists('data/cifar/train.rec')) or \
(not os.path.exists('data/cifar/test.rec')) or \
(not os.path.exists('data/cifar/train.lst')) or \
(not os.path.exists('data/cifar/test.lst')):
os.system("wget -q http://data.mxnet.io/mxnet/data/cifar10.zip -P data/")
os.chdir("./data")
os.system("unzip -u cifar10.zip")
os.chdir("..")
netG = nn.Sequential()
# input is Z, going into a convolution
netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, in_filters=nz, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 8))
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 4 x 4
netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1, in_filters=ngf * 8, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 4))
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 8 x 8
netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1, in_filters=ngf * 4, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 2))
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 16 x 16
netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, in_filters=ngf * 2, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf))
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 32 x 32
netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, in_filters=ngf, use_bias=False))
netG.add(nn.Activation('tanh'))
# state size. (nc) x 64 x 64
# FIXME:
# The program crashes with the following message:
#[18:06:40] src/io/iter_image_recordio_2.cc:135: ImageRecordIOParser2: data/cifar/train.rec, use 3 threads for decoding..
#[18:06:41] /scratch/qg323/mxnet/dmlc-core/include/dmlc/./logging.h:304: [18:06:41] /scratch/qg323/mxnet/mshadow/mshadow/./stream_gpu-inl.h:141: Check failed: e == cudaSuccess CUDA: all CUDA-capable devices are busy or unavailable
#
#Stack trace returned 6 entries:
#[bt] (0) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x29) [0x2b66c3c40319]
#[bt] (1) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(+0x13598a5) [0x2b66c49608a5]
#[bt] (2) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZNSt17_Function_handlerIFvvEZZN5mxnet6engine23ThreadedEnginePerDevice13PushToExecuteEPNS2_8OprBlockEbENKUlvE1_clEvEUlvE_E9_M_invokeERKSt9_Any_data+0x87) [0x2b66c4963a27]
#[bt] (3) /lib64/libstdc++.so.6(+0xb5230) [0x2b668601f230]
#[bt] (4) /lib64/libpthread.so.0(+0x7dc5) [0x2b667adc4dc5]
#[bt] (5) /lib64/libc.so.6(clone+0x6d) [0x2b667b0d073d]
#
#terminate called after throwing an instance of 'dmlc::Error'
# what(): [18:06:41] /scratch/qg323/mxnet/mshadow/mshadow/./stream_gpu-inl.h:141: Check failed: e == cudaSuccess CUDA: all CUDA-capable devices are busy or unavailable
#
#Stack trace returned 6 entries:
#[bt] (0) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x29) [0x2b66c3c40319]
#[bt] (1) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(+0x13598a5) [0x2b66c49608a5]
#[bt] (2) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZNSt17_Function_handlerIFvvEZZN5mxnet6engine23ThreadedEnginePerDevice13PushToExecuteEPNS2_8OprBlockEbENKUlvE1_clEvEUlvE_E9_M_invokeERKSt9_Any_data+0x87) [0x2b66c4963a27]
#[bt] (3) /lib64/libstdc++.so.6(+0xb5230) [0x2b668601f230]
#[bt] (4) /lib64/libpthread.so.0(+0x7dc5) [0x2b667adc4dc5]
#[bt] (5) /lib64/libc.so.6(clone+0x6d) [0x2b667b0d073d]
#
#Aborted (core dumped)
# ... unless we move this block after the "initialize()" call.
### BEGIN
train_iter = mx.io.ImageRecordIter(
path_imgrec = "data/cifar/train.rec",
# mean_img = "data/cifar/mean.bin",
resize = resize,
data_shape = data_shape,
batch_size = batch_size,
rand_crop = True,
rand_mirror = True)
### END
netG.params.initialize(mx.init.Normal(0.05), ctx=ctx)
time.sleep(30)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment