Created
June 3, 2017 22:08
-
-
Save BarclayII/03f11523741509ff0638462b33a8f141 to your computer and use it in GitHub Desktop.
MXNet bug?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import subprocess
import time

import mxnet as mx
from mxnet import nn
from mxnet.contrib import autograd
import numpy as np
# Network hyperparameters.
ngpu = 1   # number of GPUs; not referenced by the code visible in this script
nz = 100   # channels of the latent input Z fed to the first generator layer
ngf = 64   # base filter count; generator layer widths are multiples of this
ndf = 64   # presumably the discriminator's base filter count -- unused here
nc = 3     # channels produced by the last generator layer
# Device context on which the generator parameters are initialized below.
ctx = mx.gpu()
# Input pipeline settings handed to the ImageRecordIter further down.
resize = 64                        # value passed as the iterator's `resize`
data_shape = (3, resize, resize)   # (channels, height, width) == (3, 64, 64)
batch_size = 64                    # images per batch
# Fetch and unpack the CIFAR-10 RecordIO archive unless every expected file
# is already present under data/cifar/.
if not os.path.isdir("data/"):
    os.makedirs("data/")

_cifar_files = (
    'data/cifar/train.rec',
    'data/cifar/test.rec',
    'data/cifar/train.lst',
    'data/cifar/test.lst',
)
if not all(os.path.exists(path) for path in _cifar_files):
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # failed download or extraction stops the script here instead of
    # surfacing later as a missing-file error.
    subprocess.check_call(
        ["wget", "-q", "http://data.mxnet.io/mxnet/data/cifar10.zip",
         "-P", "data/"])
    # Run unzip with data/ as its working directory rather than changing
    # (and restoring) the working directory of this process.
    subprocess.check_call(["unzip", "-u", "cifar10.zip"], cwd="data")
# Generator network: a stack of transposed convolutions, each followed by
# batch normalization and ReLU, ending in a tanh activation.
# The positional Conv2DTranspose arguments are read here as
# (filters, kernel_size, strides, padding); the state-size comments below
# follow from that reading -- confirm against the installed mxnet.nn version.
netG = nn.Sequential()
# input is Z, going into a convolution
netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, in_filters=nz, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 8))
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 4 x 4
netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1, in_filters=ngf * 8, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 4))
netG.add(nn.Activation('relu'))
# state size. (ngf*4) x 8 x 8
netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1, in_filters=ngf * 4, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf * 2))
netG.add(nn.Activation('relu'))
# state size. (ngf*2) x 16 x 16
netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, in_filters=ngf * 2, use_bias=False))
netG.add(nn.BatchNorm(num_features=ngf))
netG.add(nn.Activation('relu'))
# state size. (ngf) x 32 x 32
netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, in_filters=ngf, use_bias=False))
netG.add(nn.Activation('tanh'))
# state size. (nc) x 64 x 64
# FIXME:
# Creating the ImageRecordIter below before the parameters are initialized on
# the GPU crashes the program with the following message:
#[18:06:40] src/io/iter_image_recordio_2.cc:135: ImageRecordIOParser2: data/cifar/train.rec, use 3 threads for decoding.. | |
#[18:06:41] /scratch/qg323/mxnet/dmlc-core/include/dmlc/./logging.h:304: [18:06:41] /scratch/qg323/mxnet/mshadow/mshadow/./stream_gpu-inl.h:141: Check failed: e == cudaSuccess CUDA: all CUDA-capable devices are busy or unavailable | |
# | |
#Stack trace returned 6 entries: | |
#[bt] (0) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x29) [0x2b66c3c40319] | |
#[bt] (1) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(+0x13598a5) [0x2b66c49608a5] | |
#[bt] (2) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZNSt17_Function_handlerIFvvEZZN5mxnet6engine23ThreadedEnginePerDevice13PushToExecuteEPNS2_8OprBlockEbENKUlvE1_clEvEUlvE_E9_M_invokeERKSt9_Any_data+0x87) [0x2b66c4963a27] | |
#[bt] (3) /lib64/libstdc++.so.6(+0xb5230) [0x2b668601f230] | |
#[bt] (4) /lib64/libpthread.so.0(+0x7dc5) [0x2b667adc4dc5] | |
#[bt] (5) /lib64/libc.so.6(clone+0x6d) [0x2b667b0d073d] | |
# | |
#terminate called after throwing an instance of 'dmlc::Error' | |
# what(): [18:06:41] /scratch/qg323/mxnet/mshadow/mshadow/./stream_gpu-inl.h:141: Check failed: e == cudaSuccess CUDA: all CUDA-capable devices are busy or unavailable | |
# | |
#Stack trace returned 6 entries: | |
#[bt] (0) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZN4dmlc15LogMessageFatalD1Ev+0x29) [0x2b66c3c40319] | |
#[bt] (1) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(+0x13598a5) [0x2b66c49608a5] | |
#[bt] (2) /scratch/qg323/mxnet/python/mxnet/../../lib/libmxnet.so(_ZNSt17_Function_handlerIFvvEZZN5mxnet6engine23ThreadedEnginePerDevice13PushToExecuteEPNS2_8OprBlockEbENKUlvE1_clEvEUlvE_E9_M_invokeERKSt9_Any_data+0x87) [0x2b66c4963a27] | |
#[bt] (3) /lib64/libstdc++.so.6(+0xb5230) [0x2b668601f230] | |
#[bt] (4) /lib64/libpthread.so.0(+0x7dc5) [0x2b667adc4dc5] | |
#[bt] (5) /lib64/libc.so.6(clone+0x6d) [0x2b667b0d073d] | |
# | |
#Aborted (core dumped) | |
# ... unless we move the block between "### BEGIN" and "### END" below so that
# it runs after the "initialize()" call.
# NOTE: the statement order here is intentional. This script reproduces a
# crash that occurs when the record iterator is created *before* the
# parameters are initialized on the GPU; do not reorder unless the goal is
# to apply the workaround described in the FIXME comment.
### BEGIN
train_iter = mx.io.ImageRecordIter(
    path_imgrec="data/cifar/train.rec",
    # mean_img="data/cifar/mean.bin",
    resize=resize,
    data_shape=data_shape,
    batch_size=batch_size,
    rand_crop=True,
    rand_mirror=True)
### END
# Initialize the generator parameters on the GPU context.
netG.params.initialize(mx.init.Normal(0.05), ctx=ctx)
# Keep the process alive for 30 seconds; presumably this leaves time for the
# failure to surface -- the reason is not stated in the script.
time.sleep(30)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment