Debug DataLoaderIter (gist by @zhreshold, last active May 4, 2018)
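Three standalone scripts for timing raw data-loading throughput, each logging samples/sec every 50 batches: the first feeds ImageNet through a Gluon DataLoader, the second swaps in a constant in-memory dataset to separate worker overhead from file reads and JPEG decoding, and the third runs the equivalent PyTorch pipeline for comparison.

Script 1 — Gluon DataLoader over ImageNet (GluonCV transforms):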
from gluoncv.data import imagenet
from mxnet.gluon.data.vision import transforms
from mxnet import gluon
import mxnet as mx
import time
import logging
logging.basicConfig(level=logging.DEBUG)
batch_size = 128
num_workers = 2
data_path = '~/data/ILSVRC2012/'
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
jitter_param = 0.4
lighting_param = 0.1
transform_train = transforms.Compose([
    #transforms.Resize(480),
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    #transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
    #                             saturation=jitter_param),
    #transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    normalize
])
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize
])
train_data = gluon.data.DataLoader(
    imagenet.classification.ImageNet(data_path, train=True).transform_first(transform_train),
    batch_size=batch_size, shuffle=True, num_workers=num_workers)
#val_data = gluon.data.DataLoader(
#    imagenet.classification.ImageNet(data_path, train=False).transform_first(transform_test),
#    batch_size=batch_size, shuffle=False, num_workers=num_workers)
#import pdb; pdb.set_trace()
disp_batches = 50
#train = mx.contrib.io.DataLoaderIter(train_data, 'float32')
#val = mx.contrib.io.DataLoaderIter(val_data, 'float32')
train = train_data
if True:
    tic = time.time()
    for i, batch in enumerate(train):
        # MXNet executes asynchronously: block on each sample so the timing
        # measures actual loading work, not just enqueueing
        for j in batch[0]:
            j.wait_to_read()
        if (i + 1) % disp_batches == 0:
            logging.info('Batch [%d]\tSpeed: %.2f samples/sec', i,
                         disp_batches * batch_size / (time.time() - tic))
            tic = time.time()
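The commented-out lines above show the mx.contrib.io.DataLoaderIter wrapper this gist is named after; to benchmark it instead of the bare loader, only the assignment changes (a sketch reusing the call already present in the comments — note that a DataIter yields mx.io.DataBatch objects, so the inner loop would iterate batch.data[0] rather than batch[0]):

    # swap in the module-style iterator under debug (sketch)
    train = mx.contrib.io.DataLoaderIter(train_data, 'float32')

Script 2 — the same timing loop over a dummy in-memory dataset, isolating DataLoader and worker overhead from image decoding: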
from mxnet import gluon
import mxnet as mx
import time
import logging
logging.basicConfig(level=logging.DEBUG)
import argparse
parser = argparse.ArgumentParser(description='DataLoader throughput benchmark with a dummy dataset')
parser.add_argument('-j', dest='num_workers', type=int, required=True)
parser.add_argument('--init', type=str, default='zeros')
args = parser.parse_args()
batch_size = 128
num_workers = args.num_workers
class DummyDataset(gluon.data.Dataset):
    # constant image-shaped array: no disk reads, no JPEG decode
    def __getitem__(self, idx):
        func = getattr(mx.nd, args.init)  # mx.nd.zeros by default
        return func(shape=(224, 224, 3))

    def __len__(self):
        return 100000
#import pdb; pdb.set_trace()
disp_batches = 50
train = gluon.data.DataLoader(
    DummyDataset(), batch_size=batch_size, shuffle=True, num_workers=num_workers)
#mx.profiler.set_config(profile_all=True, filename='profile_output.json')
#mx.profiler.set_state('run')
if True:
    tic = time.time()
    for i, batch in enumerate(train):
        for j in batch[0]:
            j.wait_to_read()
        if (i + 1) % disp_batches == 0:
            logging.info('Batch [%d]\tSpeed: %.2f samples/sec', i,
                         disp_batches * batch_size / (time.time() - tic))
            tic = time.time()
        if i > 1000:
            break
#mx.profiler.set_state('stop')
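Because the dummy benchmark reads its worker count from the command line, sweeping worker counts is easy; assuming the file is saved as dummy_bench.py (a hypothetical name), running python dummy_bench.py -j 4 times four workers, and --init ones selects a different mx.nd initializer. Re-enabling the commented profiler lines writes a trace to profile_output.json.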
Script 3 — the equivalent PyTorch pipeline, adapted from the official PyTorch ImageNet example, as a reference point:
import os
import time
import logging
logging.basicConfig(level=logging.INFO)
# only the data-loading pieces of the reference ImageNet example are needed here
import torch
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
batch_size = 128
num_workers = 2
data_path = os.path.expanduser('~/data/ILSVRC2012/')
# Data loading code
traindir = os.path.join(data_path, 'train')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
train_sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
    num_workers=num_workers, pin_memory=False, sampler=train_sampler)
disp_batches = 50
train = train_loader
if True:
    tic = time.time()
    for i, batch in enumerate(train):
        # PyTorch DataLoader returns ready host tensors, so simply
        # receiving the batch is enough; no explicit wait is needed
        if (i + 1) % disp_batches == 0:
            logging.info('Batch [%d]\tSpeed: %.2f samples/sec', i,
                         disp_batches * batch_size / (time.time() - tic))
            tic = time.time()
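All three loops share batch_size = 128 and report every disp_batches = 50 batches, so the logged samples/sec figures can be compared directly across the MXNet and PyTorch runs.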