Skip to content

Instantly share code, notes, and snippets.

@chiroptical
Last active November 13, 2019 21:29
Show Gist options
  • Select an option

  • Save chiroptical/042e983ee59ec6b463e53e56941370df to your computer and use it in GitHub Desktop.

Select an option

Save chiroptical/042e983ee59ec6b463e53e56941370df to your computer and use it in GitHub Desktop.
Trying to train a multi-gpu inception model. I need some help.
#!/usr/bin/env python3
import tensorflow as tf
import pandas as pd
import numpy as np
import sigmoid_inception_v3
from glob import glob
from timeit import default_timer as timer
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed
from math import floor
from multi_gpu import make_parallel
# Constants
NUM_GPUS = 4
WORKERS = 4
BATCH_SIZE = 32 * NUM_GPUS
#NUMBER_OF_IMAGES = 1187185
NUMBER_OF_IMAGES = 254383
STEPS_PER_EPOCH = floor(0.10 * NUMBER_OF_IMAGES / BATCH_SIZE)
print("--------------- DEBUG -------------------")
print("-----------------------------------------")
print(f"NUM_GPUS: {NUM_GPUS}")
print(f"WORKERS: {WORKERS}")
print(f"BATCH_SIZE: {BATCH_SIZE}")
print(f"NUMBER_OF_IMAGES: {NUMBER_OF_IMAGES}")
print(f"STEPS_PER_EPOCH: {STEPS_PER_EPOCH}")
print("-----------------------------------------")
# First, build the dtypes dictionary
birds = pd.read_csv("birds.csv")
birds = birds["species"].values
dtypes = {"filename": np.str}
for bird in birds:
dtypes[bird] = np.int
# Second, load the data
def read_df(f):
return pd.read_csv(f, dtype=dtypes)
files = glob("./train-test-split/segment-*.csv")
start = timer()
executor = ProcessPoolExecutor(10)
futs = [executor.submit(read_df, f) for f in files]
results = [None] * 10
for idx, fut in enumerate(as_completed(futs)):
results[idx] = fut.result()
end = timer()
print(f"Load time: {end - start} seconds")
start = timer()
df = pd.concat(results)
end = timer()
print(f"Concat time: {end - start} seconds")
# Try to lower the memory usage by setting
# -> Around 57GB prior to this change
# -> Down to ... after this change
results = [None for _ in results]
print("Setup ImageDataGenerator")
image_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
validation_split=0.3,
)
train_gen = image_data_gen.flow_from_dataframe(
df,
directory=None,
x_col="filename",
y_col=birds,
target_size=(299, 299),
class_mode="raw",
batch_size=BATCH_SIZE,
shuffle=False,
subset="training",
seed=42,
)
valid_gen = image_data_gen.flow_from_dataframe(
df,
directory=None,
x_col="filename",
y_col=birds,
target_size=(299, 299),
class_mode="raw",
batch_size=BATCH_SIZE,
shuffle=False,
subset="validation",
seed=42,
)
print("Load the Model")
# Get InceptionV3 Model
model = sigmoid_inception_v3.InceptionV3(
include_top=True,
weights=None,
input_shape=(299, 299, 3),
classes=707,
)
# Use multi-gpu model
model = make_parallel(model, NUM_GPUS)
print("Compile the Model")
model.compile(
loss="categorical_crossentropy",
metrics=["accuracy", "categorical_accuracy"],
optimizer="Adam",
)
print("Fit the Model")
model.fit_generator(
generator=train_gen,
validation_data=valid_gen,
epochs=25,
steps_per_epoch=STEPS_PER_EPOCH,
use_multiprocessing=True,
workers=WORKERS,
verbose=2,
)
model.save("categorical_inception_v3.h5")
# Borrowed from https://github.com/kuza55/keras-extras/blob/master/utils/multi_gpu.py
import tensorflow as tf
def make_parallel(model, gpu_count):
def get_slice(data, idx, parts):
shape = tf.shape(data)
size = tf.concat([shape[:1] // parts, shape[1:]], axis=0)
stride = tf.concat([shape[:1] // parts, shape[1:] * 0], axis=0)
start = stride * idx
return tf.slice(data, start, size)
outputs_all = []
for i in range(len(model.outputs)):
outputs_all.append([])
# Place a copy of the model on each GPU, each getting a slice of the batch
for i in range(gpu_count):
with tf.device("/gpu:%d" % i):
with tf.name_scope("tower_%d" % i) as scope:
inputs = []
# Slice each input into a piece for processing on this GPU
for x in model.inputs:
input_shape = tuple(x.get_shape().as_list())[1:]
slice_n = tf.keras.layers.Lambda(
get_slice,
output_shape=input_shape,
arguments={"idx": i, "parts": gpu_count},
)(x)
inputs.append(slice_n)
outputs = model(inputs)
if not isinstance(outputs, list):
outputs = [outputs]
# Save all the outputs for merging back together later
for l in range(len(outputs)):
outputs_all[l].append(outputs[l])
# merge outputs on CPU
with tf.device("/cpu:0"):
merged = []
for outputs in outputs_all:
merged.append(tf.keras.layers.merge(outputs, mode="concat", concat_axis=0))
return tf.keras.models.Model(input=model.inputs, output=merged)
"""Inception V3 model for Keras.
Note that the input image format for this model is different than for
the VGG16 and ResNet models (299x299 instead of 224x224),
and that the input preprocessing function is also different (same as Xception).
# Reference
- [Rethinking the Inception Architecture for Computer Vision](
http://arxiv.org/abs/1512.00567) (CVPR 2016)
"""
# Modified from https://github.com/keras-team/keras-applications/blob/master/keras_applications/inception_v3.py
# -> Basically instead of softmax logits layer I am using sigmoid
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from keras_applications import get_submodules_from_kwargs
from keras_applications import imagenet_utils
from keras_applications.imagenet_utils import decode_predictions
from keras_applications.imagenet_utils import _obtain_input_shape
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
WEIGHTS_PATH = (
'https://github.com/fchollet/deep-learning-models/'
'releases/download/v0.5/'
'inception_v3_weights_tf_dim_ordering_tf_kernels.h5')
WEIGHTS_PATH_NO_TOP = (
'https://github.com/fchollet/deep-learning-models/'
'releases/download/v0.5/'
'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5')
def conv2d_bn(x,
filters,
num_row,
num_col,
padding='same',
strides=(1, 1),
name=None):
"""Utility function to apply conv + BN.
# Arguments
x: input tensor.
filters: filters in `Conv2D`.
num_row: height of the convolution kernel.
num_col: width of the convolution kernel.
padding: padding mode in `Conv2D`.
strides: strides in `Conv2D`.
name: name of the ops; will become `name + '_conv'`
for the convolution and `name + '_bn'` for the
batch norm layer.
# Returns
Output tensor after applying `Conv2D` and `BatchNormalization`.
"""
if name is not None:
bn_name = name + '_bn'
conv_name = name + '_conv'
else:
bn_name = None
conv_name = None
#if backend.image_data_format() == 'channels_first':
# bn_axis = 1
#else:
# bn_axis = 3
bn_axis = 3
x = layers.Conv2D(
filters, (num_row, num_col),
strides=strides,
padding=padding,
use_bias=False,
name=conv_name)(x)
x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
x = layers.Activation('relu', name=name)(x)
return x
def InceptionV3(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000,
**kwargs):
"""Instantiates the Inception v3 architecture.
Optionally loads weights pre-trained on ImageNet.
Note that the data format convention used by the model is
the one specified in your Keras config at `~/.keras/keras.json`.
# Arguments
include_top: whether to include the fully-connected
layer at the top of the network.
weights: one of `None` (random initialization),
'imagenet' (pre-training on ImageNet),
or the path to the weights file to be loaded.
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(299, 299, 3)` (with `channels_last` data format)
or `(3, 299, 299)` (with `channels_first` data format).
It should have exactly 3 inputs channels,
and width and height should be no smaller than 75.
E.g. `(150, 150, 3)` would be one valid value.
pooling: Optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional block.
- `avg` means that global average pooling
will be applied to the output of the
last convolutional block, and thus
the output of the model will be a 2D tensor.
- `max` means that global max pooling will
be applied.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True, and
if no `weights` argument is specified.
# Returns
A Keras model instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
"""
# Determine proper input shape
input_shape = _obtain_input_shape(
input_shape,
default_size=299,
min_size=75,
data_format="channels_last",
require_flatten=include_top,
weights=weights
)
img_input = layers.Input(shape=input_shape)
channel_axis = 3
x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
x = conv2d_bn(x, 32, 3, 3, padding='valid')
x = conv2d_bn(x, 64, 3, 3)
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = conv2d_bn(x, 80, 1, 1, padding='valid')
x = conv2d_bn(x, 192, 3, 3, padding='valid')
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
# mixed 0: 35 x 35 x 256
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=channel_axis,
name='mixed0')
# mixed 1: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=channel_axis,
name='mixed1')
# mixed 2: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=channel_axis,
name='mixed2')
# mixed 3: 17 x 17 x 768
branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(
branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = layers.concatenate(
[branch3x3, branch3x3dbl, branch_pool],
axis=channel_axis,
name='mixed3')
# mixed 4: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 128, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 128, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=channel_axis,
name='mixed4')
# mixed 5, 6: 17 x 17 x 768
for i in range(2):
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 160, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 160, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 160, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D(
(3, 3), strides=(1, 1), padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=channel_axis,
name='mixed' + str(5 + i))
# mixed 7: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 192, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 192, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=channel_axis,
name='mixed7')
# mixed 8: 8 x 8 x 1280
branch3x3 = conv2d_bn(x, 192, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
strides=(2, 2), padding='valid')
branch7x7x3 = conv2d_bn(x, 192, 1, 1)
branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
branch7x7x3 = conv2d_bn(
branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')
branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = layers.concatenate(
[branch3x3, branch7x7x3, branch_pool],
axis=channel_axis,
name='mixed8')
# mixed 9: 8 x 8 x 2048
for i in range(2):
branch1x1 = conv2d_bn(x, 320, 1, 1)
branch3x3 = conv2d_bn(x, 384, 1, 1)
branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
branch3x3 = layers.concatenate(
[branch3x3_1, branch3x3_2],
axis=channel_axis,
name='mixed9_' + str(i))
branch3x3dbl = conv2d_bn(x, 448, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
branch3x3dbl = layers.concatenate(
[branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis)
branch_pool = layers.AveragePooling2D(
(3, 3), strides=(1, 1), padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch3x3, branch3x3dbl, branch_pool],
axis=channel_axis,
name='mixed' + str(9 + i))
if include_top:
# Classification block
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='sigmoid', name='predictions')(x)
else:
if pooling == 'avg':
x = layers.GlobalAveragePooling2D()(x)
elif pooling == 'max':
x = layers.GlobalMaxPooling2D()(x)
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
if input_tensor is not None:
inputs = keras_utils.get_source_inputs(input_tensor)
else:
inputs = img_input
# Create model.
model = models.Model(inputs, x, name='inception_v3')
# Load weights.
if weights == 'imagenet':
if include_top:
weights_path = keras_utils.get_file(
'inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
WEIGHTS_PATH,
cache_subdir='models',
file_hash='9a0d58056eeedaa3f26cb7ebd46da564')
else:
weights_path = keras_utils.get_file(
'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5',
WEIGHTS_PATH_NO_TOP,
cache_subdir='models',
file_hash='bcbd6486424b2319ff4ef7d526e38f63')
model.load_weights(weights_path)
elif weights is not None:
model.load_weights(weights)
return model
def preprocess_input(x, **kwargs):
"""Preprocesses a numpy array encoding a batch of images.
# Arguments
x: a 4D numpy array consists of RGB values within [0, 255].
# Returns
Preprocessed array.
"""
return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment