chiroptical · November 13, 2019 21:29
diff --git a/fit_inception.py b/fit_inception.py
 #!/usr/bin/env python3
 import tensorflow as tf
 import pandas as pd
 import numpy as np
 import sigmoid_inception_v3
 from glob import glob
 from timeit import default_timer as timer
 from concurrent.futures import ProcessPoolExecutor
 from concurrent.futures import as_completed
 from math import floor
 from multi_gpu import make_parallel

 # Run configuration

 NUM_GPUS = 4
 WORKERS = 4
 # 32 images per GPU in every batch.
 BATCH_SIZE = NUM_GPUS * 32
 # Previously: NUMBER_OF_IMAGES = 1187185
 NUMBER_OF_IMAGES = 254383
 # One "epoch" is 10% of NUMBER_OF_IMAGES worth of batches, rounded down.
 STEPS_PER_EPOCH = floor(NUMBER_OF_IMAGES * 0.10 / BATCH_SIZE)

 # Echo the configuration once at start-up, one value per line.
 _debug_lines = (
    "--------------- DEBUG -------------------",
    "-----------------------------------------",
    f"NUM_GPUS: {NUM_GPUS}",
    f"WORKERS: {WORKERS}",
    f"BATCH_SIZE: {BATCH_SIZE}",
    f"NUMBER_OF_IMAGES: {NUMBER_OF_IMAGES}",
    f"STEPS_PER_EPOCH: {STEPS_PER_EPOCH}",
    "-----------------------------------------",
 )
 print(*_debug_lines, sep="\n")

 # First, build the dtypes dictionary passed to `pd.read_csv`: the
 # "filename" column is text and every species listed in birds.csv gets an
 # integer column.
 birds = pd.read_csv("birds.csv")
 birds = birds["species"].values

 # `np.str` / `np.int` were plain aliases of the builtins; they are deprecated
 # since NumPy 1.20 and removed in 1.24, so use the builtins directly.
 dtypes = {"filename": str}
 dtypes.update({bird: int for bird in birds})

 # Second, load the data 
 def read_df(f):
    """Read the CSV file at `f` into a DataFrame.

    Column types come from the module-level `dtypes` mapping. This is the
    task submitted to the process pool for each segment file.
    """
    frame = pd.read_csv(f, dtype=dtypes)
    return frame

 # Sort the segment paths so the row order of `df` is the same on every run
 # (glob order depends on the filesystem).
 files = sorted(glob("./train-test-split/segment-*.csv"))
 if not files:
    raise FileNotFoundError(
        "No files match ./train-test-split/segment-*.csv"
    )

 start = timer()
 # `map` returns one frame per file, in the order of `files`, however many
 # files there are (no fixed-size result list to overflow). Leaving the
 # `with` block shuts the worker processes down.
 with ProcessPoolExecutor(10) as executor:
    results = list(executor.map(read_df, files))
 end = timer()
 print(f"Load time: {end - start} seconds")

 start = timer()
 df = pd.concat(results)
 end = timer()
 print(f"Concat time: {end - start} seconds")

 # Drop the per-segment frames now that `df` holds the concatenated copy,
 # to lower peak memory usage (around 57GB was observed before this was
 # added).
 results.clear()

 print("Setup ImageDataGenerator")

 # Pixel values are scaled by 1/255 and 30% of the rows are set aside as the
 # "validation" subset.
 image_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.3,
    rescale=1./255,
 )

 # Both iterators read the same frame with identical settings; only the
 # `subset` differs.
 # NOTE(review): the training iterator is created with shuffle=False --
 # confirm that unshuffled training batches are intended.
 _flow_options = dict(
    directory=None,
    x_col="filename",
    y_col=birds,
    target_size=(299, 299),
    class_mode="raw",
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=42,
 )

 train_gen = image_data_gen.flow_from_dataframe(
    df, subset="training", **_flow_options
 )

 valid_gen = image_data_gen.flow_from_dataframe(
    df, subset="validation", **_flow_options
 )

 print("Load the Model")

 # Randomly initialised InceptionV3 with the sigmoid classification head.
 # NOTE(review): classes=707 presumably equals len(birds) -- confirm.
 _model_options = dict(
    include_top=True,
    weights=None,
    input_shape=(299, 299, 3),
    classes=707,
 )
 model = sigmoid_inception_v3.InceptionV3(**_model_options)

 # Wrap the model so each batch is split across NUM_GPUS towers.
 model = make_parallel(model, NUM_GPUS)

 print("Compile the Model")

 # NOTE(review): the network ends in a sigmoid layer; `binary_crossentropy`
 # is the usual pairing for independent per-class sigmoids -- confirm that
 # `categorical_crossentropy` is intended here.
 model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=["accuracy", "categorical_accuracy"],
 )

 print("Fit the Model")

 _fit_options = dict(
    generator=train_gen,
    validation_data=valid_gen,
    epochs=25,
    steps_per_epoch=STEPS_PER_EPOCH,
    use_multiprocessing=True,
    workers=WORKERS,
    verbose=2,
 )
 model.fit_generator(**_fit_options)

 model.save("categorical_inception_v3.h5")
diff --git a/multi_gpu.py b/multi_gpu.py
 # Borrowed from https://github.com/kuza55/keras-extras/blob/master/utils/multi_gpu.py

 import tensorflow as tf


 def make_parallel(model, gpu_count):
    """Run `model` data-parallel on `gpu_count` GPUs.

    The same `model` object is called once per GPU ("tower") on one slice of
    the input batch, so all towers share its weights. The tower outputs are
    concatenated along the batch axis on the CPU.

    # Arguments
        model: a Keras model.
        gpu_count: number of towers; tower `i` is placed on `/gpu:i`.

    # Returns
        A Keras model with the same inputs as `model` whose outputs are the
        batch-wise concatenation of the tower outputs.
    """
    def get_slice(data, idx, parts):
        """Return slice `idx` of `parts` along the first axis of `data`."""
        shape = tf.shape(data)
        batch_size = shape[:1]
        step = batch_size // parts
        if idx == parts - 1:
            # The last tower also takes the remainder, so no sample is
            # dropped when the batch is not divisible by `parts`.
            size = batch_size - step * idx
        else:
            size = step
        size = tf.concat([size, shape[1:]], axis=0)
        # Only the first axis is offset; every other axis starts at 0.
        stride = tf.concat([step, shape[1:] * 0], axis=0)
        start = stride * idx
        return tf.slice(data, start, size)

    # One list of per-tower tensors for every output of the model.
    outputs_all = [[] for _ in model.outputs]

    # Place a copy of the model on each GPU, each getting a slice of the batch
    for i in range(gpu_count):
        with tf.device("/gpu:%d" % i):
            with tf.name_scope("tower_%d" % i):

                inputs = []
                # Slice each input into a piece for processing on this GPU
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_n = tf.keras.layers.Lambda(
                        get_slice,
                        output_shape=input_shape,
                        arguments={"idx": i, "parts": gpu_count},
                    )(x)
                    inputs.append(slice_n)

                outputs = model(inputs)

                if not isinstance(outputs, (list, tuple)):
                    outputs = [outputs]

                # Save all the outputs for merging back together later
                for tower_outputs, output in zip(outputs_all, outputs):
                    tower_outputs.append(output)

    # merge outputs on CPU
    with tf.device("/cpu:0"):
        merged = []
        for outputs in outputs_all:
            if len(outputs) == 1:
                # A single tower has nothing to concatenate with.
                merged.append(outputs[0])
            else:
                merged.append(tf.keras.layers.concatenate(outputs, axis=0))

        return tf.keras.models.Model(inputs=model.inputs, outputs=merged)
diff --git a/sigmoid_inception_v3.py b/sigmoid_inception_v3.py
 """Inception V3 model for Keras.

 Note that the input image format for this model is different than for
 the VGG16 and ResNet models (299x299 instead of 224x224),
 and that the input preprocessing function is also different (same as Xception).

 # Reference

 - [Rethinking the Inception Architecture for Computer Vision](
    http://arxiv.org/abs/1512.00567) (CVPR 2016)

 """

 # Modified from https://github.com/keras-team/keras-applications/blob/master/keras_applications/inception_v3.py
 # -> Basically instead of softmax logits layer I am using sigmoid

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import os

 from keras_applications import get_submodules_from_kwargs
 from keras_applications import imagenet_utils
 from keras_applications.imagenet_utils import decode_predictions
 from keras_applications.imagenet_utils import _obtain_input_shape

 import tensorflow.keras.layers as layers
 import tensorflow.keras.models as models


 # Download URL of the ImageNet weights file fetched by `InceptionV3` when
 # `weights='imagenet'` and `include_top` is true.
 WEIGHTS_PATH = (
    'https://github.com/fchollet/deep-learning-models/'
    'releases/download/v0.5/'
    'inception_v3_weights_tf_dim_ordering_tf_kernels.h5')
 # Download URL of the "notop" ImageNet weights file, fetched when
 # `weights='imagenet'` and `include_top` is false.
 WEIGHTS_PATH_NO_TOP = (
    'https://github.com/fchollet/deep-learning-models/'
    'releases/download/v0.5/'
    'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5')

 def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):
    """Apply a bias-free 2D convolution, batch normalization and ReLU.

    # Arguments
        x: input tensor.
        filters: number of filters of the `Conv2D` layer.
        num_row: height of the convolution kernel.
        num_col: width of the convolution kernel.
        padding: padding mode of the `Conv2D` layer.
        strides: strides of the `Conv2D` layer.
        name: optional base name. The convolution is named
            `name + '_conv'`, the batch norm layer `name + '_bn'` and the
            activation `name`. With `None` every layer keeps its
            auto-generated name.

    # Returns
        Output tensor after `Conv2D`, `BatchNormalization` and the ReLU
        `Activation`.
    """
    named = name is not None
    conv_name = name + '_conv' if named else None
    bn_name = name + '_bn' if named else None

    convolution = layers.Conv2D(
        filters, (num_row, num_col),
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name)
    x = convolution(x)

    # Normalize over axis 3 (hard-coded, not read from the Keras config),
    # without a learned scale factor.
    normalization = layers.BatchNormalization(
        axis=3, scale=False, name=bn_name)
    x = normalization(x)

    return layers.Activation('relu', name=name)(x)


 # Axis the branches are concatenated on (last axis, `channels_last`).
 _CHANNEL_AXIS = 3


 def _inception_35x35(x, pool_filters, name):
    """Build one 35 x 35 module (mixed0 - mixed2).

    `pool_filters` is the width of the 1x1 convolution after the pooling
    branch; `name` is given to the final concatenation layer.
    """
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = layers.AveragePooling2D(
        (3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, pool_filters, 1, 1)
    return layers.concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=_CHANNEL_AXIS,
        name=name)


 def _inception_17x17(x, mid_filters, name):
    """Build one 17 x 17 module with 1x7 / 7x1 convolutions (mixed4 - mixed7).

    `mid_filters` is the width of the intermediate convolutions; every
    branch ends with 192 filters. `name` is given to the final
    concatenation layer.
    """
    branch1x1 = conv2d_bn(x, 192, 1, 1)

    branch7x7 = conv2d_bn(x, mid_filters, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, mid_filters, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

    branch7x7dbl = conv2d_bn(x, mid_filters, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

    branch_pool = layers.AveragePooling2D(
        (3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
    return layers.concatenate(
        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
        axis=_CHANNEL_AXIS,
        name=name)


 def _inception_8x8(x, index):
    """Build one 8 x 8 module; `index` 0 / 1 yields `mixed9` / `mixed10`."""
    branch1x1 = conv2d_bn(x, 320, 1, 1)

    branch3x3 = conv2d_bn(x, 384, 1, 1)
    branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
    branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
    branch3x3 = layers.concatenate(
        [branch3x3_1, branch3x3_2],
        axis=_CHANNEL_AXIS,
        name='mixed9_' + str(index))

    branch3x3dbl = conv2d_bn(x, 448, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
    branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
    branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
    branch3x3dbl = layers.concatenate(
        [branch3x3dbl_1, branch3x3dbl_2], axis=_CHANNEL_AXIS)

    branch_pool = layers.AveragePooling2D(
        (3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
    return layers.concatenate(
        [branch1x1, branch3x3, branch3x3dbl, branch_pool],
        axis=_CHANNEL_AXIS,
        name='mixed' + str(9 + index))


 def InceptionV3(include_top=True,
                weights='imagenet',
                input_tensor=None,
                input_shape=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Instantiates the Inception v3 architecture with a sigmoid top layer.

    Optionally loads weights pre-trained on ImageNet. The model is built
    for the `channels_last` data format only.

    # Arguments
        include_top: whether to include the global average pooling and
            the sigmoid `Dense` layer at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple in `channels_last` order, e.g.
            `(299, 299, 3)`. It is validated by `_obtain_input_shape`
            with a default size of 299 and a minimum size of 75.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None`: the output is the 4D tensor of the last
                convolutional block.
            - `avg`: global average pooling is applied, so the output
                is a 2D tensor.
            - `max`: global max pooling is applied.
        classes: number of units of the top layer; only used when
            `include_top` is true. Must be 1000 when loading the
            'imagenet' weights with `include_top`.
        **kwargs: accepted for call compatibility and ignored.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    # Validate the arguments first so mistakes fail fast with a clear error.
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')

    # `backend` and `keras_utils` are not imported at module level; take
    # them from tf.keras, like the module's `layers` and `models`.
    from tensorflow.keras import backend
    from tensorflow.keras import utils as keras_utils

    # Determine proper input shape
    input_shape = _obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=75,
        data_format="channels_last",
        require_flatten=include_top,
        weights=weights
    )

    # Build on the caller's tensor when one is given, so the returned model
    # is actually connected to it.
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    elif backend.is_keras_tensor(input_tensor):
        img_input = input_tensor
    else:
        img_input = layers.Input(tensor=input_tensor, shape=input_shape)

    # Stem
    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
    x = conv2d_bn(x, 32, 3, 3, padding='valid')
    x = conv2d_bn(x, 64, 3, 3)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv2d_bn(x, 80, 1, 1, padding='valid')
    x = conv2d_bn(x, 192, 3, 3, padding='valid')
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # mixed 0: 35 x 35 x 256
    x = _inception_35x35(x, 32, 'mixed0')
    # mixed 1, 2: 35 x 35 x 288
    x = _inception_35x35(x, 64, 'mixed1')
    x = _inception_35x35(x, 64, 'mixed2')

    # mixed 3: 17 x 17 x 768
    branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(
        branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = layers.concatenate(
        [branch3x3, branch3x3dbl, branch_pool],
        axis=_CHANNEL_AXIS,
        name='mixed3')

    # mixed 4: 17 x 17 x 768
    x = _inception_17x17(x, 128, 'mixed4')
    # mixed 5, 6: 17 x 17 x 768
    for i in range(2):
        x = _inception_17x17(x, 160, 'mixed' + str(5 + i))
    # mixed 7: 17 x 17 x 768
    x = _inception_17x17(x, 192, 'mixed7')

    # mixed 8: 8 x 8 x 1280
    branch3x3 = conv2d_bn(x, 192, 1, 1)
    branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
                          strides=(2, 2), padding='valid')

    branch7x7x3 = conv2d_bn(x, 192, 1, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
    branch7x7x3 = conv2d_bn(
        branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = layers.concatenate(
        [branch3x3, branch7x7x3, branch_pool],
        axis=_CHANNEL_AXIS,
        name='mixed8')

    # mixed 9, 10: 8 x 8 x 2048
    for i in range(2):
        x = _inception_8x8(x, i)

    if include_top:
        # Classification block: sigmoid instead of the usual softmax.
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        x = layers.Dense(classes, activation='sigmoid', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = models.Model(inputs, x, name='inception_v3')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='9a0d58056eeedaa3f26cb7ebd46da564')
        else:
            weights_path = keras_utils.get_file(
                'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='bcbd6486424b2319ff4ef7d526e38f63')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


 def preprocess_input(x, **kwargs):
    """Preprocess a batch of images with the `'tf'` mode of `imagenet_utils`.

    # Arguments
        x: a 4D numpy array consists of RGB values within [0, 255].
        **kwargs: forwarded unchanged to `imagenet_utils.preprocess_input`.
            NOTE(review): keras_applications presumably expects its
            `backend` / `utils` submodules to be passed here -- confirm
            against the callers.

    # Returns
        Preprocessed array.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
    return preprocessed
	#!/usr/bin/env python3
	import tensorflow as tf
	import pandas as pd
	import numpy as np
	import sigmoid_inception_v3
	from glob import glob
	from timeit import default_timer as timer
	from concurrent.futures import ProcessPoolExecutor
	from concurrent.futures import as_completed
	from math import floor
	from multi_gpu import make_parallel

	# Run configuration

	NUM_GPUS = 4
	WORKERS = 4
	# 32 images per GPU in every batch.
	BATCH_SIZE = NUM_GPUS * 32
	# Previously: NUMBER_OF_IMAGES = 1187185
	NUMBER_OF_IMAGES = 254383
	# One "epoch" is 10% of NUMBER_OF_IMAGES worth of batches, rounded down.
	STEPS_PER_EPOCH = floor(NUMBER_OF_IMAGES * 0.10 / BATCH_SIZE)

	# Echo the configuration once at start-up, one value per line.
	_debug_lines = (
	    "--------------- DEBUG -------------------",
	    "-----------------------------------------",
	    f"NUM_GPUS: {NUM_GPUS}",
	    f"WORKERS: {WORKERS}",
	    f"BATCH_SIZE: {BATCH_SIZE}",
	    f"NUMBER_OF_IMAGES: {NUMBER_OF_IMAGES}",
	    f"STEPS_PER_EPOCH: {STEPS_PER_EPOCH}",
	    "-----------------------------------------",
	)
	print(*_debug_lines, sep="\n")

	# First, build the dtypes dictionary passed to `pd.read_csv`: the
	# "filename" column is text and every species listed in birds.csv gets an
	# integer column.
	birds = pd.read_csv("birds.csv")
	birds = birds["species"].values

	# `np.str` / `np.int` were plain aliases of the builtins; they are deprecated
	# since NumPy 1.20 and removed in 1.24, so use the builtins directly.
	dtypes = {"filename": str}
	dtypes.update({bird: int for bird in birds})

	# Second, load the data
	def read_df(f):
	    """Read the CSV file at `f` into a DataFrame.

	    Column types come from the module-level `dtypes` mapping. This is the
	    task submitted to the process pool for each segment file.
	    """
	    frame = pd.read_csv(f, dtype=dtypes)
	    return frame

	# Sort the segment paths so the row order of `df` is the same on every run
	# (glob order depends on the filesystem).
	files = sorted(glob("./train-test-split/segment-*.csv"))
	if not files:
	    raise FileNotFoundError(
	        "No files match ./train-test-split/segment-*.csv"
	    )

	start = timer()
	# `map` returns one frame per file, in the order of `files`, however many
	# files there are (no fixed-size result list to overflow). Leaving the
	# `with` block shuts the worker processes down.
	with ProcessPoolExecutor(10) as executor:
	    results = list(executor.map(read_df, files))
	end = timer()
	print(f"Load time: {end - start} seconds")

	start = timer()
	df = pd.concat(results)
	end = timer()
	print(f"Concat time: {end - start} seconds")

	# Drop the per-segment frames now that `df` holds the concatenated copy,
	# to lower peak memory usage (around 57GB was observed before this was
	# added).
	results.clear()

	print("Setup ImageDataGenerator")

	# Pixel values are scaled by 1/255 and 30% of the rows are set aside as the
	# "validation" subset.
	image_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
	    validation_split=0.3,
	    rescale=1./255,
	)

	# Both iterators read the same frame with identical settings; only the
	# `subset` differs.
	# NOTE(review): the training iterator is created with shuffle=False --
	# confirm that unshuffled training batches are intended.
	_flow_options = dict(
	    directory=None,
	    x_col="filename",
	    y_col=birds,
	    target_size=(299, 299),
	    class_mode="raw",
	    batch_size=BATCH_SIZE,
	    shuffle=False,
	    seed=42,
	)

	train_gen = image_data_gen.flow_from_dataframe(
	    df, subset="training", **_flow_options
	)

	valid_gen = image_data_gen.flow_from_dataframe(
	    df, subset="validation", **_flow_options
	)

	print("Load the Model")

	# Randomly initialised InceptionV3 with the sigmoid classification head.
	# NOTE(review): classes=707 presumably equals len(birds) -- confirm.
	_model_options = dict(
	    include_top=True,
	    weights=None,
	    input_shape=(299, 299, 3),
	    classes=707,
	)
	model = sigmoid_inception_v3.InceptionV3(**_model_options)

	# Wrap the model so each batch is split across NUM_GPUS towers.
	model = make_parallel(model, NUM_GPUS)

	print("Compile the Model")

	# NOTE(review): the network ends in a sigmoid layer; `binary_crossentropy`
	# is the usual pairing for independent per-class sigmoids -- confirm that
	# `categorical_crossentropy` is intended here.
	model.compile(
	    optimizer="Adam",
	    loss="categorical_crossentropy",
	    metrics=["accuracy", "categorical_accuracy"],
	)

	print("Fit the Model")

	_fit_options = dict(
	    generator=train_gen,
	    validation_data=valid_gen,
	    epochs=25,
	    steps_per_epoch=STEPS_PER_EPOCH,
	    use_multiprocessing=True,
	    workers=WORKERS,
	    verbose=2,
	)
	model.fit_generator(**_fit_options)

	model.save("categorical_inception_v3.h5")
	# Borrowed from https://github.com/kuza55/keras-extras/blob/master/utils/multi_gpu.py

	import tensorflow as tf


	def make_parallel(model, gpu_count):
	    """Run `model` data-parallel on `gpu_count` GPUs.

	    The same `model` object is called once per GPU ("tower") on one slice of
	    the input batch, so all towers share its weights. The tower outputs are
	    concatenated along the batch axis on the CPU.

	    # Arguments
	        model: a Keras model.
	        gpu_count: number of towers; tower `i` is placed on `/gpu:i`.

	    # Returns
	        A Keras model with the same inputs as `model` whose outputs are the
	        batch-wise concatenation of the tower outputs.
	    """
	    def get_slice(data, idx, parts):
	        """Return slice `idx` of `parts` along the first axis of `data`."""
	        shape = tf.shape(data)
	        batch_size = shape[:1]
	        step = batch_size // parts
	        if idx == parts - 1:
	            # The last tower also takes the remainder, so no sample is
	            # dropped when the batch is not divisible by `parts`.
	            size = batch_size - step * idx
	        else:
	            size = step
	        size = tf.concat([size, shape[1:]], axis=0)
	        # Only the first axis is offset; every other axis starts at 0.
	        stride = tf.concat([step, shape[1:] * 0], axis=0)
	        start = stride * idx
	        return tf.slice(data, start, size)

	    # One list of per-tower tensors for every output of the model.
	    outputs_all = [[] for _ in model.outputs]

	    # Place a copy of the model on each GPU, each getting a slice of the batch
	    for i in range(gpu_count):
	        with tf.device("/gpu:%d" % i):
	            with tf.name_scope("tower_%d" % i):

	                inputs = []
	                # Slice each input into a piece for processing on this GPU
	                for x in model.inputs:
	                    input_shape = tuple(x.get_shape().as_list())[1:]
	                    slice_n = tf.keras.layers.Lambda(
	                        get_slice,
	                        output_shape=input_shape,
	                        arguments={"idx": i, "parts": gpu_count},
	                    )(x)
	                    inputs.append(slice_n)

	                outputs = model(inputs)

	                if not isinstance(outputs, (list, tuple)):
	                    outputs = [outputs]

	                # Save all the outputs for merging back together later
	                for tower_outputs, output in zip(outputs_all, outputs):
	                    tower_outputs.append(output)

	    # merge outputs on CPU
	    with tf.device("/cpu:0"):
	        merged = []
	        for outputs in outputs_all:
	            if len(outputs) == 1:
	                # A single tower has nothing to concatenate with.
	                merged.append(outputs[0])
	            else:
	                merged.append(tf.keras.layers.concatenate(outputs, axis=0))

	        return tf.keras.models.Model(inputs=model.inputs, outputs=merged)
	"""Inception V3 model for Keras.

	Note that the input image format for this model is different than for
	the VGG16 and ResNet models (299x299 instead of 224x224),
	and that the input preprocessing function is also different (same as Xception).

	# Reference

	- [Rethinking the Inception Architecture for Computer Vision](
	http://arxiv.org/abs/1512.00567) (CVPR 2016)

	"""

	# Modified from https://github.com/keras-team/keras-applications/blob/master/keras_applications/inception_v3.py
	# -> Basically instead of softmax logits layer I am using sigmoid

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import os

	from keras_applications import get_submodules_from_kwargs
	from keras_applications import imagenet_utils
	from keras_applications.imagenet_utils import decode_predictions
	from keras_applications.imagenet_utils import _obtain_input_shape

	import tensorflow.keras.layers as layers
	import tensorflow.keras.models as models


	# Download URL of the ImageNet weights file fetched by `InceptionV3` when
	# `weights='imagenet'` and `include_top` is true.
	WEIGHTS_PATH = (
	'https://github.com/fchollet/deep-learning-models/'
	'releases/download/v0.5/'
	'inception_v3_weights_tf_dim_ordering_tf_kernels.h5')
	# Download URL of the "notop" ImageNet weights file, fetched when
	# `weights='imagenet'` and `include_top` is false.
	WEIGHTS_PATH_NO_TOP = (
	'https://github.com/fchollet/deep-learning-models/'
	'releases/download/v0.5/'
	'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5')

	def conv2d_bn(x,
	              filters,
	              num_row,
	              num_col,
	              padding='same',
	              strides=(1, 1),
	              name=None):
	    """Apply a bias-free 2D convolution, batch normalization and ReLU.

	    # Arguments
	        x: input tensor.
	        filters: number of filters of the `Conv2D` layer.
	        num_row: height of the convolution kernel.
	        num_col: width of the convolution kernel.
	        padding: padding mode of the `Conv2D` layer.
	        strides: strides of the `Conv2D` layer.
	        name: optional base name. The convolution is named
	            `name + '_conv'`, the batch norm layer `name + '_bn'` and the
	            activation `name`. With `None` every layer keeps its
	            auto-generated name.

	    # Returns
	        Output tensor after `Conv2D`, `BatchNormalization` and the ReLU
	        `Activation`.
	    """
	    named = name is not None
	    conv_name = name + '_conv' if named else None
	    bn_name = name + '_bn' if named else None

	    convolution = layers.Conv2D(
	        filters, (num_row, num_col),
	        strides=strides,
	        padding=padding,
	        use_bias=False,
	        name=conv_name)
	    x = convolution(x)

	    # Normalize over axis 3 (hard-coded, not read from the Keras config),
	    # without a learned scale factor.
	    normalization = layers.BatchNormalization(
	        axis=3, scale=False, name=bn_name)
	    x = normalization(x)

	    return layers.Activation('relu', name=name)(x)


	# Axis the branches are concatenated on (last axis, `channels_last`).
	_CHANNEL_AXIS = 3


	def _inception_35x35(x, pool_filters, name):
	    """Build one 35 x 35 module (mixed0 - mixed2).

	    `pool_filters` is the width of the 1x1 convolution after the pooling
	    branch; `name` is given to the final concatenation layer.
	    """
	    branch1x1 = conv2d_bn(x, 64, 1, 1)

	    branch5x5 = conv2d_bn(x, 48, 1, 1)
	    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

	    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
	    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
	    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

	    branch_pool = layers.AveragePooling2D(
	        (3, 3), strides=(1, 1), padding='same')(x)
	    branch_pool = conv2d_bn(branch_pool, pool_filters, 1, 1)
	    return layers.concatenate(
	        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
	        axis=_CHANNEL_AXIS,
	        name=name)


	def _inception_17x17(x, mid_filters, name):
	    """Build one 17 x 17 module with 1x7 / 7x1 convolutions (mixed4 - mixed7).

	    `mid_filters` is the width of the intermediate convolutions; every
	    branch ends with 192 filters. `name` is given to the final
	    concatenation layer.
	    """
	    branch1x1 = conv2d_bn(x, 192, 1, 1)

	    branch7x7 = conv2d_bn(x, mid_filters, 1, 1)
	    branch7x7 = conv2d_bn(branch7x7, mid_filters, 1, 7)
	    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

	    branch7x7dbl = conv2d_bn(x, mid_filters, 1, 1)
	    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 7, 1)
	    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 1, 7)
	    branch7x7dbl = conv2d_bn(branch7x7dbl, mid_filters, 7, 1)
	    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

	    branch_pool = layers.AveragePooling2D(
	        (3, 3), strides=(1, 1), padding='same')(x)
	    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
	    return layers.concatenate(
	        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
	        axis=_CHANNEL_AXIS,
	        name=name)


	def _inception_8x8(x, index):
	    """Build one 8 x 8 module; `index` 0 / 1 yields `mixed9` / `mixed10`."""
	    branch1x1 = conv2d_bn(x, 320, 1, 1)

	    branch3x3 = conv2d_bn(x, 384, 1, 1)
	    branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
	    branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
	    branch3x3 = layers.concatenate(
	        [branch3x3_1, branch3x3_2],
	        axis=_CHANNEL_AXIS,
	        name='mixed9_' + str(index))

	    branch3x3dbl = conv2d_bn(x, 448, 1, 1)
	    branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
	    branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
	    branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
	    branch3x3dbl = layers.concatenate(
	        [branch3x3dbl_1, branch3x3dbl_2], axis=_CHANNEL_AXIS)

	    branch_pool = layers.AveragePooling2D(
	        (3, 3), strides=(1, 1), padding='same')(x)
	    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
	    return layers.concatenate(
	        [branch1x1, branch3x3, branch3x3dbl, branch_pool],
	        axis=_CHANNEL_AXIS,
	        name='mixed' + str(9 + index))


	def InceptionV3(include_top=True,
	                weights='imagenet',
	                input_tensor=None,
	                input_shape=None,
	                pooling=None,
	                classes=1000,
	                **kwargs):
	    """Instantiates the Inception v3 architecture with a sigmoid top layer.

	    Optionally loads weights pre-trained on ImageNet. The model is built
	    for the `channels_last` data format only.

	    # Arguments
	        include_top: whether to include the global average pooling and
	            the sigmoid `Dense` layer at the top of the network.
	        weights: one of `None` (random initialization),
	            'imagenet' (pre-training on ImageNet),
	            or the path to the weights file to be loaded.
	        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
	            to use as image input for the model.
	        input_shape: optional shape tuple in `channels_last` order, e.g.
	            `(299, 299, 3)`. It is validated by `_obtain_input_shape`
	            with a default size of 299 and a minimum size of 75.
	        pooling: optional pooling mode for feature extraction
	            when `include_top` is `False`.
	            - `None`: the output is the 4D tensor of the last
	                convolutional block.
	            - `avg`: global average pooling is applied, so the output
	                is a 2D tensor.
	            - `max`: global max pooling is applied.
	        classes: number of units of the top layer; only used when
	            `include_top` is true. Must be 1000 when loading the
	            'imagenet' weights with `include_top`.
	        **kwargs: accepted for call compatibility and ignored.

	    # Returns
	        A Keras model instance.

	    # Raises
	        ValueError: in case of invalid argument for `weights`,
	            or invalid input shape.
	    """
	    # Validate the arguments first so mistakes fail fast with a clear error.
	    if not (weights in {'imagenet', None} or os.path.exists(weights)):
	        raise ValueError('The `weights` argument should be either '
	                         '`None` (random initialization), `imagenet` '
	                         '(pre-training on ImageNet), '
	                         'or the path to the weights file to be loaded.')

	    if weights == 'imagenet' and include_top and classes != 1000:
	        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
	                         ' as true, `classes` should be 1000')

	    # `backend` and `keras_utils` are not imported at module level; take
	    # them from tf.keras, like the module's `layers` and `models`.
	    from tensorflow.keras import backend
	    from tensorflow.keras import utils as keras_utils

	    # Determine proper input shape
	    input_shape = _obtain_input_shape(
	        input_shape,
	        default_size=299,
	        min_size=75,
	        data_format="channels_last",
	        require_flatten=include_top,
	        weights=weights
	    )

	    # Build on the caller's tensor when one is given, so the returned model
	    # is actually connected to it.
	    if input_tensor is None:
	        img_input = layers.Input(shape=input_shape)
	    elif backend.is_keras_tensor(input_tensor):
	        img_input = input_tensor
	    else:
	        img_input = layers.Input(tensor=input_tensor, shape=input_shape)

	    # Stem
	    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
	    x = conv2d_bn(x, 32, 3, 3, padding='valid')
	    x = conv2d_bn(x, 64, 3, 3)
	    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

	    x = conv2d_bn(x, 80, 1, 1, padding='valid')
	    x = conv2d_bn(x, 192, 3, 3, padding='valid')
	    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

	    # mixed 0: 35 x 35 x 256
	    x = _inception_35x35(x, 32, 'mixed0')
	    # mixed 1, 2: 35 x 35 x 288
	    x = _inception_35x35(x, 64, 'mixed1')
	    x = _inception_35x35(x, 64, 'mixed2')

	    # mixed 3: 17 x 17 x 768
	    branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')

	    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
	    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
	    branch3x3dbl = conv2d_bn(
	        branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')

	    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
	    x = layers.concatenate(
	        [branch3x3, branch3x3dbl, branch_pool],
	        axis=_CHANNEL_AXIS,
	        name='mixed3')

	    # mixed 4: 17 x 17 x 768
	    x = _inception_17x17(x, 128, 'mixed4')
	    # mixed 5, 6: 17 x 17 x 768
	    for i in range(2):
	        x = _inception_17x17(x, 160, 'mixed' + str(5 + i))
	    # mixed 7: 17 x 17 x 768
	    x = _inception_17x17(x, 192, 'mixed7')

	    # mixed 8: 8 x 8 x 1280
	    branch3x3 = conv2d_bn(x, 192, 1, 1)
	    branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
	                          strides=(2, 2), padding='valid')

	    branch7x7x3 = conv2d_bn(x, 192, 1, 1)
	    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
	    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
	    branch7x7x3 = conv2d_bn(
	        branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')

	    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
	    x = layers.concatenate(
	        [branch3x3, branch7x7x3, branch_pool],
	        axis=_CHANNEL_AXIS,
	        name='mixed8')

	    # mixed 9, 10: 8 x 8 x 2048
	    for i in range(2):
	        x = _inception_8x8(x, i)

	    if include_top:
	        # Classification block: sigmoid instead of the usual softmax.
	        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
	        x = layers.Dense(classes, activation='sigmoid', name='predictions')(x)
	    else:
	        if pooling == 'avg':
	            x = layers.GlobalAveragePooling2D()(x)
	        elif pooling == 'max':
	            x = layers.GlobalMaxPooling2D()(x)

	    # Ensure that the model takes into account
	    # any potential predecessors of `input_tensor`.
	    if input_tensor is not None:
	        inputs = keras_utils.get_source_inputs(input_tensor)
	    else:
	        inputs = img_input
	    # Create model.
	    model = models.Model(inputs, x, name='inception_v3')

	    # Load weights.
	    if weights == 'imagenet':
	        if include_top:
	            weights_path = keras_utils.get_file(
	                'inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
	                WEIGHTS_PATH,
	                cache_subdir='models',
	                file_hash='9a0d58056eeedaa3f26cb7ebd46da564')
	        else:
	            weights_path = keras_utils.get_file(
	                'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5',
	                WEIGHTS_PATH_NO_TOP,
	                cache_subdir='models',
	                file_hash='bcbd6486424b2319ff4ef7d526e38f63')
	        model.load_weights(weights_path)
	    elif weights is not None:
	        model.load_weights(weights)

	    return model


	def preprocess_input(x, **kwargs):
	    """Preprocess a batch of images with the `'tf'` mode of `imagenet_utils`.

	    # Arguments
	        x: a 4D numpy array consists of RGB values within [0, 255].
	        **kwargs: forwarded unchanged to `imagenet_utils.preprocess_input`.
	            NOTE(review): keras_applications presumably expects its
	            `backend` / `utils` submodules to be passed here -- confirm
	            against the callers.

	    # Returns
	        Preprocessed array.
	    """
	    preprocessed = imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
	    return preprocessed