Created
January 29, 2020 02:17
-
-
Save kyamagu/695c127f7f457b5a5cee4aaaa80b6336 to your computer and use it in GitHub Desktop.
slow-checkpoint-batchnorm.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "slow-checkpoint-batchnorm.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyNvQbNqbsKqAjpRwXUbpO5b", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kyamagu/695c127f7f457b5a5cee4aaaa80b6336/slow-checkpoint-batchnorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sCnK8rMw4eqX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 97 | |
}, | |
"outputId": "4b160cb0-57c5-41d7-becf-1c9f28d538a0" | |
}, | |
"source": [ | |
"import tensorflow as tf\n", | |
"import numpy as np\n", | |
"import tempfile\n", | |
"import os\n", | |
"import time\n", | |
"\n", | |
"print('Tensorflow %s' % tf.__version__)\n", | |
"print(tf.config.experimental.list_physical_devices('GPU'))\n", | |
"\n", | |
"IMAGE_SIZE = (256, 256, 3)\n", | |
"\n", | |
"def load_random(target):\n", | |
" image = tf.io.decode_raw(os.urandom(np.prod(IMAGE_SIZE)), tf.uint8)\n", | |
" image = tf.reshape(image, IMAGE_SIZE)\n", | |
" image = tf.image.convert_image_dtype(image, tf.float32)\n", | |
" return image, target\n", | |
"\n", | |
"def create_dataset():\n", | |
" ds = tf.data.Dataset.from_tensor_slices(np.random.rand(500, 1))\n", | |
" ds = ds.map(load_random).batch(50).repeat()\n", | |
" return ds\n", | |
"\n", | |
"def create_model(arch='ResNet50V2'):\n", | |
" cnn = getattr(tf.keras.applications, arch)(\n", | |
" input_shape=IMAGE_SIZE, weights=None, include_top=False, pooling='avg')\n", | |
" y = tf.keras.layers.Dense(1)(cnn.output)\n", | |
" model = tf.keras.models.Model(inputs=cnn.input, outputs=y)\n", | |
" model.compile(loss='mse', optimizer='sgd')\n", | |
" return model\n", | |
"\n", | |
"class Checkpointer(tf.keras.callbacks.ModelCheckpoint):\n", | |
" def on_epoch_end(self, epoch, logs=None):\n", | |
" start = time.time()\n", | |
" super(Checkpointer, self).on_epoch_end(epoch, logs=logs)\n", | |
" print('Checkpoint elapsed: %g seconds' % (time.time() - start))\n", | |
"\n", | |
"def train_without_strategy(**kwargs):\n", | |
" tf.compat.v1.keras.backend.clear_session()\n", | |
" with tempfile.TemporaryDirectory() as d:\n", | |
" ds = create_dataset()\n", | |
" model = create_model(**kwargs)\n", | |
" checkpointer = Checkpointer(d + \"/checkpoint.{epoch:02d}.hdf5\")\n", | |
" model.fit(\n", | |
" ds, callbacks=[checkpointer], epochs=5, steps_per_epoch=10, verbose=2)\n", | |
"\n", | |
"def train_with_strategy(**kwargs):\n", | |
" tf.compat.v1.keras.backend.clear_session()\n", | |
" with tempfile.TemporaryDirectory() as d:\n", | |
" strategy = tf.distribute.MirroredStrategy()\n", | |
" with strategy.scope():\n", | |
" ds = create_dataset()\n", | |
" model = create_model(**kwargs)\n", | |
" checkpointer = Checkpointer(d + \"/checkpoint.{epoch:02d}.hdf5\")\n", | |
" model.fit(\n", | |
" ds, callbacks=[checkpointer], epochs=5, steps_per_epoch=10, verbose=2)" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<p style=\"color: red;\">\n", | |
"The default version of TensorFlow in Colab will soon switch to TensorFlow 2.x.<br>\n", | |
"We recommend you <a href=\"https://www.tensorflow.org/guide/migrate\" target=\"_blank\">upgrade</a> now \n", | |
"or ensure your notebook will continue to use TensorFlow 1.x via the <code>%tensorflow_version 1.x</code> magic:\n", | |
"<a href=\"https://colab.research.google.com/notebooks/tensorflow_version.ipynb\" target=\"_blank\">more info</a>.</p>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Tensorflow 1.15.0\n", | |
"[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "GHg1Rf8x-MHD", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 377 | |
}, | |
"outputId": "c5162be5-a4ba-40af-fa5a-71fba2545dca" | |
}, | |
"source": [ | |
"train_without_strategy(arch='VGG16')" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"If using Keras pass *_constraint arguments to layers.\n", | |
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n", | |
"Train on 10 steps\n", | |
"Epoch 1/5\n", | |
"Checkpoint elapsed: 0.209552 seconds\n", | |
"10/10 - 22s - loss: 0.2506\n", | |
"Epoch 2/5\n", | |
"Checkpoint elapsed: 0.108256 seconds\n", | |
"10/10 - 9s - loss: 0.1325\n", | |
"Epoch 3/5\n", | |
"Checkpoint elapsed: 0.07974 seconds\n", | |
"10/10 - 9s - loss: 0.0914\n", | |
"Epoch 4/5\n", | |
"Checkpoint elapsed: 0.0784442 seconds\n", | |
"10/10 - 9s - loss: 0.0825\n", | |
"Epoch 5/5\n", | |
"Checkpoint elapsed: 0.112553 seconds\n", | |
"10/10 - 9s - loss: 0.0815\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_4_6TcmyDVeL", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 306 | |
}, | |
"outputId": "96919351-3a6e-4d50-af0d-66f1fd41fbc2" | |
}, | |
"source": [ | |
"train_with_strategy(arch='VGG16')" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n", | |
"Train on 10 steps\n", | |
"Epoch 1/5\n", | |
"Checkpoint elapsed: 0.2588 seconds\n", | |
"10/10 - 9s - loss: 0.2496\n", | |
"Epoch 2/5\n", | |
"Checkpoint elapsed: 0.0796804 seconds\n", | |
"10/10 - 9s - loss: 0.1393\n", | |
"Epoch 3/5\n", | |
"Checkpoint elapsed: 0.077446 seconds\n", | |
"10/10 - 9s - loss: 0.0967\n", | |
"Epoch 4/5\n", | |
"Checkpoint elapsed: 0.0802891 seconds\n", | |
"10/10 - 9s - loss: 0.0855\n", | |
"Epoch 5/5\n", | |
"Checkpoint elapsed: 0.103764 seconds\n", | |
"10/10 - 9s - loss: 0.0838\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "01tFjMFiDz_s", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 306 | |
}, | |
"outputId": "57260af5-2381-40ff-c65d-1dd186285b85" | |
}, | |
"source": [ | |
"train_without_strategy(arch='ResNet50V2')" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n", | |
"Train on 10 steps\n", | |
"Epoch 1/5\n", | |
"Checkpoint elapsed: 10.1309 seconds\n", | |
"10/10 - 24s - loss: 8438.6164\n", | |
"Epoch 2/5\n", | |
"Checkpoint elapsed: 0.403354 seconds\n", | |
"10/10 - 7s - loss: 11.3105\n", | |
"Epoch 3/5\n", | |
"Checkpoint elapsed: 0.350576 seconds\n", | |
"10/10 - 7s - loss: 3.2780\n", | |
"Epoch 4/5\n", | |
"Checkpoint elapsed: 0.457663 seconds\n", | |
"10/10 - 7s - loss: 1.0615\n", | |
"Epoch 5/5\n", | |
"Checkpoint elapsed: 0.408976 seconds\n", | |
"10/10 - 7s - loss: 0.3888\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QExQQ0CR5-jq", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 476 | |
}, | |
"outputId": "3bc70ed8-8ef0-4cc4-e8d0-1432e9ee29db" | |
}, | |
"source": [ | |
"train_with_strategy(arch='ResNet50V2')" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n", | |
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n", | |
"Train on 10 steps\n", | |
"Epoch 1/5\n", | |
"Checkpoint elapsed: 17.68 seconds\n", | |
"10/10 - 24s - loss: 7063.1463\n", | |
"Epoch 2/5\n", | |
"Checkpoint elapsed: 14.1126 seconds\n", | |
"10/10 - 20s - loss: 11.1796\n", | |
"Epoch 3/5\n", | |
"Checkpoint elapsed: 14.6897 seconds\n", | |
"10/10 - 20s - loss: 4.3299\n", | |
"Epoch 4/5\n", | |
"Checkpoint elapsed: 15.5147 seconds\n", | |
"10/10 - 21s - loss: 2.5390\n", | |
"Epoch 5/5\n", | |
"Checkpoint elapsed: 17.0302 seconds\n", | |
"10/10 - 23s - loss: 1.6338\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment