Skip to content

Instantly share code, notes, and snippets.

@kyamagu
Created January 29, 2020 02:17
Show Gist options
  • Save kyamagu/695c127f7f457b5a5cee4aaaa80b6336 to your computer and use it in GitHub Desktop.
Save kyamagu/695c127f7f457b5a5cee4aaaa80b6336 to your computer and use it in GitHub Desktop.
slow-checkpoint-batchnorm.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "slow-checkpoint-batchnorm.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyNvQbNqbsKqAjpRwXUbpO5b",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kyamagu/695c127f7f457b5a5cee4aaaa80b6336/slow-checkpoint-batchnorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "sCnK8rMw4eqX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 97
},
"outputId": "4b160cb0-57c5-41d7-becf-1c9f28d538a0"
},
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import tempfile\n",
"import os\n",
"import time\n",
"\n",
"# Report the runtime so the timings below can be tied to a TF build and device.\n",
"print('Tensorflow %s' % (tf.__version__,))\n",
"print(tf.config.experimental.list_physical_devices('GPU'))\n",
"\n",
"# Shape of every synthetic image; also passed to the model as its input shape.\n",
"IMAGE_SIZE = (256, 256, 3)\n",
"\n",
"def load_random(target):\n",
"    \"\"\"Pair a regression target with a freshly sampled random image.\n",
"\n",
"    The pixels are drawn with a TensorFlow op instead of `os.urandom`:\n",
"    `Dataset.map` traces this function into a graph, so bytes produced by\n",
"    plain Python code are captured once as a constant and every dataset\n",
"    element would end up with the very same image.\n",
"\n",
"    Args:\n",
"        target: value passed through unchanged as the label.\n",
"\n",
"    Returns:\n",
"        Tuple `(image, target)` where `image` is a float32 tensor of shape\n",
"        `IMAGE_SIZE` with values in [0, 1].\n",
"    \"\"\"\n",
"    # Integer sampling needs an explicit maxval; [0, 256) covers the uint8 range.\n",
"    pixels = tf.random.uniform(IMAGE_SIZE, maxval=256, dtype=tf.int32)\n",
"    image = tf.cast(pixels, tf.uint8)\n",
"    # uint8 -> float32 conversion rescales the values to [0, 1].\n",
"    image = tf.image.convert_image_dtype(image, tf.float32)\n",
"    return image, target\n",
"\n",
"def create_dataset():\n",
"    \"\"\"Build the endless synthetic input pipeline used by the timing runs.\n",
"\n",
"    Returns:\n",
"        A `tf.data.Dataset` of `(image, target)` batches of size 50 that\n",
"        cycles forever over 500 random targets of shape `(1,)`.\n",
"    \"\"\"\n",
"    targets = np.random.rand(500, 1)\n",
"    examples = tf.data.Dataset.from_tensor_slices(targets).map(load_random)\n",
"    return examples.batch(50).repeat()\n",
"\n",
"def create_model(arch='ResNet50V2'):\n",
"    \"\"\"Build and compile a scalar-regression model on a Keras application.\n",
"\n",
"    Args:\n",
"        arch: name of a model constructor in `tf.keras.applications`.\n",
"\n",
"    Returns:\n",
"        A `tf.keras.Model` mapping `IMAGE_SIZE` inputs to a single output,\n",
"        compiled with MSE loss and the SGD optimizer.\n",
"    \"\"\"\n",
"    backbone_fn = getattr(tf.keras.applications, arch)\n",
"    backbone = backbone_fn(\n",
"        input_shape=IMAGE_SIZE,\n",
"        weights=None,\n",
"        include_top=False,\n",
"        pooling='avg')\n",
"    # A single dense unit on the pooled features produces the prediction.\n",
"    prediction = tf.keras.layers.Dense(1)(backbone.output)\n",
"    model = tf.keras.models.Model(inputs=backbone.input, outputs=prediction)\n",
"    model.compile(loss='mse', optimizer='sgd')\n",
"    return model\n",
"\n",
"class Checkpointer(tf.keras.callbacks.ModelCheckpoint):\n",
"    \"\"\"`ModelCheckpoint` that reports how long its end-of-epoch hook takes.\"\"\"\n",
"\n",
"    def on_epoch_end(self, epoch, logs=None):\n",
"        \"\"\"Run the parent hook and print its duration in seconds.\n",
"\n",
"        Args:\n",
"            epoch: epoch index, forwarded to the parent callback.\n",
"            logs: metrics dict, forwarded to the parent callback.\n",
"        \"\"\"\n",
"        # perf_counter is monotonic, so the measurement cannot be skewed by\n",
"        # wall-clock adjustments the way time.time() can.\n",
"        start = time.perf_counter()\n",
"        super(Checkpointer, self).on_epoch_end(epoch, logs=logs)\n",
"        print('Checkpoint elapsed: %g seconds' % (time.perf_counter() - start))\n",
"\n",
"def train_without_strategy(**kwargs):\n",
"    \"\"\"Train for five 10-step epochs without a distribution strategy.\n",
"\n",
"    A checkpoint is written after every epoch into a temporary directory\n",
"    that is removed when the function returns.\n",
"\n",
"    Args:\n",
"        **kwargs: forwarded to `create_model` (for example `arch`).\n",
"    \"\"\"\n",
"    tf.compat.v1.keras.backend.clear_session()\n",
"    with tempfile.TemporaryDirectory() as checkpoint_dir:\n",
"        dataset = create_dataset()\n",
"        model = create_model(**kwargs)\n",
"        # The epoch placeholder in the file name is filled in by the callback.\n",
"        timed_checkpoint = Checkpointer(\n",
"            checkpoint_dir + \"/checkpoint.{epoch:02d}.hdf5\")\n",
"        model.fit(\n",
"            dataset,\n",
"            callbacks=[timed_checkpoint],\n",
"            epochs=5,\n",
"            steps_per_epoch=10,\n",
"            verbose=2)\n",
"\n",
"def train_with_strategy(**kwargs):\n",
"    \"\"\"Train for five 10-step epochs under `tf.distribute.MirroredStrategy`.\n",
"\n",
"    Mirrors `train_without_strategy`, except that the dataset, the model and\n",
"    the training call are created and run inside the strategy scope.\n",
"\n",
"    Args:\n",
"        **kwargs: forwarded to `create_model` (for example `arch`).\n",
"    \"\"\"\n",
"    tf.compat.v1.keras.backend.clear_session()\n",
"    with tempfile.TemporaryDirectory() as checkpoint_dir:\n",
"        mirrored = tf.distribute.MirroredStrategy()\n",
"        with mirrored.scope():\n",
"            dataset = create_dataset()\n",
"            model = create_model(**kwargs)\n",
"            # The epoch placeholder in the file name is filled in by the callback.\n",
"            timed_checkpoint = Checkpointer(\n",
"                checkpoint_dir + \"/checkpoint.{epoch:02d}.hdf5\")\n",
"            model.fit(\n",
"                dataset,\n",
"                callbacks=[timed_checkpoint],\n",
"                epochs=5,\n",
"                steps_per_epoch=10,\n",
"                verbose=2)"
],
"execution_count": 1,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<p style=\"color: red;\">\n",
"The default version of TensorFlow in Colab will soon switch to TensorFlow 2.x.<br>\n",
"We recommend you <a href=\"https://www.tensorflow.org/guide/migrate\" target=\"_blank\">upgrade</a> now \n",
"or ensure your notebook will continue to use TensorFlow 1.x via the <code>%tensorflow_version 1.x</code> magic:\n",
"<a href=\"https://colab.research.google.com/notebooks/tensorflow_version.ipynb\" target=\"_blank\">more info</a>.</p>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Tensorflow 1.15.0\n",
"[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "GHg1Rf8x-MHD",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 377
},
"outputId": "c5162be5-a4ba-40af-fa5a-71fba2545dca"
},
"source": [
"train_without_strategy(arch='VGG16')"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"If using Keras pass *_constraint arguments to layers.\n",
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n",
"Train on 10 steps\n",
"Epoch 1/5\n",
"Checkpoint elapsed: 0.209552 seconds\n",
"10/10 - 22s - loss: 0.2506\n",
"Epoch 2/5\n",
"Checkpoint elapsed: 0.108256 seconds\n",
"10/10 - 9s - loss: 0.1325\n",
"Epoch 3/5\n",
"Checkpoint elapsed: 0.07974 seconds\n",
"10/10 - 9s - loss: 0.0914\n",
"Epoch 4/5\n",
"Checkpoint elapsed: 0.0784442 seconds\n",
"10/10 - 9s - loss: 0.0825\n",
"Epoch 5/5\n",
"Checkpoint elapsed: 0.112553 seconds\n",
"10/10 - 9s - loss: 0.0815\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_4_6TcmyDVeL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 306
},
"outputId": "96919351-3a6e-4d50-af0d-66f1fd41fbc2"
},
"source": [
"train_with_strategy(arch='VGG16')"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n",
"Train on 10 steps\n",
"Epoch 1/5\n",
"Checkpoint elapsed: 0.2588 seconds\n",
"10/10 - 9s - loss: 0.2496\n",
"Epoch 2/5\n",
"Checkpoint elapsed: 0.0796804 seconds\n",
"10/10 - 9s - loss: 0.1393\n",
"Epoch 3/5\n",
"Checkpoint elapsed: 0.077446 seconds\n",
"10/10 - 9s - loss: 0.0967\n",
"Epoch 4/5\n",
"Checkpoint elapsed: 0.0802891 seconds\n",
"10/10 - 9s - loss: 0.0855\n",
"Epoch 5/5\n",
"Checkpoint elapsed: 0.103764 seconds\n",
"10/10 - 9s - loss: 0.0838\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "01tFjMFiDz_s",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 306
},
"outputId": "57260af5-2381-40ff-c65d-1dd186285b85"
},
"source": [
"train_without_strategy(arch='ResNet50V2')"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n",
"Train on 10 steps\n",
"Epoch 1/5\n",
"Checkpoint elapsed: 10.1309 seconds\n",
"10/10 - 24s - loss: 8438.6164\n",
"Epoch 2/5\n",
"Checkpoint elapsed: 0.403354 seconds\n",
"10/10 - 7s - loss: 11.3105\n",
"Epoch 3/5\n",
"Checkpoint elapsed: 0.350576 seconds\n",
"10/10 - 7s - loss: 3.2780\n",
"Epoch 4/5\n",
"Checkpoint elapsed: 0.457663 seconds\n",
"10/10 - 7s - loss: 1.0615\n",
"Epoch 5/5\n",
"Checkpoint elapsed: 0.408976 seconds\n",
"10/10 - 7s - loss: 0.3888\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "QExQQ0CR5-jq",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 476
},
"outputId": "3bc70ed8-8ef0-4cc4-e8d0-1432e9ee29db"
},
"source": [
"train_with_strategy(arch='ResNet50V2')"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /replica:0/task:0/device:CPU:0 then broadcast to ('/replica:0/task:0/device:CPU:0',).\n",
"WARNING:tensorflow:Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.\n",
"Train on 10 steps\n",
"Epoch 1/5\n",
"Checkpoint elapsed: 17.68 seconds\n",
"10/10 - 24s - loss: 7063.1463\n",
"Epoch 2/5\n",
"Checkpoint elapsed: 14.1126 seconds\n",
"10/10 - 20s - loss: 11.1796\n",
"Epoch 3/5\n",
"Checkpoint elapsed: 14.6897 seconds\n",
"10/10 - 20s - loss: 4.3299\n",
"Epoch 4/5\n",
"Checkpoint elapsed: 15.5147 seconds\n",
"10/10 - 21s - loss: 2.5390\n",
"Epoch 5/5\n",
"Checkpoint elapsed: 17.0302 seconds\n",
"10/10 - 23s - loss: 1.6338\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment