Last active
May 8, 2021 01:18
-
-
Save lijiansong/f85fade219847c60a456267dc33a50f3 to your computer and use it in GitHub Desktop.
tutorial_basic_tf2_example.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.5" | |
}, | |
"pycharm": { | |
"stem_cell": { | |
"cell_type": "raw", | |
"source": [], | |
"metadata": { | |
"collapsed": false | |
} | |
} | |
}, | |
"colab": { | |
"name": "tutorial_basic_tf2_example.ipynb", | |
"provenance": [], | |
"include_colab_link": true | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/lijiansong/f85fade219847c60a456267dc33a50f3/tutorial_basic_tf2_example.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%%\n" | |
}, | |
"scrolled": true, | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "BEqRg_8WfwOj", | |
"outputId": "2928bc47-37ac-4d49-fd61-bdb2c7c02d0e" | |
}, | |
"source": [ | |
"# System packages for the COIN-OR CBC solver (binary and development library).\n", | |
"!sudo apt install coinor-cbc coinor-libcbc-dev\n", | |
"# The version specifier must be quoted: unquoted, the shell parses `>=2.0.0` as a redirect of\n", | |
"# pip's stdout into a file named `=2.0.0`, dropping the constraint and hiding pip's output.\n", | |
"!pip install cylp 'tensorflow-gpu>=2.0.0' tqdm\n", | |
"\n", | |
"import os\n", | |
"\n", | |
"# Install Checkmate from source only when it cannot be imported yet.\n", | |
"try:\n", | |
"    from checkmate.tf2 import get_keras_model\n", | |
"except ImportError:\n", | |
"    !git clone https://github.com/parasj/checkmate.git\n", | |
"    # The editable install below is run from inside the fresh clone.\n", | |
"    os.chdir('./checkmate')\n", | |
"    !pip install -e .\n" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Reading package lists... Done\n", | |
"Building dependency tree \n", | |
"Reading state information... Done\n", | |
"The following package was automatically installed and is no longer required:\n", | |
" libnvidia-common-460\n", | |
"Use 'sudo apt autoremove' to remove it.\n", | |
"The following additional packages will be installed:\n", | |
" coinor-libcbc3 coinor-libcgl-dev coinor-libcgl1 coinor-libclp-dev\n", | |
" coinor-libclp1 coinor-libcoinutils-dev coinor-libcoinutils3v5\n", | |
" coinor-libosi-dev coinor-libosi1v5\n", | |
"The following NEW packages will be installed:\n", | |
" coinor-cbc coinor-libcbc-dev coinor-libcbc3 coinor-libcgl-dev coinor-libcgl1\n", | |
" coinor-libclp-dev coinor-libclp1 coinor-libcoinutils-dev\n", | |
" coinor-libcoinutils3v5 coinor-libosi-dev coinor-libosi1v5\n", | |
"0 upgraded, 11 newly installed, 0 to remove and 34 not upgraded.\n", | |
"Need to get 6,278 kB of archives.\n", | |
"After this operation, 28.4 MB of additional disk space will be used.\n", | |
"Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcoinutils3v5 amd64 2.10.14+repack1-1 [472 kB]\n", | |
"Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libosi1v5 amd64 0.107.9+repack1-1 [270 kB]\n", | |
"Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libclp1 amd64 1.16.11+repack1-1 [863 kB]\n", | |
"Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcgl1 amd64 0.59.10+repack1-1 [433 kB]\n", | |
"Get:5 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcbc3 amd64 2.9.9+repack1-1 [687 kB]\n", | |
"Get:6 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-cbc amd64 2.9.9+repack1-1 [11.2 kB]\n", | |
"Get:7 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcoinutils-dev amd64 2.10.14+repack1-1 [795 kB]\n", | |
"Get:8 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libosi-dev amd64 0.107.9+repack1-1 [332 kB]\n", | |
"Get:9 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libclp-dev amd64 1.16.11+repack1-1 [1,038 kB]\n", | |
"Get:10 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcgl-dev amd64 0.59.10+repack1-1 [530 kB]\n", | |
"Get:11 http://archive.ubuntu.com/ubuntu bionic/universe amd64 coinor-libcbc-dev amd64 2.9.9+repack1-1 [846 kB]\n", | |
"Fetched 6,278 kB in 1s (4,787 kB/s)\n", | |
"debconf: unable to initialize frontend: Dialog\n", | |
"debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 11.)\n", | |
"debconf: falling back to frontend: Readline\n", | |
"debconf: unable to initialize frontend: Readline\n", | |
"debconf: (This frontend requires a controlling tty.)\n", | |
"debconf: falling back to frontend: Teletype\n", | |
"dpkg-preconfigure: unable to re-open stdin: \n", | |
"Selecting previously unselected package coinor-libcoinutils3v5.\n", | |
"(Reading database ... 160706 files and directories currently installed.)\n", | |
"Preparing to unpack .../00-coinor-libcoinutils3v5_2.10.14+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcoinutils3v5 (2.10.14+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libosi1v5.\n", | |
"Preparing to unpack .../01-coinor-libosi1v5_0.107.9+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libosi1v5 (0.107.9+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libclp1.\n", | |
"Preparing to unpack .../02-coinor-libclp1_1.16.11+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libclp1 (1.16.11+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libcgl1.\n", | |
"Preparing to unpack .../03-coinor-libcgl1_0.59.10+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcgl1 (0.59.10+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libcbc3.\n", | |
"Preparing to unpack .../04-coinor-libcbc3_2.9.9+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcbc3 (2.9.9+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-cbc.\n", | |
"Preparing to unpack .../05-coinor-cbc_2.9.9+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-cbc (2.9.9+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libcoinutils-dev.\n", | |
"Preparing to unpack .../06-coinor-libcoinutils-dev_2.10.14+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcoinutils-dev (2.10.14+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libosi-dev.\n", | |
"Preparing to unpack .../07-coinor-libosi-dev_0.107.9+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libosi-dev (0.107.9+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libclp-dev.\n", | |
"Preparing to unpack .../08-coinor-libclp-dev_1.16.11+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libclp-dev (1.16.11+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libcgl-dev.\n", | |
"Preparing to unpack .../09-coinor-libcgl-dev_0.59.10+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcgl-dev (0.59.10+repack1-1) ...\n", | |
"Selecting previously unselected package coinor-libcbc-dev.\n", | |
"Preparing to unpack .../10-coinor-libcbc-dev_2.9.9+repack1-1_amd64.deb ...\n", | |
"Unpacking coinor-libcbc-dev (2.9.9+repack1-1) ...\n", | |
"Setting up coinor-libcoinutils3v5 (2.10.14+repack1-1) ...\n", | |
"Setting up coinor-libosi1v5 (0.107.9+repack1-1) ...\n", | |
"Setting up coinor-libclp1 (1.16.11+repack1-1) ...\n", | |
"Setting up coinor-libosi-dev (0.107.9+repack1-1) ...\n", | |
"Setting up coinor-libcoinutils-dev (2.10.14+repack1-1) ...\n", | |
"Setting up coinor-libcgl1 (0.59.10+repack1-1) ...\n", | |
"Setting up coinor-libclp-dev (1.16.11+repack1-1) ...\n", | |
"Setting up coinor-libcbc3 (2.9.9+repack1-1) ...\n", | |
"Setting up coinor-libcgl-dev (0.59.10+repack1-1) ...\n", | |
"Setting up coinor-cbc (2.9.9+repack1-1) ...\n", | |
"Setting up coinor-libcbc-dev (2.9.9+repack1-1) ...\n", | |
"Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n", | |
"Processing triggers for libc-bin (2.27-3ubuntu1.2) ...\n", | |
"/sbin/ldconfig.real: /usr/local/lib/python3.7/dist-packages/ideep4py/lib/libmkldnn.so.0 is not a symbolic link\n", | |
"\n", | |
"Cloning into 'checkmate'...\n", | |
"remote: Enumerating objects: 1290, done.\u001b[K\n", | |
"remote: Counting objects: 100% (84/84), done.\u001b[K\n", | |
"remote: Compressing objects: 100% (67/67), done.\u001b[K\n", | |
"remote: Total 1290 (delta 38), reused 40 (delta 15), pack-reused 1206\u001b[K\n", | |
"Receiving objects: 100% (1290/1290), 570.73 KiB | 5.65 MiB/s, done.\n", | |
"Resolving deltas: 100% (758/758), done.\n", | |
"Obtaining file:///content/checkmate\n", | |
"Requirement already satisfied: cvxpy in /usr/local/lib/python3.7/dist-packages (from checkmate==0.1.0) (1.0.31)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from checkmate==0.1.0) (1.19.5)\n", | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from checkmate==0.1.0) (1.1.5)\n", | |
"Collecting toposort\n", | |
" Downloading https://files.pythonhosted.org/packages/f2/7d/55784e894ee0cde2474fb977ffd1651e74e840a9f92e1d847f7e3115d5ec/toposort-1.6-py2.py3-none-any.whl\n", | |
"Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from checkmate==0.1.0) (5.4.8)\n", | |
"Requirement already satisfied: scs>=1.1.3 in /usr/local/lib/python3.7/dist-packages (from cvxpy->checkmate==0.1.0) (2.1.3)\n", | |
"Requirement already satisfied: ecos>=2 in /usr/local/lib/python3.7/dist-packages (from cvxpy->checkmate==0.1.0) (2.0.7.post1)\n", | |
"Requirement already satisfied: osqp>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from cvxpy->checkmate==0.1.0) (0.6.2.post0)\n", | |
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from cvxpy->checkmate==0.1.0) (0.70.11.1)\n", | |
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from cvxpy->checkmate==0.1.0) (1.4.1)\n", | |
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->checkmate==0.1.0) (2018.9)\n", | |
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->checkmate==0.1.0) (2.8.1)\n", | |
"Requirement already satisfied: qdldl in /usr/local/lib/python3.7/dist-packages (from osqp>=0.4.1->cvxpy->checkmate==0.1.0) (0.1.5.post0)\n", | |
"Requirement already satisfied: dill>=0.3.3 in /usr/local/lib/python3.7/dist-packages (from multiprocess->cvxpy->checkmate==0.1.0) (0.3.3)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->checkmate==0.1.0) (1.15.0)\n", | |
"Installing collected packages: toposort, checkmate\n", | |
" Running setup.py develop for checkmate\n", | |
"Successfully installed checkmate toposort-1.6\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%%\n" | |
}, | |
"id": "DHba_YCTfwOp" | |
}, | |
"source": [ | |
"import logging\n", | |
"import numpy as np\n", | |
"import tensorflow as tf\n", | |
"from checkmate.tf2 import get_keras_model\n", | |
"from tqdm import tqdm\n", | |
"logging.basicConfig(level=logging.DEBUG)" | |
], | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "VB52bmCOfwOq" | |
}, | |
"source": [ | |
"# Checkmate getting started guide\n", | |
"Checkmate is a system for training large neural networks on memory-constrained hardware. State-of-the-art models require\n", | |
"increasing amounts of GPU memory. Checkmate traces your TensorFlow application and efficiently reschedules the TF graph so that\n", | |
"total memory requirements are under the memory budget of your GPU.\n", | |
"\n", | |
"In this tutorial, we walk through how to train a computer vision model with a basic application of Checkmate. While this \n", | |
"application would likely fit within the limits of most GPUs, it serves to illustrate the mechanics of using Checkmate.\n", | |
"\n", | |
"## Loading CIFAR10 using keras\n", | |
"Checkmate optimizes any TensorFlow 2.0 graph. In this example, we load CIFAR10 as the dataset and use a basic few-layer neural network as the model." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%%\n" | |
}, | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "DOSHaHndfwOq", | |
"outputId": "c360c555-7351-469c-92f7-e0fbdad46f64" | |
}, | |
"source": [ | |
"# load cifar10 dataset\n", | |
"batch_size = 1024\n", | |
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()\n", | |
"\n", | |
"# divide the images by 255, then store images and labels as float32\n", | |
"x_train = x_train / 255.0\n", | |
"x_test = x_test / 255.0\n", | |
"x_train = x_train.astype(np.float32)\n", | |
"y_train = y_train.astype(np.float32)\n", | |
"x_test = x_test.astype(np.float32)\n", | |
"y_test = y_test.astype(np.float32)\n", | |
"\n", | |
"# slice both splits into batches of `batch_size`\n", | |
"train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)\n", | |
"test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)\n", | |
"\n", | |
"# build the model through get_keras_model, along with loss function and optimizer\n", | |
"model = get_keras_model(\"test\", input_shape=x_train[0].shape, num_classes=10)\n", | |
"loss = tf.keras.losses.SparseCategoricalCrossentropy()\n", | |
"optimizer = tf.keras.optimizers.Adam()\n", | |
"model.compile(optimizer=optimizer, loss=loss)" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", | |
"170500096/170498071 [==============================] - 2s 0us/step\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"pycharm": { | |
"name": "#%% md\n" | |
}, | |
"id": "emPkj3hVfwOr" | |
}, | |
"source": [ | |
"## Recompiling the TensorFlow test model using Checkmate\n", | |
"Checkmate exposes a convenience function `checkmate.tf2.compile_tf2` that will take a Keras model and return\n", | |
"a `tf.Function` that runs a single training iteration over a batch. In order to accurately measure memory\n", | |
"consumption per operation, Checkmate needs to know the full size of the inputs to your model. The training\n", | |
"dataset usually contains this under `train_ds.element_spec`. Note that `element_spec` will also return the\n", | |
"shape of the output, which is not needed." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%%\n" | |
}, | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "kTKSUsIVfwOr", | |
"outputId": "7ac6afe0-5774-4aa9-c7b3-e5fb8e19c9f4" | |
}, | |
"source": [ | |
"from checkmate.tf2.wrapper import compile_tf2\n", | |
"\n", | |
"# Take the first (images, labels) batch of the training dataset; its two entries are\n", | |
"# passed to compile_tf2 as the input and label specs.\n", | |
"element_spec = next(iter(train_ds))\n", | |
"train_iteration = compile_tf2(\n", | |
"    model,\n", | |
"    loss=loss,\n", | |
"    optimizer=optimizer,\n", | |
"    input_spec=element_spec[0],  # images of the first batch\n", | |
"    label_spec=element_spec[1],  # labels of the first batch\n", | |
")" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /content/checkmate/checkmate/tf2/wrapper.py:24: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use `tf.config.list_physical_devices('GPU')` instead.\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /content/checkmate/checkmate/tf2/wrapper.py:24: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use `tf.config.list_physical_devices('GPU')` instead.\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Solve: 78.140s\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%% md\n" | |
}, | |
"id": "mwVaApeRfwOr" | |
}, | |
"source": [ | |
"# Training the large neural network\n", | |
"Checkmate has now recompiled our training function. We can continue to use existing TensorFlow functionality for training neural networks, but we substitute the call to the model with Checkmate's version of the training iteration." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"pycharm": { | |
"is_executing": false, | |
"name": "#%%\n" | |
}, | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "9NIp7xf3fwOs", | |
"outputId": "4fa858ee-d45f-4ca5-84f5-b426cdaf4a3b" | |
}, | |
"source": [ | |
"# Running metrics, reset at the start of every epoch.\n", | |
"train_loss = tf.keras.metrics.Mean(name=\"train_loss\")\n", | |
"train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name=\"train_accuracy\")\n", | |
"test_loss = tf.keras.metrics.Mean(name=\"test_loss\")\n", | |
"test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name=\"test_accuracy\")\n", | |
"\n", | |
"for epoch in range(10):\n", | |
"    # Reset the metrics at the start of the next epoch\n", | |
"    train_loss.reset_states()\n", | |
"    train_accuracy.reset_states()\n", | |
"    test_loss.reset_states()\n", | |
"    test_accuracy.reset_states()\n", | |
"\n", | |
"    # Training pass: each batch goes through the function returned by compile_tf2.\n", | |
"    with tqdm(total=x_train.shape[0]) as pbar:\n", | |
"        for images, labels in train_ds:\n", | |
"            predictions, loss_value = train_iteration(images, labels)\n", | |
"            train_loss(loss_value)\n", | |
"            train_accuracy(labels, predictions)\n", | |
"            # Advance the bar by the number of samples in this batch.\n", | |
"            pbar.update(images.shape[0])\n", | |
"            pbar.set_description('Train epoch {}; loss={:0.4f}, acc={:0.4f}'.format(epoch + 1, train_loss.result(), train_accuracy.result()))\n", | |
"\n", | |
"    # Validation pass: the model is called directly and only the metrics are updated.\n", | |
"    with tqdm(total=x_test.shape[0]) as pbar:\n", | |
"        for images, labels in test_ds:\n", | |
"            predictions = model(images)\n", | |
"            test_loss_value = loss(labels, predictions)\n", | |
"            test_loss(test_loss_value)\n", | |
"            test_accuracy(labels, predictions)\n", | |
"            pbar.update(images.shape[0])\n", | |
"            pbar.set_description('Valid epoch {}, loss={:0.4f}, acc={:0.4f}'.format(epoch + 1, test_loss.result(), test_accuracy.result()))\n" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Train epoch 1; loss=2.2300, acc=0.1539: 100%|██████████| 50000/50000 [03:00<00:00, 276.44it/s]\n", | |
"Valid epoch 1, loss=2.1219, acc=0.1985: 100%|██████████| 10000/10000 [00:14<00:00, 704.35it/s]\n", | |
"Train epoch 2; loss=2.0790, acc=0.2239: 100%|██████████| 50000/50000 [02:58<00:00, 279.50it/s]\n", | |
"Valid epoch 2, loss=2.0443, acc=0.2479: 100%|██████████| 10000/10000 [00:13<00:00, 720.25it/s]\n", | |
"Train epoch 3; loss=2.0341, acc=0.2480: 70%|██████▉ | 34816/50000 [02:03<00:53, 284.02it/s]" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "q33-IR4KirUu" | |
}, | |
"source": [ | |
"" | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://github.com/parasj/checkmate