Created
May 20, 2018 20:28
-
-
Save kechan/38b4e6c85501246a2a49deadd26aefd7 to your computer and use it in GitHub Desktop.
Keras Transfer Learning with feature caching.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Keras Transfer Learning with feature caching.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python2", | |
"display_name": "Python 2" | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"[View in Colaboratory](https://colab.research.google.com/gist/kechan/38b4e6c85501246a2a49deadd26aefd7/keras-transfer-learning-with-feature-caching.ipynb)" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "eZvBWtxiCi8a", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "0f8fc981-94e9-46f9-9280-c1aaa36b7956" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import tensorflow as tf\n", | |
"device_name = tf.test.gpu_device_name()\n", | |
"if device_name != '/device:GPU:0':\n", | |
" raise SystemError('GPU device not found')\n", | |
"print('Found GPU at: {}'.format(device_name))" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Found GPU at: /device:GPU:0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "AvJ_yMjvzf2m", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "28c1ee7d-7c56-491a-bd47-acb44edae84a" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"ls -l" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"total 12\r\n", | |
"drwxr-xr-x 2 root root 4096 May 20 18:54 \u001b[0m\u001b[01;34mdata\u001b[0m/\r\n", | |
"drwxr-xr-x 1 root root 4096 May 20 18:39 \u001b[01;34mdatalab\u001b[0m/\r\n", | |
"drwxr-xr-x 6 root root 4096 May 20 18:55 \u001b[01;34mKerasVision\u001b[0m/\r\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "QQaQeFlgC_Ug", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"!git clone https://github.com/kechan/KerasVision.git" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "9plAlIdkEIjb", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"!pip install -U -q PyDrive" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "LjkHA3vWEKPN", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"def download_data_from_gdrive(ids, filenames):\n", | |
" \n", | |
" for id, filename in zip(ids, filenames):\n", | |
" uploaded = drive.CreateFile({'id': id})\n", | |
" uploaded.GetContentFile(filename)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "3r5f9icFEMXK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"from pydrive.auth import GoogleAuth\n", | |
"from pydrive.drive import GoogleDrive\n", | |
"from google.colab import auth\n", | |
"from oauth2client.client import GoogleCredentials\n", | |
"\n", | |
"auth.authenticate_user()\n", | |
"gauth = GoogleAuth()\n", | |
"gauth.credentials = GoogleCredentials.get_application_default()\n", | |
"drive = GoogleDrive(gauth)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "NTMJGv6IEVqY", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"download_data_from_gdrive(['1Zdt10Q1Jn-hrq2o1mmvQ1j4DgBTxxGIq', '1FgVh2oGqH9Pr4Ze2NETyLnBTPtC0hTui', '1X6ijkgbWCzATPCJLx0rBCy5jtUkjo2KG'], \n", | |
" ['train_224_224.hdf5.gz', 'validation_224_224.hdf5.gz', 'test_224_224.hdf5.gz'])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "3iucLd5AyfwA", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "e7fdb6c3-b01f-4aa8-dd53-a159147b7b2a" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"cd KerasVision/" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/content/KerasVision\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "jzhSsgOFzrlz", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"!pip install tqdm" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "mc6Mh6mSDWhO", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"#### Imports" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "w9oO1OQuC0y6", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "28bcc323-8e57-43db-e3a2-0132e652714a" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import os\n", | |
"from tqdm import tqdm\n", | |
"import matplotlib.pyplot as plt\n", | |
"from keras.preprocessing import image\n", | |
"\n", | |
"from keras.models import Sequential, Model\n", | |
"from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, InputLayer\n", | |
"from keras.layers import BatchNormalization, Activation\n", | |
"from keras.preprocessing.image import ImageDataGenerator\n", | |
"from keras import optimizers\n", | |
"from keras.utils import to_categorical, plot_model\n", | |
"from keras.models import load_model\n", | |
"from keras.applications import VGG16, MobileNet\n", | |
"\n", | |
"from data.data_util import *\n", | |
"from data.load_data import from_splitted_hdf5\n", | |
"from train import *\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import pickle\n", | |
"\n", | |
"%load_ext autoreload\n", | |
"%autoreload 2\n", | |
"\n", | |
"%matplotlib inline\n", | |
"\n", | |
"from data.augmentation.CustomImageDataGenerator import * \n", | |
"\n", | |
"import h5py" | |
], | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"The autoreload extension is already loaded. To reload it, use:\n", | |
" %reload_ext autoreload\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "m-vdSj5HBkSC", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"!pwd\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "X08LE52uC47C", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"#train_h5 = h5py.File('../data/train_224_224.hdf5', mode='r')\n", | |
"#dev_h5 = h5py.File('../data/validation_224_224.hdf5', mode='r')\n", | |
"\n", | |
"#train_set_x = train_h5['train_set_x'][:]\n", | |
"#train_set_y = train_h5['train_set_y'][:]\n", | |
"\n", | |
"#dev_set_x = dev_h5['dev_set_x'][:]\n", | |
"#dev_set_y = dev_h5['dev_set_y'][:]\n", | |
"\n", | |
"train_set_x, train_set_y, dev_set_x, dev_set_y, _, _, classes = \\\n", | |
"from_splitted_hdf5('../data')" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "m19WyqDTSkGY", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "ff9e2443-f861-4cb3-82fc-852f439ac0f7" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"train_set_x.shape" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(2978, 224, 224, 3)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 16 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "Xu_OjHyDDdW4", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"conv_base = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "Ay-uip_VFmtr", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"conv_base.summary()" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "Nq_NmXaefCjq", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"datagen = ImageDataGenerator(rescale=1./255)\n", | |
"batch_size = 32" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "yWTM1Ux3TF3a", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"def extract_features(set_x, set_y, sample_count):\n", | |
" features = np.zeros(shape=(sample_count, 7, 7, 1024))\n", | |
" labels = np.zeros(shape=(sample_count,1))\n", | |
" \n", | |
" generator = datagen.flow(set_x, set_y, batch_size=batch_size)\n", | |
" \n", | |
" i = 0\n", | |
" for inputs_batch, labels_batch in tqdm(generator):\n", | |
" \n", | |
" features_batch = conv_base.predict(inputs_batch)\n", | |
" \n", | |
" #print(features_batch.shape)\n", | |
" #print(labels_batch.shape)\n", | |
" \n", | |
" features[i * batch_size : (i + 1) * batch_size] = features_batch\n", | |
" labels[i * batch_size : (i + 1) * batch_size] = labels_batch\n", | |
" i += 1\n", | |
" \n", | |
" if i * batch_size >= sample_count:\n", | |
" # Note that since generators yield data indefinitely in a loop,\n", | |
" # we must `break` after every image has been seen once.\n", | |
" break\n", | |
" \n", | |
" return features, labels" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "dNZkGYCOTMQ0", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"feature_train_set_x, feature_train_set_y = extract_features(train_set_x, train_set_y, len(train_set_x))\n", | |
"feature_dev_set_x, feature_dev_set_y = extract_features(dev_set_x, dev_set_y, len(dev_set_x))" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "0R31FaYxTO-R", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
}, | |
"outputId": "c006b4bf-6d3e-434e-d35e-29e874efe57b" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"feature_train_set_x.shape, train_set_x.shape, feature_dev_set_x.shape, dev_set_x.shape, feature_train_set_x.dtype" | |
], | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"((2978, 7, 7, 1024),\n", | |
" (2978, 224, 224, 3),\n", | |
" (600, 7, 7, 1024),\n", | |
" (600, 224, 224, 3),\n", | |
" dtype('float64'))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 20 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "NHV1iRrlTYQQ", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# reshape\n", | |
"#feature_train_set_x = np.reshape(feature_train_set_x, (len(feature_train_set_x), 7 * 7 * 1024))\n", | |
"#feature_dev_set_x = np.reshape(feature_dev_set_x, (len(feature_dev_set_x), 7 * 7 * 1024))\n", | |
"\n", | |
"# one-hot \n", | |
"feature_train_set_y = to_categorical(feature_train_set_y)\n", | |
"feature_dev_set_y = to_categorical(feature_dev_set_y)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "zyfp1l2yTnSe", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "e8514306-9a1c-43d2-ed73-e36e994c767d" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"feature_train_set_x.shape, feature_train_set_y.shape, feature_dev_set_x.shape, feature_dev_set_y.shape, feature_train_set_x.dtype" | |
], | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"((2978, 7, 7, 1024), (2978, 7), (600, 7, 7, 1024), (600, 7), dtype('float64'))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "NLDN8kzdTosT", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"model = Sequential()\n", | |
"\n", | |
"model.add(InputLayer(input_shape=(7, 7, 1024)))\n", | |
"model.add(Flatten())\n", | |
"#model.add(Dense(1024, input_dim=7*7*1024))\n", | |
"model.add(Dense(1024))\n", | |
"model.add(BatchNormalization())\n", | |
"model.add(Activation('relu'))\n", | |
"model.add(Dropout(0.5))\n", | |
"model.add(Dense(7, activation='softmax'))" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "mSHbevzGTypd", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"model.compile(optimizer=optimizers.Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", | |
"all_history = {}" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "RitWpPkYT0w0", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 445 | |
}, | |
"outputId": "de1a3baa-981e-499b-ce1d-30b6a9d190ef" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"batch_size = 32\n", | |
"# use data generator\n", | |
"train_datagen = ImageDataGenerator()\n", | |
"test_datagen = ImageDataGenerator()\n", | |
"\n", | |
"train_generator = train_datagen.flow(feature_train_set_x, feature_train_set_y, batch_size=batch_size)\n", | |
"validation_generator = test_datagen.flow(feature_dev_set_x, feature_dev_set_y, batch_size=batch_size)\n", | |
"\n", | |
"\n", | |
"history = model.fit_generator(train_generator, steps_per_epoch=len(feature_train_set_y)//batch_size, epochs=10, \n", | |
" validation_data=validation_generator, validation_steps=len(feature_dev_set_y)//batch_size)\n", | |
"\n", | |
"# default way\n", | |
"#history = model.fit(feature_train_set_x, feature_train_set_y, epochs=10, batch_size=batch_size,\n", | |
"# validation_data=(feature_dev_set_x, feature_dev_set_y)\n", | |
"# )" | |
], | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py:1144: UserWarning: NumpyArrayIterator is set to use the data format convention \"channels_last\" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (2978, 7, 7, 1024) (1024 channels).\n", | |
" ' (' + str(self.x.shape[channels_axis]) + ' channels).')\n", | |
"/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py:1144: UserWarning: NumpyArrayIterator is set to use the data format convention \"channels_last\" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (600, 7, 7, 1024) (1024 channels).\n", | |
" ' (' + str(self.x.shape[channels_axis]) + ' channels).')\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/10\n", | |
"93/93 [==============================] - 9s 94ms/step - loss: 1.1749 - acc: 0.6085 - val_loss: 0.8274 - val_acc: 0.7049\n", | |
"Epoch 2/10\n", | |
"93/93 [==============================] - 8s 81ms/step - loss: 0.4332 - acc: 0.8599 - val_loss: 0.7917 - val_acc: 0.7153\n", | |
"Epoch 3/10\n", | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.2851 - acc: 0.9141 - val_loss: 0.7107 - val_acc: 0.7431\n", | |
"Epoch 4/10\n", | |
"80/93 [========================>.....] - ETA: 0s - loss: 0.1671 - acc: 0.9582" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.1675 - acc: 0.9550 - val_loss: 0.6999 - val_acc: 0.7500\n", | |
"Epoch 5/10\n", | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.1277 - acc: 0.9681 - val_loss: 0.6505 - val_acc: 0.7552\n", | |
"Epoch 6/10\n", | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.0933 - acc: 0.9802 - val_loss: 0.6709 - val_acc: 0.7674\n", | |
"Epoch 7/10\n", | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.0730 - acc: 0.9879 - val_loss: 0.6284 - val_acc: 0.7674\n", | |
"Epoch 8/10\n", | |
" 6/93 [>.............................] - ETA: 6s - loss: 0.0995 - acc: 0.9635" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.0578 - acc: 0.9916 - val_loss: 0.6399 - val_acc: 0.7639\n", | |
"Epoch 9/10\n", | |
"93/93 [==============================] - 7s 79ms/step - loss: 0.0488 - acc: 0.9956 - val_loss: 0.6805 - val_acc: 0.7622\n", | |
"Epoch 10/10\n", | |
"93/93 [==============================] - 7s 80ms/step - loss: 0.0537 - acc: 0.9850 - val_loss: 0.7092 - val_acc: 0.7552\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "DcNdpL6bT2nK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment