Learn how to build a multi-class image classification system using bottleneck features from a pre-trained model in Keras to achieve transfer learning.
'''
Using Bottleneck Features for Multi-Class Classification in Keras

We use this technique to build powerful (high accuracy without overfitting)
image classification systems with a small amount of training data.

The full tutorial to get this code working can be found on the "Codes of
Interest" blog at the following link:
http://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html

Please go through the tutorial before attempting to run this code, as it
explains how to set up your training data.

The code was tested on Python 3.5 with the following library versions:
Keras 2.0.6
TensorFlow 1.2.1
OpenCV 3.2.0

It should also work with the Theano backend, but this is untested.
'''
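# The script runs three steps in order (called at the bottom of the file):
#   1. save_bottleneck_features() - run every training/validation image once
#      through VGG16 (without its fully-connected top) and cache the resulting
#      feature maps to .npy files.
#   2. train_top_model()          - train a small fully-connected classifier
#      on the cached features and save its weights.
#   3. predict()                  - classify a single image by chaining VGG16
#      and the trained top model.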
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt
import math
import cv2
# dimensions of our images
img_width, img_height = 224, 224

top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# number of epochs to train the top model
epochs = 50
# batch size used by flow_from_directory and predict_generator
batch_size = 16
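# Expected data layout (a sketch; the class names are placeholders):
# flow_from_directory infers one class per sub-directory, so train_data_dir
# and validation_data_dir should each contain one folder per class.
#
#   data/
#       train/
#           class_a/  img001.jpg, img002.jpg, ...
#           class_b/  img001.jpg, ...
#       validation/
#           class_a/  ...
#           class_b/  ...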
def save_bottleneck_features():
    # build the VGG16 network without its fully-connected top layers
    model = applications.VGG16(include_top=False, weights='imagenet')

    datagen = ImageDataGenerator(rescale=1. / 255)

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

    print(len(generator.filenames))
    print(generator.class_indices)
    print(len(generator.class_indices))

    nb_train_samples = len(generator.filenames)
    num_classes = len(generator.class_indices)

    predict_size_train = int(math.ceil(nb_train_samples / batch_size))

    bottleneck_features_train = model.predict_generator(
        generator, predict_size_train)

    np.save('bottleneck_features_train.npy', bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

    nb_validation_samples = len(generator.filenames)

    predict_size_validation = int(
        math.ceil(nb_validation_samples / batch_size))

    bottleneck_features_validation = model.predict_generator(
        generator, predict_size_validation)

    np.save('bottleneck_features_validation.npy',
            bottleneck_features_validation)
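# Note on the cached features: for 224x224 inputs, VGG16 without its top
# layers outputs a 7x7x512 feature map per image, so the saved arrays should
# have shape (num_samples, 7, 7, 512). A quick sanity check (run only after
# save_bottleneck_features() has finished) would be:
#
#   features = np.load('bottleneck_features_train.npy')
#   print(features.shape)   # e.g. (nb_train_samples, 7, 7, 512)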
def train_top_model():
    datagen_top = ImageDataGenerator(rescale=1. / 255)
    generator_top = datagen_top.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

    nb_train_samples = len(generator_top.filenames)
    num_classes = len(generator_top.class_indices)

    # save the class indices to use later in predictions
    np.save('class_indices.npy', generator_top.class_indices)

    # load the bottleneck features saved earlier
    train_data = np.load('bottleneck_features_train.npy')

    # get the class labels for the training data, in the original order
    train_labels = generator_top.classes

    # convert the training labels to categorical vectors
    # https://github.com/fchollet/keras/issues/3467
    train_labels = to_categorical(train_labels, num_classes=num_classes)

    generator_top = datagen_top.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

    nb_validation_samples = len(generator_top.filenames)

    validation_data = np.load('bottleneck_features_validation.npy')

    validation_labels = generator_top.classes
    validation_labels = to_categorical(
        validation_labels, num_classes=num_classes)

    # small fully-connected model trained on top of the bottleneck features
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    # softmax for mutually exclusive multi-class output
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(train_data, train_labels,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_data=(validation_data, validation_labels))

    model.save_weights(top_model_weights_path)

    (eval_loss, eval_accuracy) = model.evaluate(
        validation_data, validation_labels, batch_size=batch_size, verbose=1)

    print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100))
    print("[INFO] Loss: {}".format(eval_loss))

    # summarize history for accuracy
    plt.figure(1)
    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')

    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')

    plt.show()
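# Files written by the two steps above:
#   bottleneck_features_train.npy, bottleneck_features_validation.npy
#       (cached VGG16 feature maps, only used while training the top model)
#   class_indices.npy        (class name -> index mapping)
#   bottleneck_fc_model.h5   (trained top-model weights)
# predict() below only needs class_indices.npy and bottleneck_fc_model.h5.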
def predict():
    # load the class_indices saved in the earlier step
    class_dictionary = np.load('class_indices.npy').item()
    num_classes = len(class_dictionary)

    # add the path to your test image below
    image_path = 'path/to/your/test_image'

    orig = cv2.imread(image_path)

    print("[INFO] loading and preprocessing image...")
    image = load_img(image_path, target_size=(img_width, img_height))
    image = img_to_array(image)

    # important! rescale exactly as during training,
    # otherwise the predictions will be wrong
    image = image / 255
    image = np.expand_dims(image, axis=0)

    # build the VGG16 network without its top layers
    model = applications.VGG16(include_top=False, weights='imagenet')

    # get the bottleneck features for this image from the pre-trained VGG16
    bottleneck_prediction = model.predict(image)

    # build the top model (same architecture as in train_top_model)
    model = Sequential()
    model.add(Flatten(input_shape=bottleneck_prediction.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.load_weights(top_model_weights_path)

    # run the bottleneck features through the top model to get the final
    # classification
    class_predicted = model.predict_classes(bottleneck_prediction)
    probabilities = model.predict_proba(bottleneck_prediction)

    inID = class_predicted[0]

    # map the predicted class index back to its label
    inv_map = {v: k for k, v in class_dictionary.items()}
    label = inv_map[inID]

    print("Image ID: {}, Label: {}".format(inID, label))

    # display the prediction on the image
    cv2.putText(orig, "Predicted: {}".format(label), (10, 30),
                cv2.FONT_HERSHEY_PLAIN, 1.5, (43, 99, 255), 2)

    cv2.imshow("Classification", orig)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
save_bottleneck_features()
train_top_model()
predict()

cv2.destroyAllWindows()