Last active
April 13, 2021 05:52
-
-
Save Thimira/354b90d59faf8b0d758f74eae3a511e2 to your computer and use it in GitHub Desktop.
Learn how to build a multi-class image classification system using bottleneck features from a pre-trained model in Keras to achieve transfer learning. Tutorial: https://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Using Bottleneck Features for Multi-Class Classification in Keras

We use this technique to build powerful (high accuracy without overfitting) image classification systems with a small
amount of training data.

The full tutorial to get this code working can be found at the "Codes of Interest" blog at the following link:
https://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html

Please go through the tutorial before attempting to run this code, as it explains how to set up your training data.

The code was tested on Python 3.5 with the following library versions:

Keras 2.0.6
TensorFlow 1.2.1
OpenCV 3.2.0

This should work with Theano as well, but that is untested.
'''
import math

import cv2
import matplotlib.pyplot as plt
import numpy as np
from keras import applications
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.utils.np_utils import to_categorical
# Dimensions every image is resized to before it is fed to the network.
img_width, img_height = 224, 224

# File the trained top-model weights are saved to and later loaded from.
top_model_weights_path = 'bottleneck_fc_model.h5'

# Root directories handed to flow_from_directory() for the training and
# validation images.
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Number of epochs to train the top model.
epochs = 50

# Batch size used by flow_from_directory and predict_generator.
batch_size = 16
def _bottleneck_generator(datagen, data_dir):
    """Return an unshuffled, label-free image generator over ``data_dir``.

    ``shuffle=False`` keeps the images in directory order so that the saved
    features can later be paired with ``generator.classes``.
    """
    return datagen.flow_from_directory(
        data_dir,
        # Keras expects target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)


def _save_bottleneck_features(model, generator, output_path):
    """Run every image of ``generator`` through ``model`` and save the output.

    Raises:
        ValueError: if the generator found no images.
    """
    nb_samples = len(generator.filenames)
    if nb_samples == 0:
        raise ValueError(
            "No images found in '{}'".format(generator.directory))

    # Round up so the final, possibly partial, batch is included.
    steps = int(math.ceil(nb_samples / batch_size))
    features = model.predict_generator(generator, steps)
    np.save(output_path, features)


def save_bottlebeck_features():
    """Compute and save the VGG16 bottleneck features of both data sets.

    The images under ``train_data_dir`` and ``validation_data_dir`` are
    rescaled by 1/255, passed through VGG16 without its top layers
    (ImageNet weights), and the outputs are written to
    'bottleneck_features_train.npy' and 'bottleneck_features_validation.npy'.
    """
    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')
    datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = _bottleneck_generator(datagen, train_data_dir)

    # Report how many training images and classes were discovered.
    print(len(train_generator.filenames))
    print(train_generator.class_indices)
    print(len(train_generator.class_indices))

    _save_bottleneck_features(
        model, train_generator, 'bottleneck_features_train.npy')

    validation_generator = _bottleneck_generator(datagen, validation_data_dir)
    _save_bottleneck_features(
        model, validation_generator, 'bottleneck_features_validation.npy')
def _plot_training_history(history):
    """Plot train/validation accuracy and loss per epoch and show the figure."""
    metrics = history.history
    # Older Keras releases record accuracy under 'acc', newer ones under
    # 'accuracy'; use whichever key is present.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    plt.figure(1)

    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(metrics[acc_key])
    plt.plot(metrics['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # summarize history for loss
    plt.subplot(212)
    plt.plot(metrics['loss'])
    plt.plot(metrics['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.show()


def train_top_model():
    """Train the dense classifier on the saved bottleneck features.

    Loads 'bottleneck_features_train.npy' and
    'bottleneck_features_validation.npy', takes the labels from unshuffled
    directory generators, trains a Flatten -> Dense(256) -> Dropout ->
    Dense(num_classes) model, saves its weights to
    ``top_model_weights_path``, prints the validation accuracy and loss, and
    plots the training history.

    Also writes the class-name -> index mapping to 'class_indices.npy'.
    """
    datagen_top = ImageDataGenerator(rescale=1. / 255)

    # The generators are only used for their labels and class mapping;
    # shuffle=False keeps the labels in the order the features were saved.
    train_generator = datagen_top.flow_from_directory(
        train_data_dir,
        # Keras expects target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

    num_classes = len(train_generator.class_indices)

    # save the class indices to use later in predictions
    np.save('class_indices.npy', train_generator.class_indices)

    # load the bottleneck features saved earlier
    train_data = np.load('bottleneck_features_train.npy')

    # get the class labels for the training data, in the original order,
    # and convert them to categorical (one-hot) vectors
    # https://github.com/fchollet/keras/issues/3467
    train_labels = to_categorical(
        train_generator.classes, num_classes=num_classes)

    validation_generator = datagen_top.flow_from_directory(
        validation_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = to_categorical(
        validation_generator.classes, num_classes=num_classes)

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    # softmax is the output activation that matches categorical_crossentropy
    # for mutually exclusive classes; it yields one distribution over classes.
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(train_data, train_labels,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_data=(validation_data, validation_labels))

    model.save_weights(top_model_weights_path)

    (eval_loss, eval_accuracy) = model.evaluate(
        validation_data, validation_labels, batch_size=batch_size, verbose=1)

    print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100))
    print("[INFO] Loss: {}".format(eval_loss))

    _plot_training_history(history)
def predict(image_path='path/to/your/test_image'):
    """Classify one image and display it with the predicted label.

    Args:
        image_path: path of the image to classify. The default is the
            placeholder used by the original script; pass a real path.

    Raises:
        IOError: if OpenCV cannot read ``image_path``.

    Reads 'class_indices.npy' and ``top_model_weights_path``, prints the
    predicted class index and label, and shows the annotated image in an
    OpenCV window until a key is pressed.
    """
    # load the class_indices saved in the earlier step
    # NOTE: the file holds a pickled dict, so allow_pickle=True is required
    # on NumPy >= 1.16.3. Unpickling can execute arbitrary code; only load a
    # class_indices.npy that you produced yourself.
    class_dictionary = np.load('class_indices.npy', allow_pickle=True).item()
    num_classes = len(class_dictionary)

    # cv2.imread returns None instead of raising when it cannot read the
    # file, so fail here rather than later inside putText/imshow.
    orig = cv2.imread(image_path)
    if orig is None:
        raise IOError("Could not read image: {}".format(image_path))

    print("[INFO] loading and preprocessing image...")
    # Keras expects target_size as (height, width).
    image = load_img(image_path, target_size=(img_height, img_width))
    image = img_to_array(image)

    # important! otherwise the predictions will be '0'
    image = image / 255

    image = np.expand_dims(image, axis=0)

    # build the VGG16 network
    base_model = applications.VGG16(include_top=False, weights='imagenet')

    # get the bottleneck prediction from the pre-trained VGG16 model
    bottleneck_prediction = base_model.predict(image)

    # build top model
    top_model = Sequential()
    top_model.add(Flatten(input_shape=bottleneck_prediction.shape[1:]))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dropout(0.5))
    # softmax yields a distribution over the classes. The predicted class is
    # the arg-max, which is the same under sigmoid or softmax because both
    # preserve the ordering of the layer's pre-activation values.
    top_model.add(Dense(num_classes, activation='softmax'))

    top_model.load_weights(top_model_weights_path)

    # use the bottleneck prediction on the top model to get the final
    # classification; arg-max over the class scores replaces
    # predict_classes/predict_proba, which newer Keras releases no longer
    # provide
    probabilities = top_model.predict(bottleneck_prediction)
    inID = int(np.argmax(probabilities[0]))

    # invert the class-name -> index mapping to look up the label
    inv_map = {v: k for k, v in class_dictionary.items()}
    label = inv_map[inID]

    # get the prediction label
    print("Image ID: {}, Label: {}".format(inID, label))

    # display the predictions with the image
    cv2.putText(orig, "Predicted: {}".format(label), (10, 30),
                cv2.FONT_HERSHEY_PLAIN, 1.5, (43, 99, 255), 2)

    cv2.imshow("Classification", orig)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == '__main__':
    # Run the pipeline only when executed as a script, so that importing
    # this module does not trigger feature extraction and training.
    save_bottlebeck_features()
    train_top_model()
    predict()

    cv2.destroyAllWindows()
mehki
commented
Apr 14, 2020
via email
training data 70,000
validation data 30,000
If I use your code on my data for both training and testing, will it
give me good results?
I have spent one week on it, and now I want my model to be trained at once.
<http://www.avg.com/email-signature?utm_medium=email&utm_source=link&utm_campaign=sig-email&utm_content=webmail>
Virus-free.
www.avg.com
<http://www.avg.com/email-signature?utm_medium=email&utm_source=link&utm_campaign=sig-email&utm_content=webmail>
<#DAB4FAD8-2DD7-40BB-A1B8-4E2AA1F9FDF2>
…On Tue, Apr 14, 2020 at 6:04 PM Thimira Amaratunga ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
@mehki <https://github.com/mehki> 2 epochs are not normally enough for a
dataset of that size. How is the validation accuracy behaving?
This code gist is a little bit old. I have a more improved version of
multi-class classification in Keras, with bottleneck features and fine-tuning, in my
Bird Watch project. You can check the project at
https://github.com/Thimira/bird_watch. See the 'bird_watch_train.py' and
'bird_watch_train_optimized.py' files.
—
You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub
<https://gist.github.com/354b90d59faf8b0d758f74eae3a511e2#gistcomment-3253855>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AEG7BY6RBF4SYHNTDFJBERTRMRNOPANCNFSM4MHUPZSQ>
.
--
*Mehak Riaz*
Lab Engineer
College of Computing & Information Sciences
PAF-Karachi Institute of Economics & Technology
Email:* [email protected] <[email protected]>*
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment