-
-
Save Thimira/354b90d59faf8b0d758f74eae3a511e2 to your computer and use it in GitHub Desktop.
''' | |
Using Bottleneck Features for Multi-Class Classification in Keras | |
We use this technique to build powerful (high accuracy without overfitting) Image Classification systems with small | |
amount of training data. | |
The full tutorial to get this code working can be found at the "Codes of Interest" Blog at the following link, | |
https://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html | |
Please go through the tutorial before attempting to run this code, as it explains how to setup your training data. | |
The code was tested on Python 3.5, with the following library versions, | |
Keras 2.0.6 | |
TensorFlow 1.2.1 | |
OpenCV 3.2.0 | |
This should work with Theano as well, but untested. | |
''' | |
import numpy as np | |
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img | |
from keras.models import Sequential | |
from keras.layers import Dropout, Flatten, Dense | |
from keras import applications | |
from keras.utils.np_utils import to_categorical | |
import matplotlib.pyplot as plt | |
import math | |
import cv2 | |
# dimensions of our images. | |
img_width, img_height = 224, 224 | |
top_model_weights_path = 'bottleneck_fc_model.h5' | |
train_data_dir = 'data/train' | |
validation_data_dir = 'data/validation' | |
# number of epochs to train top model | |
epochs = 50 | |
# batch size used by flow_from_directory and predict_generator | |
batch_size = 16 | |
def save_bottlebeck_features(): | |
# build the VGG16 network | |
model = applications.VGG16(include_top=False, weights='imagenet') | |
datagen = ImageDataGenerator(rescale=1. / 255) | |
generator = datagen.flow_from_directory( | |
train_data_dir, | |
target_size=(img_width, img_height), | |
batch_size=batch_size, | |
class_mode=None, | |
shuffle=False) | |
print(len(generator.filenames)) | |
print(generator.class_indices) | |
print(len(generator.class_indices)) | |
nb_train_samples = len(generator.filenames) | |
num_classes = len(generator.class_indices) | |
predict_size_train = int(math.ceil(nb_train_samples / batch_size)) | |
bottleneck_features_train = model.predict_generator( | |
generator, predict_size_train) | |
np.save('bottleneck_features_train.npy', bottleneck_features_train) | |
generator = datagen.flow_from_directory( | |
validation_data_dir, | |
target_size=(img_width, img_height), | |
batch_size=batch_size, | |
class_mode=None, | |
shuffle=False) | |
nb_validation_samples = len(generator.filenames) | |
predict_size_validation = int( | |
math.ceil(nb_validation_samples / batch_size)) | |
bottleneck_features_validation = model.predict_generator( | |
generator, predict_size_validation) | |
np.save('bottleneck_features_validation.npy', | |
bottleneck_features_validation) | |
def train_top_model(): | |
datagen_top = ImageDataGenerator(rescale=1. / 255) | |
generator_top = datagen_top.flow_from_directory( | |
train_data_dir, | |
target_size=(img_width, img_height), | |
batch_size=batch_size, | |
class_mode='categorical', | |
shuffle=False) | |
nb_train_samples = len(generator_top.filenames) | |
num_classes = len(generator_top.class_indices) | |
# save the class indices to use use later in predictions | |
np.save('class_indices.npy', generator_top.class_indices) | |
# load the bottleneck features saved earlier | |
train_data = np.load('bottleneck_features_train.npy') | |
# get the class lebels for the training data, in the original order | |
train_labels = generator_top.classes | |
# https://github.com/fchollet/keras/issues/3467 | |
# convert the training labels to categorical vectors | |
train_labels = to_categorical(train_labels, num_classes=num_classes) | |
generator_top = datagen_top.flow_from_directory( | |
validation_data_dir, | |
target_size=(img_width, img_height), | |
batch_size=batch_size, | |
class_mode=None, | |
shuffle=False) | |
nb_validation_samples = len(generator_top.filenames) | |
validation_data = np.load('bottleneck_features_validation.npy') | |
validation_labels = generator_top.classes | |
validation_labels = to_categorical( | |
validation_labels, num_classes=num_classes) | |
model = Sequential() | |
model.add(Flatten(input_shape=train_data.shape[1:])) | |
model.add(Dense(256, activation='relu')) | |
model.add(Dropout(0.5)) | |
model.add(Dense(num_classes, activation='sigmoid')) | |
model.compile(optimizer='rmsprop', | |
loss='categorical_crossentropy', metrics=['accuracy']) | |
history = model.fit(train_data, train_labels, | |
epochs=epochs, | |
batch_size=batch_size, | |
validation_data=(validation_data, validation_labels)) | |
model.save_weights(top_model_weights_path) | |
(eval_loss, eval_accuracy) = model.evaluate( | |
validation_data, validation_labels, batch_size=batch_size, verbose=1) | |
print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100)) | |
print("[INFO] Loss: {}".format(eval_loss)) | |
plt.figure(1) | |
# summarize history for accuracy | |
plt.subplot(211) | |
plt.plot(history.history['acc']) | |
plt.plot(history.history['val_acc']) | |
plt.title('model accuracy') | |
plt.ylabel('accuracy') | |
plt.xlabel('epoch') | |
plt.legend(['train', 'test'], loc='upper left') | |
# summarize history for loss | |
plt.subplot(212) | |
plt.plot(history.history['loss']) | |
plt.plot(history.history['val_loss']) | |
plt.title('model loss') | |
plt.ylabel('loss') | |
plt.xlabel('epoch') | |
plt.legend(['train', 'test'], loc='upper left') | |
plt.show() | |
def predict(): | |
# load the class_indices saved in the earlier step | |
class_dictionary = np.load('class_indices.npy').item() | |
num_classes = len(class_dictionary) | |
# add the path to your test image below | |
image_path = 'path/to/your/test_image' | |
orig = cv2.imread(image_path) | |
print("[INFO] loading and preprocessing image...") | |
image = load_img(image_path, target_size=(224, 224)) | |
image = img_to_array(image) | |
# important! otherwise the predictions will be '0' | |
image = image / 255 | |
image = np.expand_dims(image, axis=0) | |
# build the VGG16 network | |
model = applications.VGG16(include_top=False, weights='imagenet') | |
# get the bottleneck prediction from the pre-trained VGG16 model | |
bottleneck_prediction = model.predict(image) | |
# build top model | |
model = Sequential() | |
model.add(Flatten(input_shape=bottleneck_prediction.shape[1:])) | |
model.add(Dense(256, activation='relu')) | |
model.add(Dropout(0.5)) | |
model.add(Dense(num_classes, activation='sigmoid')) | |
model.load_weights(top_model_weights_path) | |
# use the bottleneck prediction on the top model to get the final | |
# classification | |
class_predicted = model.predict_classes(bottleneck_prediction) | |
probabilities = model.predict_proba(bottleneck_prediction) | |
inID = class_predicted[0] | |
inv_map = {v: k for k, v in class_dictionary.items()} | |
label = inv_map[inID] | |
# get the prediction label | |
print("Image ID: {}, Label: {}".format(inID, label)) | |
# display the predictions with the image | |
cv2.putText(orig, "Predicted: {}".format(label), (10, 30), | |
cv2.FONT_HERSHEY_PLAIN, 1.5, (43, 99, 255), 2) | |
cv2.imshow("Classification", orig) | |
cv2.waitKey(0) | |
cv2.destroyAllWindows() | |
save_bottlebeck_features() | |
train_top_model() | |
predict() | |
cv2.destroyAllWindows() |
hi,
i have used this code for training 50,000 training images and 30,000 validation. while training it train only using 2 epochs,after 2 epochs it decrease the accuracy. so kindly tell me im going in a right direction or not?
on 2 epochs a model can train perfectly?
hi,
can anybody tells , only using 2 epochs a model can be trained or not?
as im using 70,000 training images and 30,000 validation images.
it gives 94% accuracy on 2 epochs only ,after 2 it decreases the accuracy
@mehki 2 epochs are not normally enough for a dataset of that size. How is the validation accuracy behaving?
This code gist is a little bit old. I have a more improved version of multiclass classification in keras with bottleneck and fine tuning in by Bird Watch project. You can check the project at https://github.com/Thimira/bird_watch. See the 'bird_watch_train.py' and 'bird_watch_train_optimized.py' files.
@Thimira
Can anybody help me ,how we can use Live webcam for the prediction for this blog....thank you