Forked from fchollet/classifier_from_little_data_script_2.py
Created
December 4, 2016 09:32
-
-
Save manboubird/d1d5154d8daca6a7e896fcff2272bbee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dog pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import os | |
import h5py | |
import numpy as np | |
from keras.preprocessing.image import ImageDataGenerator | |
from keras.models import Sequential | |
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D | |
from keras.layers import Activation, Dropout, Flatten, Dense | |
# Path to the pre-trained VGG16 weights file (HDF5) that
# save_bottlebeck_features() loads into the convolutional layers.
weights_path = '../keras/examples/vgg16_weights.h5'
# Where train_top_model() stores the weights of the classifier it fits.
top_model_weights_path = 'bottleneck_fc_model.h5'

# Dimensions of our images (every picture is resized to this).
img_width, img_height = 150, 150

# Root folders of the training / validation pictures; each contains one
# subfolder per class (see the module docstring).
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Total number of pictures across both classes. train_top_model() builds its
# labels assuming the first half belongs to one class and the second half to
# the other, so these are expected to be even.
nb_train_samples = 2000
nb_validation_samples = 800

# Number of epochs used when fitting the top model.
nb_epoch = 50
def _build_vgg16_conv_base():
    """Return the convolutional part of VGG16 as a Sequential model.

    The network is five blocks of (ZeroPadding2D, Convolution2D) pairs, each
    block followed by a 2x2 max-pooling layer. Layers are added in exactly
    the order the weight file is read in ``_load_vgg16_weights``, so the
    order must not be changed.
    """
    # (number of filters, number of conv layers) for each of the five blocks.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True
    for block_idx, (nb_filters, nb_convs) in enumerate(block_specs, start=1):
        for conv_idx in range(1, nb_convs + 1):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D(
                    (1, 1), input_shape=(3, img_width, img_height)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(
                nb_filters, 3, 3, activation='relu',
                name='conv{}_{}'.format(block_idx, conv_idx)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    return model


def _load_vgg16_weights(model):
    """Copy the saved VGG16 weights from ``weights_path`` into ``model``.

    Only as many layers as ``model`` has are loaded; the remaining
    (fully-connected) layers stored in the savefile are ignored.

    Raises:
        AssertionError: if ``weights_path`` does not exist.
    """
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
    # Open read-only so the weights file is never modified, and let the
    # context manager close it even if loading a layer fails.
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)


def _dump_bottleneck_features(model, datagen, data_dir, nb_samples, out_path):
    """Run the images under ``data_dir`` through ``model`` and save the output.

    Args:
        model: network used to compute the features.
        datagen: ImageDataGenerator used to read and rescale the pictures.
        data_dir: folder with one subfolder of pictures per class.
        nb_samples: number of samples to ask ``predict_generator`` for.
        out_path: ``.npy`` file the resulting array is written to.
    """
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode=None,
        # Not shuffled: train_top_model() rebuilds the labels assuming the
        # features come out grouped by class, in order.
        shuffle=False)
    features = model.predict_generator(generator, nb_samples)
    # np.save writes binary data, so the file must be opened in 'wb' mode
    # (text mode fails on Python 3 and corrupts the data on Windows).
    with open(out_path, 'wb') as out_file:
        np.save(out_file, features)


def save_bottlebeck_features():
    """Compute and save VGG16 bottleneck features for both image sets.

    Builds the convolutional part of VGG16, loads the pre-trained weights
    from ``weights_path``, then runs the training and validation pictures
    through it and writes the results to ``bottleneck_features_train.npy``
    and ``bottleneck_features_validation.npy`` in the current directory.

    Raises:
        AssertionError: if the weights file at ``weights_path`` is missing.
    """
    datagen = ImageDataGenerator(rescale=1./255)

    # build the VGG16 network
    model = _build_vgg16_conv_base()

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    _load_vgg16_weights(model)
    print('Model loaded.')

    _dump_bottleneck_features(
        model, datagen, train_data_dir, nb_train_samples,
        'bottleneck_features_train.npy')
    _dump_bottleneck_features(
        model, datagen, validation_data_dir, nb_validation_samples,
        'bottleneck_features_validation.npy')
def train_top_model():
    """Train a small fully-connected classifier on the saved bottleneck features.

    Reads ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy`` from the current directory, fits a
    Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(1, sigmoid) model on
    them for ``nb_epoch`` epochs, and saves its weights to
    ``top_model_weights_path``.

    Labels are not read from disk: the first half of each feature array is
    labelled 0 and the second half 1, which relies on the features having
    been saved unshuffled with the same number of pictures in each class.
    """
    # .npy files are binary; open them in 'rb' mode and close them promptly.
    with open('bottleneck_features_train.npy', 'rb') as features_file:
        train_data = np.load(features_file)
    # Floor division keeps the repeat count an int on Python 3 as well
    # ("[0] * 1000.0" raises TypeError).
    train_labels = np.array(
        [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

    with open('bottleneck_features_validation.npy', 'rb') as features_file:
        validation_data = np.load(features_file)
    validation_labels = np.array(
        [0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))

    model = Sequential()
    # The input shape is taken from the saved features (batch axis dropped).
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              nb_epoch=nb_epoch, batch_size=32,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
# Run both stages in order: train_top_model() reads the .npy files that
# save_bottlebeck_features() writes.
save_bottlebeck_features()
train_top_model()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment