CNN using a pretrained VGG16 model with a new classification layer. This script reads a csv with file paths and labels and fine-tunes (or retrains) the whole network on the new images and labels. Batch size and number of epochs can also be customized.
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense
from keras.models import Model
from keras.preprocessing import image as kimage
from keras.utils import to_categorical
import numpy as np
import pandas as pd
'''
This script reads the image paths and their corresponding labels from a csv file (in my case
the labels are integers, so they must be converted to one-hot encoding).

input.csv:
       path            labels
0      path/file1.jpg  1
1      path/file2.jpg  2
...
'''
def read_image(path):
    '''
    Load an image from the provided path and preprocess it for VGG16.
    '''
    img = kimage.load_img(path, target_size=(224, 224))
    tmp = kimage.img_to_array(img)
    tmp = np.expand_dims(tmp, axis=0)
    tmp = preprocess_input(tmp)
    return tmp
def data_generator(file_paths, labels, batch_size):
    '''
    Generator used by Keras to create batches of images and labels.
    '''
    i = 0
    n = len(file_paths)
    while True:
        if i + batch_size > n:
            # Last (possibly smaller) batch, then wrap around
            batch_x = file_paths[i:]
            batch_y = labels[i:]
            i = 0
        else:
            batch_x = file_paths[i:i + batch_size]
            batch_y = labels[i:i + batch_size]
            i += batch_size
        # Size the holder to the actual batch so images and labels stay aligned
        batch_holder = np.zeros((len(batch_x), 224, 224, 3))
        for j, path in enumerate(batch_x):
            batch_holder[j, :] = read_image(path)
        yield batch_holder, batch_y
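
# Quick sanity check (optional, not part of the original gist): once file_paths and
# labels are built below, you can pull a single batch to verify shapes before training:
#   xb, yb = next(data_generator(file_paths, labels, 4))
#   print(xb.shape, yb.shape)   # expected: (4, 224, 224, 3) and (4, n_labels)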
# Read the csv, shuffle the rows and build one-hot encoded labels
data = pd.read_csv('input.csv')
data = data.sample(frac=1).reset_index(drop=True)
n_labels = len(data['labels'].unique())
# to_categorical expects integer class ids in the range [0, n_labels)
labels = to_categorical(data['labels'], num_classes=n_labels)
file_paths = data['path']
# Generate a model with all layers (with top)
vgg16 = VGG16(weights='imagenet', include_top=True)

# Add a new softmax layer whose input is the output of the second-to-last layer
x = Dense(n_labels, activation='softmax', name='predictions')(vgg16.layers[-2].output)

# Then create the corresponding model
model = Model(inputs=vgg16.input, outputs=x)
model.summary()
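
# Optional tweak (not in the original gist): to fine-tune only the new classifier
# instead of retraining the whole network, freeze the pretrained layers before
# compiling, e.g.
#   for layer in vgg16.layers:
#       layer.trainable = False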
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
batch_size = 32
epochs = 2

gen = data_generator(file_paths, labels, batch_size)
model.fit_generator(gen, steps_per_epoch=len(data) // batch_size, epochs=epochs)
# Serialize the architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

# Serialize the weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")