Last active
January 27, 2020 22:49
-
-
Save anoken/7ce94342005b0ec3da87b236d591c744 to your computer and use it in GitHub Desktop.
YOLO_keras_train
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
#from __future__ import print_function | |
# | |
#https://mlblr.com/includes/mlai/index.html#yolov2 | |
#https://github.com/snakers4/yolov2-fish | |
from keras.models import Sequential, Model | |
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda | |
from keras.layers.advanced_activations import LeakyReLU | |
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard | |
from keras.optimizers import SGD, Adam, RMSprop | |
from keras.layers.merge import concatenate | |
import matplotlib.pyplot as plt | |
import keras.backend as K | |
import tensorflow as tf | |
import imgaug as ia | |
from tqdm import tqdm | |
from imgaug import augmenters as iaa | |
import numpy as np | |
import pickle | |
import os, cv2 | |
from preprocessing import parse_annotation, BatchGenerator | |
from keras.applications import MobileNet | |
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------

# Classes to detect, in index order, and the per-class weights used by the
# class term of the loss.
LABELS = ['raccoon']
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')

# Network input resolution and the output grid the detection head is
# reshaped to.
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7, 7

# Anchor boxes: BOX (width, height) pairs stored as one flat list.
BOX = 5
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

# Post-processing thresholds (not referenced by the training code below).
OBJ_THRESHOLD = 0.3
NMS_THRESHOLD = 0.3

# Relative weights of the individual loss terms.
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0

# Batch handling.
BATCH_SIZE = 1
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50

# Files and folders.
wt_path = 'yolo.weights'
train_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
train_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
# NOTE: validation currently points at the same folders as training.
valid_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
valid_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
# Model inputs: the RGB image plus a fixed-size buffer of ground-truth boxes
# that the loss function reads during training.
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

# Backbone: MobileNet without its classification head, ImageNet weights.
mobilenet = MobileNet(
    input_shape=(IMAGE_H, IMAGE_W, 3),
    alpha=0.75,
    depth_multiplier=1,
    dropout=0.001,
    weights="imagenet",
    classes=1000,
    include_top=False,
    #backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)

# Layer 23: 1x1 convolution producing, for every grid cell, BOX predictions
# of (x, y, w, h, confidence, class scores).
x = Conv2D(
    filters=BOX * (4 + 1 + CLASS),
    kernel_size=(1, 1),
    strides=(1, 1),
    padding='same',
    name='conv_23',
)(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)
model.summary()
def create_cell_grid(grid_size, batch_size, nb_box=None):
    """Build the per-cell (x, y) offsets for a square detection grid.

    Args:
        grid_size: number of cells along each side of the grid (Python int
            or scalar integer tensor).
        batch_size: number of samples to tile the grid for (Python int or
            scalar integer tensor).
        nb_box: number of boxes predicted per cell. Defaults to the
            module-level ``BOX`` (previously a hard-coded 5).

    Returns:
        Float tensor of shape (batch_size, grid_size, grid_size, nb_box, 2);
        the last axis holds (column index, row index) of each cell.
    """
    if nb_box is None:
        nb_box = BOX
    x_pos = tf.to_float(tf.range(grid_size))
    y_pos = tf.to_float(tf.range(grid_size))
    # Default 'xy' indexing: xx varies along columns, yy along rows.
    xx, yy = tf.meshgrid(x_pos, y_pos)
    xx = tf.expand_dims(xx, -1)
    yy = tf.expand_dims(yy, -1)
    grid = tf.concat([xx, yy], axis=-1)             # (G, G, 2)
    grid = tf.expand_dims(grid, -2)                 # (G, G, 1, 2)
    grid = tf.tile(grid, (1, 1, nb_box, 1))         # (G, G, nb_box, 2)
    grid = tf.expand_dims(grid, 0)                  # (1, G, G, nb_box, 2)
    grid = tf.tile(grid, (batch_size, 1, 1, 1, 1))  # (N, G, G, nb_box, 2)
    return grid
def custom_loss(y_true, y_pred):
    """YOLOv2-style training loss: coordinate + confidence + class terms.

    Args:
        y_true: ground-truth tensor. Indexed here as [..., 0:2] box centre,
            [..., 2:4] box size, [..., 4] objectness flag and [..., 5:]
            class scores. Assumed to share the model output layout
            (batch, GRID_H, GRID_W, BOX, 4 + 1 + CLASS) -- TODO confirm
            against BatchGenerator.
        y_pred: raw network output with the same layout.

    Returns:
        Scalar tensor ``loss_xy + loss_wh + loss_conf + loss_class``.

    Notes:
        * Reads the module-level ``true_boxes`` input tensor and the
          module-level scale / anchor constants.
        * The cell grid is tiled ``BATCH_SIZE`` times, so every batch must
          contain exactly ``BATCH_SIZE`` samples.
        * ``cell_y`` is obtained by transposing ``cell_x``, which is only
          shape-compatible when ``GRID_H == GRID_W``.
    """
    mask_shape = tf.shape(y_true)[:4]

    # cell_x[0, row, col, 0, 0] == col; swapping the two grid axes yields the
    # row index. The box axis is tiled BOX times (was a hard-coded 5) so it
    # always agrees with the anchor reshape below.
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))
    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [BATCH_SIZE, 1, 1, BOX, 1])

    conf_mask = tf.zeros(mask_shape)

    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)

    # ------------------------------------------------------------------
    # Adjust prediction
    # ------------------------------------------------------------------
    ### adjust x and y: sigmoid offset inside the cell + cell index
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h: exponential scaled by the anchor of each box slot
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2])

    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities (kept as logits for the softmax loss)
    pred_box_class = y_pred[..., 5:]

    # ------------------------------------------------------------------
    # Adjust ground truth
    # ------------------------------------------------------------------
    ### x and y (compared directly against pred_box_xy)
    true_box_xy = y_true[..., 0:2]

    ### w and h (compared directly against pred_box_wh)
    true_box_wh = y_true[..., 2:4]

    ### confidence target: IoU between each predictor and its own truth box
    true_wh_half = true_box_wh / 2.
    true_mins = true_box_xy - true_wh_half
    true_maxes = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins = pred_box_xy - pred_wh_half
    pred_maxes = pred_box_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    true_box_conf = iou_scores * y_true[..., 4]

    ### class target as a sparse index
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    # ------------------------------------------------------------------
    # Determine the masks
    # ------------------------------------------------------------------
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    # Extra axis so every prediction is compared with every buffered box.
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE

    # penalize the confidence of the boxes, which are responsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE

    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE

    # ------------------------------------------------------------------
    # Warm-up training
    # ------------------------------------------------------------------
    # While fewer than WARM_UP_BATCHES batches have been seen, cells without
    # a truth box get the cell centre / anchor size as target and every
    # coordinate is included in the loss.
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.assign_add(seen, 1.)

    true_box_xy, true_box_wh, coord_mask = tf.cond(
        tf.less(seen, WARM_UP_BATCHES),
        lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                 true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2]) * no_boxes_mask,
                 tf.ones_like(coord_mask)],
        lambda: [true_box_xy,
                 true_box_wh,
                 coord_mask])

    # ------------------------------------------------------------------
    # Finalize the loss
    # ------------------------------------------------------------------
    # Each term is normalised by the number of active mask entries; the
    # 1e-6 guards against division by zero.
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class

    # ------------------------------------------------------------------
    # Debugging code
    # ------------------------------------------------------------------
    # The recall statistics only feed the tf.Print lines below.
    nb_true_box = tf.reduce_sum(y_true[..., 4])
    nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

    current_recall = nb_pred_box/(nb_true_box + 1e-6)
    total_recall = tf.assign_add(total_recall, current_recall)

    # loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    # loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    # loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    # loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    # loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    # loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    # loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    # loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

    return loss
# Settings handed to BatchGenerator. Every value comes from the module-level
# constants so the generator cannot drift from the model definition; in
# particular TRUE_BOX_BUFFER must match the shape of the `true_boxes` input
# (it used to be a separate hard-coded 50 here).
generator_config = {
    'IMAGE_H': IMAGE_H,
    'IMAGE_W': IMAGE_W,
    'GRID_H': GRID_H,
    'GRID_W': GRID_W,
    'BOX': BOX,
    'LABELS': LABELS,
    'CLASS': CLASS,
    'ANCHORS': ANCHORS,
    'BATCH_SIZE': BATCH_SIZE,
    'TRUE_BOX_BUFFER': TRUE_BOX_BUFFER,
}

# Parse the training annotations, keeping only the configured labels.
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
    """Return `image` divided by 255 (maps 8-bit pixel values into [0, 1])."""
    return image / 255.
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
#    train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)

valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)

### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
#    pickle.dump(valid_imgs, fp)

### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
#    valid_imgs = pickle.load(fp)

# NOTE: valid_batch is built but not passed to fit_generator below.
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)

optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

model.compile(loss=custom_loss, optimizer=optimizer)

# Train for 30 epochs, one full pass over the training generator per epoch.
history = model.fit_generator(
    generator=train_batch,
    steps_per_epoch=len(train_batch),
    epochs=30,
    verbose=1,
    # max_queue_size = 3
)

# Persist the trained model and the per-epoch training loss.
model.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
#from __future__ import print_function | |
from keras.models import Sequential, Model | |
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda | |
from keras.layers.advanced_activations import LeakyReLU | |
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard | |
from keras.optimizers import SGD, Adam, RMSprop | |
from keras.layers.merge import concatenate | |
import matplotlib.pyplot as plt | |
import keras.backend as K | |
import tensorflow as tf | |
import imgaug as ia | |
from tqdm import tqdm | |
from imgaug import augmenters as iaa | |
import numpy as np | |
import pickle | |
import os, cv2 | |
from preprocessing import parse_annotation, BatchGenerator | |
from keras.applications import MobileNet | |
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------

# Classes to detect, in index order, and the per-class weights used by the
# class term of the loss.
LABELS = ['raccoon']
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')

# Network input resolution and the output grid the detection head is
# reshaped to.
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7, 7

# Anchor boxes: BOX (width, height) pairs stored as one flat list.
BOX = 5
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

# Post-processing thresholds (not referenced by the training code below).
OBJ_THRESHOLD = 0.3
NMS_THRESHOLD = 0.3

# Relative weights of the individual loss terms.
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0

# Batch handling.
BATCH_SIZE = 1
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50

# Files and folders.
wt_path = 'yolo.weights'
train_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
train_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
# NOTE: validation currently points at the same folders as training.
valid_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
valid_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
# Model inputs: the RGB image plus a fixed-size buffer of ground-truth boxes.
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

# Backbone: MobileNet without its classification head, ImageNet weights.
mobilenet = MobileNet(
    input_shape=(IMAGE_H, IMAGE_W, 3),
    alpha=0.75,
    depth_multiplier=1,
    dropout=0.001,
    weights="imagenet",
    classes=1000,
    include_top=False,
    #backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)

# Layer 23: 1x1 convolution producing, for every grid cell, BOX predictions
# of (x, y, w, h, confidence, class scores).
x = Conv2D(
    filters=BOX * (4 + 1 + CLASS),
    kernel_size=(1, 1),
    strides=(1, 1),
    padding='same',
    name='conv_23',
)(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
# (disabled here: true_boxes stays a declared model input, but no layer
# consumes it)
#output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)
model.summary()
def create_cell_grid(grid_size, batch_size, nb_box=None):
    """Build the per-cell (x, y) offsets for a square detection grid.

    Args:
        grid_size: number of cells along each side of the grid (Python int
            or scalar integer tensor).
        batch_size: number of samples to tile the grid for (Python int or
            scalar integer tensor).
        nb_box: number of boxes predicted per cell. Defaults to the
            module-level ``BOX`` (previously a hard-coded 5).

    Returns:
        Float tensor of shape (batch_size, grid_size, grid_size, nb_box, 2);
        the last axis holds (column index, row index) of each cell.
    """
    if nb_box is None:
        nb_box = BOX
    x_pos = tf.to_float(tf.range(grid_size))
    y_pos = tf.to_float(tf.range(grid_size))
    # Default 'xy' indexing: xx varies along columns, yy along rows.
    xx, yy = tf.meshgrid(x_pos, y_pos)
    xx = tf.expand_dims(xx, -1)
    yy = tf.expand_dims(yy, -1)
    grid = tf.concat([xx, yy], axis=-1)             # (G, G, 2)
    grid = tf.expand_dims(grid, -2)                 # (G, G, 1, 2)
    grid = tf.tile(grid, (1, 1, nb_box, 1))         # (G, G, nb_box, 2)
    grid = tf.expand_dims(grid, 0)                  # (1, G, G, nb_box, 2)
    grid = tf.tile(grid, (batch_size, 1, 1, 1, 1))  # (N, G, G, nb_box, 2)
    return grid
def _box_iou(xy_a, wh_a, xy_b, wh_b):
    """Return the broadcast IoU of centre/size boxes A and B (last axis = 2)."""
    half_a = wh_a / 2.
    half_b = wh_b / 2.
    intersect_mins = tf.maximum(xy_a - half_a, xy_b - half_b)
    intersect_maxes = tf.minimum(xy_a + half_a, xy_b + half_b)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    areas_a = wh_a[..., 0] * wh_a[..., 1]
    areas_b = wh_b[..., 0] * wh_b[..., 1]
    union_areas = areas_a + areas_b - intersect_areas
    return tf.truediv(intersect_areas, union_areas)


def custom_loss(y_true, y_pred):
    """YOLOv2-style training loss: coordinate + confidence + class terms.

    Args:
        y_true: ground-truth tensor. Indexed here as [..., 0:2] box centre,
            [..., 2:4] box size, [..., 4] objectness flag and [..., 5:]
            class scores. Assumed to share the model output layout
            (N, GRID_H, GRID_W, BOX, 4 + 1 + CLASS) -- TODO confirm against
            BatchGenerator.
        y_pred: raw network output with the same layout.

    Returns:
        Scalar tensor ``loss_xy + loss_wh + loss_conf + loss_class``.

    Activations applied to ``y_pred``:
        box_xy    : sigmoid, then the cell offset is added
        box_wh    : exponential, then multiplied by the anchor of the slot
        box_conf  : sigmoid
        box_class : left as logits for the softmax cross-entropy

    Fixes relative to the previous revision: the undefined names
    ``pred_tensor`` and ``BOX_IDX_CONFIDENCE`` are gone, width/height is
    scaled by ``ANCHORS`` rather than by the scalar ``BOX``, the class index
    stays an integer for ``tf.gather`` / the sparse cross-entropy, and the
    confidence tensors keep matching ranks.
    """
    # bx = sigmoid(tx) + cx, by = sigmoid(ty) + cy
    batch_size = tf.shape(y_pred)[0]
    grid_size = tf.shape(y_pred)[1]
    cell_grid = create_cell_grid(grid_size, batch_size)

    # ---- activate the raw predictions ----
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2])
    pred_box_conf = tf.sigmoid(y_pred[..., 4])
    pred_box_class = y_pred[..., 5:]

    # ---- ground truth ----
    true_box_xy = y_true[..., 0:2]
    true_box_wh = y_true[..., 2:4]
    object_mask = y_true[..., 4]

    # Confidence target: IoU between each predictor and its own truth box,
    # zeroed where no object is assigned.
    true_box_conf = _box_iou(true_box_xy, true_box_wh, pred_box_xy, pred_box_wh) * object_mask

    # Class target as a sparse integer index.
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    # ---- best IoU of every prediction against all truth slots ----
    # Flatten every (cell, box) slot of y_true into one list per sample and
    # add three singleton axes so it broadcasts against the predictions.
    true_boxes = tf.reshape(y_true[..., :4], [batch_size, -1, 4])
    true_boxes = tf.expand_dims(true_boxes, 1)
    true_boxes = tf.expand_dims(true_boxes, 1)
    true_boxes = tf.expand_dims(true_boxes, 1)

    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)
    iou_scores = _box_iou(true_boxes[..., 0:2], true_boxes[..., 2:4], pred_xy, pred_wh)
    best_ious = tf.reduce_max(iou_scores, axis=4)

    # ---- masks ----
    # Confidence: penalize empty predictors whose best IoU is below 0.6 ...
    conf_mask = tf.zeros(tf.shape(y_true)[:4])
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - object_mask) * NO_OBJECT_SCALE
    # ... and the predictors responsible for a ground truth box.
    conf_mask = conf_mask + object_mask * OBJECT_SCALE

    # Class and coordinate terms only count where an object is assigned.
    class_mask = object_mask * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE
    coord_mask = tf.expand_dims(object_mask, axis=-1) * COORD_SCALE

    # ---- normalised loss terms (1e-6 guards against division by zero) ----
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class
    return loss
# Settings handed to BatchGenerator. Every value comes from the module-level
# constants so the generator cannot drift from the model definition; in
# particular TRUE_BOX_BUFFER must match the shape of the `true_boxes` input
# (it used to be a separate hard-coded 50 here).
generator_config = {
    'IMAGE_H': IMAGE_H,
    'IMAGE_W': IMAGE_W,
    'GRID_H': GRID_H,
    'GRID_W': GRID_W,
    'BOX': BOX,
    'LABELS': LABELS,
    'CLASS': CLASS,
    'ANCHORS': ANCHORS,
    'BATCH_SIZE': BATCH_SIZE,
    'TRUE_BOX_BUFFER': TRUE_BOX_BUFFER,
}

# Parse the training annotations, keeping only the configured labels.
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
    """Return `image` divided by 255 (maps 8-bit pixel values into [0, 1])."""
    return image / 255.
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
#    train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)

valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)

### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
#    pickle.dump(valid_imgs, fp)

### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
#    valid_imgs = pickle.load(fp)

# NOTE: valid_batch is built but not passed to fit_generator below.
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)

optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

model.compile(loss=custom_loss, optimizer=optimizer)

# Train for 30 epochs, one full pass over the training generator per epoch.
history = model.fit_generator(
    generator=train_batch,
    steps_per_epoch=len(train_batch),
    epochs=30,
    verbose=1,
    max_queue_size=3,
)

# Persist the trained model and the per-epoch training loss.
model.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
#from __future__ import print_function | |
from keras.models import Sequential, Model | |
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda | |
from keras.layers.advanced_activations import LeakyReLU | |
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard | |
from keras.optimizers import SGD, Adam, RMSprop | |
from keras.layers.merge import concatenate | |
import matplotlib.pyplot as plt | |
import keras.backend as K | |
import tensorflow as tf | |
import imgaug as ia | |
from tqdm import tqdm | |
from imgaug import augmenters as iaa | |
import numpy as np | |
import pickle | |
import os, cv2 | |
from preprocessing import parse_annotation, BatchGenerator | |
from keras.applications import MobileNet | |
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------

# Classes to detect, in index order, and the per-class weights used by the
# class term of the loss.
LABELS = ['raccoon']
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')

# Network input resolution and the output grid the detection head is
# reshaped to.
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7, 7
#IMAGE_H, IMAGE_W = 416, 416
#GRID_H, GRID_W = 13 , 13

# Anchor boxes: BOX (width, height) pairs stored as one flat list.
BOX = 5
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

# Post-processing thresholds (not referenced by the training code below).
OBJ_THRESHOLD = 0.3#0.5
NMS_THRESHOLD = 0.3#0.45

# Relative weights of the individual loss terms.
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0

# Batch handling.
BATCH_SIZE = 16
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50

# Files and folders.
wt_path = 'yolo.weights'
train_image_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/images/'
train_annot_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/annotations/'
# NOTE: validation currently points at the same folders as training.
valid_image_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/images/'
valid_annot_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/annotations/'
# Model inputs: the RGB image plus a fixed-size buffer of ground-truth boxes
# that custom_loss reads through the module-level `true_boxes` tensor.
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

# Backbone: MobileNet without its classification head, ImageNet weights.
mobilenet = MobileNet(
    input_shape=(IMAGE_H, IMAGE_W, 3),
    alpha=0.75,
    depth_multiplier=1,
    dropout=0.001,
    weights="imagenet",
    classes=1000,
    include_top=False,
    #backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)

# Layer 23: 1x1 convolution producing, for every grid cell, BOX predictions
# of (x, y, w, h, confidence, class scores).
x = Conv2D(
    filters=BOX * (4 + 1 + CLASS),
    kernel_size=(1, 1),
    strides=(1, 1),
    padding='same',
    name='conv_23',
)(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
# (disabled here: true_boxes stays a declared model input, but no layer
# consumes it)
#output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)
model.summary()
def custom_loss(y_true, y_pred):
    """YOLOv2-style training loss: coordinate + confidence + class terms.

    Args:
        y_true: ground-truth tensor. Indexed here as [..., 0:2] box centre,
            [..., 2:4] box size, [..., 4] objectness flag and [..., 5:]
            class scores. Assumed to share the model output layout
            (batch, GRID_H, GRID_W, BOX, 4 + 1 + CLASS) -- TODO confirm
            against BatchGenerator.
        y_pred: raw network output with the same layout.

    Returns:
        Scalar tensor ``loss_xy + loss_wh + loss_conf + loss_class``.

    Notes:
        * Reads the module-level ``true_boxes`` input tensor and the
          module-level scale / anchor constants.
        * The cell grid is tiled ``BATCH_SIZE`` times, so every batch must
          contain exactly ``BATCH_SIZE`` samples.
        * ``cell_y`` is obtained by transposing ``cell_x``, which is only
          shape-compatible when ``GRID_H == GRID_W``.
    """
    mask_shape = tf.shape(y_true)[:4]

    # cell_x[0, row, col, 0, 0] == col; swapping the two grid axes yields the
    # row index. The box axis is tiled BOX times (was a hard-coded 5) so it
    # always agrees with the anchor reshape below.
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))
    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [BATCH_SIZE, 1, 1, BOX, 1])

    conf_mask = tf.zeros(mask_shape)

    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)

    # ------------------------------------------------------------------
    # Adjust prediction
    # ------------------------------------------------------------------
    ### adjust x and y: sigmoid offset inside the cell + cell index
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h: exponential scaled by the anchor of each box slot
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2])

    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities (kept as logits for the softmax loss)
    pred_box_class = y_pred[..., 5:]

    # ------------------------------------------------------------------
    # Adjust ground truth
    # ------------------------------------------------------------------
    ### x and y (compared directly against pred_box_xy)
    true_box_xy = y_true[..., 0:2]

    ### w and h (compared directly against pred_box_wh)
    true_box_wh = y_true[..., 2:4]

    ### confidence target: IoU between each predictor and its own truth box
    true_wh_half = true_box_wh / 2.
    true_mins = true_box_xy - true_wh_half
    true_maxes = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins = pred_box_xy - pred_wh_half
    pred_maxes = pred_box_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    true_box_conf = iou_scores * y_true[..., 4]

    ### class target as a sparse index
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    # ------------------------------------------------------------------
    # Determine the masks
    # ------------------------------------------------------------------
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    # Extra axis so every prediction is compared with every buffered box.
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE

    # penalize the confidence of the boxes, which are responsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE

    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE

    # ------------------------------------------------------------------
    # Warm-up training
    # ------------------------------------------------------------------
    # While fewer than WARM_UP_BATCHES batches have been seen, cells without
    # a truth box get the cell centre / anchor size as target and every
    # coordinate is included in the loss.
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.assign_add(seen, 1.)

    true_box_xy, true_box_wh, coord_mask = tf.cond(
        tf.less(seen, WARM_UP_BATCHES),
        lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                 true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2]) * no_boxes_mask,
                 tf.ones_like(coord_mask)],
        lambda: [true_box_xy,
                 true_box_wh,
                 coord_mask])

    # ------------------------------------------------------------------
    # Finalize the loss
    # ------------------------------------------------------------------
    # Each term is normalised by the number of active mask entries; the
    # 1e-6 guards against division by zero.
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class

    # ------------------------------------------------------------------
    # Debugging code
    # ------------------------------------------------------------------
    # The recall statistics only feed the tf.Print lines below.
    nb_true_box = tf.reduce_sum(y_true[..., 4])
    nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

    current_recall = nb_pred_box/(nb_true_box + 1e-6)
    total_recall = tf.assign_add(total_recall, current_recall)

    # loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    # loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    # loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    # loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    # loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    # loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    # loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    # loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

    return loss
# Settings handed to BatchGenerator. Every value comes from the module-level
# constants so the generator cannot drift from the model definition; in
# particular TRUE_BOX_BUFFER must match the shape of the `true_boxes` input
# (it used to be a separate hard-coded 50 here).
generator_config = {
    'IMAGE_H': IMAGE_H,
    'IMAGE_W': IMAGE_W,
    'GRID_H': GRID_H,
    'GRID_W': GRID_W,
    'BOX': BOX,
    'LABELS': LABELS,
    'CLASS': CLASS,
    'ANCHORS': ANCHORS,
    'BATCH_SIZE': BATCH_SIZE,
    'TRUE_BOX_BUFFER': TRUE_BOX_BUFFER,
}

# Parse the training annotations, keeping only the configured labels.
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
    """Return `image` divided by 255 (maps 8-bit pixel values into [0, 1])."""
    return image / 255.
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
#    train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)

valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)

### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
#    pickle.dump(valid_imgs, fp)

### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
#    valid_imgs = pickle.load(fp)

# NOTE: valid_batch is built but not passed to fit_generator below.
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)

optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

model.compile(loss=custom_loss, optimizer=optimizer)

# Train silently (verbose=0) for 3 epochs, one full pass over the training
# generator per epoch.
history = model.fit_generator(
    generator=train_batch,
    steps_per_epoch=len(train_batch),
    epochs=3,
    verbose=0,
    max_queue_size=3,
)

# Save the trained *model*: fit_generator returns a History object, which
# only records metrics and has no save() method (the previous
# `history.save(...)` raised AttributeError after training finished).
model.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment