Last active
May 3, 2019 12:55
-
-
Save guimeira/61a6ff6c347f8cc123e1ace3f92a8fa8 to your computer and use it in GitHub Desktop.
Caffe Python Data Augmentation Layer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import caffe | |
import cv2 | |
import numpy as np | |
import sys | |
import re | |
import random | |
from Queue import Queue | |
from threading import Thread | |
from multiprocessing import cpu_count | |
############################################################################### | |
# Selectors: # | |
# For each image, the selector's method `select` is invoked. Its `numFilters` # | |
# parameter indicates the number of filters that are currently available and # | |
# it must return a list of integers, containing the 0-based indexes of the # | |
# filters that are going to be applied to an image. # | |
############################################################################### | |
# Uniform selector: | |
# Its `probability` parameter defines the probability of filters being applied | |
# to an image. E.g.: if probability is 0.8, there is an 80% chance of applying | |
# filters to an image. The set of filters to be applied is selected at random. | |
class Uniform(object):
    """Selector that applies a random subset of filters with a fixed chance.

    For each image, with probability `probability` a non-empty random subset
    of the available filters is chosen; otherwise no filter is applied.
    """

    def __init__(self, probability):
        # float() lets the value be given as a number or a numeric string.
        self.probability = float(probability)

    def select(self, numFilters):
        """Return the 0-based indexes of the filters to apply to one image.

        numFilters: number of filters currently available.

        Returns a list of distinct indexes in random order, or an empty list
        when no filter should be applied (including when there are none).
        """
        # random.randint(1, 0) raises ValueError, so an empty filter list has
        # to be handled before any index is drawn.
        if numFilters <= 0:
            return []
        if not (random.random() < self.probability):
            return []
        numSelected = random.randint(1, numFilters)
        return random.sample(range(numFilters), numSelected)
############################################################################### | |
# Filters: # | |
# The filter's `apply` method is invoked for every image that this filter is # | |
# going to process. The image received as a parameter is a numpy array shaped # | |
# like an OpenCV image (axis 0 is height, 1 is width and 2 is the channel). # | |
############################################################################### | |
# Flip: | |
# Flips an image vertically or horizontally. The constructor parameter | |
# `flipMode` is a string containing either 'h' for horizontal flip or 'v' for | |
# vertical flip. | |
class Flip(object):
    """Filter that mirrors an image vertically or horizontally."""

    # Maps the user-facing mode to the flip code handed to cv2.flip.
    _FLIP_CODES = {'v': 0, 'h': 1}

    def __init__(self, flipMode):
        """Store the OpenCV flip code for the requested direction.

        flipMode: 'v' for a vertical flip or 'h' for a horizontal flip.

        Raises ValueError (an Exception subclass, so existing handlers still
        match) for any other value.
        """
        try:
            self.flipMode = self._FLIP_CODES[flipMode]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments such as lists.
            raise ValueError('Invalid flip mode')

    def apply(self, cvImg):
        """Return a flipped copy of `cvImg` (height x width x channel)."""
        return cv2.flip(cvImg, self.flipMode)
# Rotate: | |
# Rotates an image by an arbitrary number of degrees. The `degrees` parameter | |
# defines the number of degrees (counterclockwise) and the optional parameter | |
# `fillBackground`, when set to True, uses the upper-left pixel of the original | |
# image to fill the black areas created by the rotation. By default, those | |
# areas are filled with black pixels. Rotations that are a multiple of 90 degrees | |
# do not generate those areas on square images. | |
class Rotate(object):
    """Filter that rotates an image around its center, keeping its size.

    degrees: rotation angle in degrees (counterclockwise).
    fillBackground: when True, the areas uncovered by the rotation are filled
        with the color of the upper-left pixel of the input image; otherwise
        they are filled with black.
    """

    def __init__(self, degrees, fillBackground=False):
        self.degrees = degrees
        self.fillBackground = fillBackground
        # Built lazily on the first apply() unless setup() is called first.
        self.rotMat = None

    def setup(self, imgSize):
        """Precompute the rotation matrix for images of the given size.

        imgSize: (width, height) in pixels, as integers. It is also used as
            the output size of every subsequent apply() call.
        """
        width, height = imgSize[0], imgSize[1]
        # Floor division keeps the integer center the Python 2 `/` operator
        # produced, regardless of the interpreter version.
        center = (width // 2, height // 2)
        self.rotMat = cv2.getRotationMatrix2D(center, self.degrees, 1.0)
        self.imgSize = (width, height)

    def apply(self, cvImg):
        """Return a rotated copy of `cvImg` (height x width x channel).

        The rotation matrix is computed once, from the first image seen, and
        reused afterwards.
        """
        if self.rotMat is None:
            self.setup((cvImg.shape[1], cvImg.shape[0]))
        backgroundColor = (0, 0, 0)
        if self.fillBackground:
            # tolist() yields built-in ints; some OpenCV builds reject NumPy
            # scalar types inside Scalar arguments such as borderValue.
            backgroundColor = tuple(cvImg[0, 0, :].astype(int).tolist())
        return cv2.warpAffine(cvImg, self.rotMat, self.imgSize,
                              flags=cv2.INTER_LANCZOS4,
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=backgroundColor)
# Scale: | |
# Scales an image by a factor. The `factor` parameter indicates how much the image | |
# must be scaled. A number larger than 1 enlarges the image (1.1 = 10% bigger). A | |
# number between 0 and 1 shrinks it (0.9 = 10% smaller). The `fillBackground` | |
# parameter works the same way as on the Rotate filter. | |
class Scale(object):
    """Filter that scales an image around its center, keeping its size.

    factor: scaling factor; values above 1 enlarge the content (1.1 = 10%
        bigger), values between 0 and 1 shrink it (0.9 = 10% smaller).
    fillBackground: when True, the areas uncovered by shrinking are filled
        with the color of the upper-left pixel of the input image; otherwise
        they are filled with black.
    """

    def __init__(self, factor, fillBackground=False):
        self.factor = factor
        self.fillBackground = fillBackground
        # Built lazily from the first image passed to apply().
        self.transfMat = None

    def apply(self, cvImg):
        """Return a scaled copy of `cvImg` (height x width x channel).

        The transformation matrix and output size are derived from the first
        image seen and reused afterwards.
        """
        if self.transfMat is None:
            self.imgSize = (cvImg.shape[1], cvImg.shape[0])
            # Floor division keeps the integer center the Python 2 `/`
            # operator produced, regardless of the interpreter version.
            center = (self.imgSize[0] // 2, self.imgSize[1] // 2)
            # A rotation of 0 degrees with a scale factor is a pure scaling
            # around the center.
            self.transfMat = cv2.getRotationMatrix2D(center, 0, self.factor)
        backgroundColor = (0, 0, 0)
        if self.fillBackground:
            # tolist() yields built-in ints; some OpenCV builds reject NumPy
            # scalar types inside Scalar arguments such as borderValue.
            backgroundColor = tuple(cvImg[0, 0, :].astype(int).tolist())
        return cv2.warpAffine(cvImg, self.transfMat, self.imgSize,
                              flags=cv2.INTER_LANCZOS4,
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=backgroundColor)
# Brightness: | |
# Increases or decreases brightness of the image. The value `beta` is added to the | |
# three color channels. If `floodFill` is True, after the brightness change, | |
# we apply a floodfill using the original value of the upper-left pixel to keep the | |
# background color unchanged. | |
class Brightness(object):
    """Filter that raises or lowers the brightness of an image.

    beta: value added to each of the three color channels (negative values
        darken the image).
    floodFill: when True, a flood fill seeded at the upper-left pixel repaints
        that region with the pixel's original color, so the background keeps
        its color after the brightness change.
    """

    def __init__(self, beta, floodFill=False):
        self.beta = beta
        self.floodFill = floodFill

    def apply(self, cvImg):
        """Return a brightness-adjusted copy of `cvImg` (height x width x channel)."""
        processed = cv2.add(cvImg, (self.beta, self.beta, self.beta, 0))
        if self.floodFill:
            # The corner color is read from the untouched input. tolist()
            # yields built-in ints; some OpenCV builds reject NumPy scalar
            # types inside Scalar arguments.
            backgroundColor = tuple(cvImg[0, 0, :].astype(int).tolist())
            cv2.floodFill(processed, None, (0, 0), backgroundColor)
        return processed
# Contrast: | |
# Increases or decreases contrast of the image. All the values on the image are multiplied | |
# by the `alpha` parameter. If 0 < alpha < 1, the contrast is decreased, if alpha > 1, the | |
# contrast is increased. If `floodFill` is True, after the contrast change, we apply a | |
# floodfill using the original value of the upper-left pixel to keep the background color | |
# unchanged. | |
class Contrast(object):
    """Filter that raises or lowers the contrast of an image.

    alpha: factor every color value is multiplied by; 0 < alpha < 1 lowers the
        contrast, alpha > 1 raises it.
    floodFill: when True, a flood fill seeded at the upper-left pixel repaints
        that region with the pixel's original color, so the background keeps
        its color after the contrast change.
    """

    def __init__(self, alpha, floodFill=False):
        self.alpha = alpha
        self.floodFill = floodFill

    def apply(self, cvImg):
        """Return a contrast-adjusted copy of `cvImg` (height x width x channel)."""
        processed = cv2.multiply(cvImg, (self.alpha, self.alpha, self.alpha, 0))
        if self.floodFill:
            # The corner color is read from the untouched input. tolist()
            # yields built-in ints; some OpenCV builds reject NumPy scalar
            # types inside Scalar arguments.
            backgroundColor = tuple(cvImg[0, 0, :].astype(int).tolist())
            cv2.floodFill(processed, None, (0, 0), backgroundColor)
        return processed
# Sharpen: | |
# Sharpens the image | |
class Sharpen(object):
    """Filter that sharpens an image with an unsharp mask.

    The result is `(1 + amount) * image - amount * blurred`, where `blurred`
    is a Gaussian blur of the input.

    amount: strength of the sharpening; the default 0.5 gives the weights
        1.5 and -0.5.
    kernelSize: side of the square Gaussian kernel in pixels; must be a
        positive odd integer.
    """

    def __init__(self, amount=0.5, kernelSize=5):
        if kernelSize <= 0 or kernelSize % 2 == 0:
            raise ValueError('kernelSize must be a positive odd integer')
        self.amount = amount
        self.kernelSize = kernelSize

    def apply(self, cvImg):
        """Return a sharpened copy of `cvImg` (height x width x channel)."""
        # A sigma of 0 lets OpenCV derive it from the kernel size.
        blurred = cv2.GaussianBlur(cvImg, (self.kernelSize, self.kernelSize), 0)
        return cv2.addWeighted(cvImg, 1.0 + self.amount, blurred, -self.amount, 0)
# This method is executed by each one of the image processing threads. | |
# It allocates the selector and filters and watches a queue for images | |
# coming from the main thread. | |
def workerThread(queue, config):
    """Process images taken from `queue` until the process exits.

    queue: queue of `(bottom, top, index)` tuples; `bottom[0].data[index]` is
        the input image in Caffe layout (channel, height, width) and the
        result is written to `top[0].data[index]`.
    config: body of a Python dict literal (without the braces) defining the
        keys 'selector' and 'filters'.

    Raises ValueError when the configuration lacks one of the two keys.
    This function never returns; run it on a daemon thread.
    """
    # Local import: only the error path below needs it.
    import traceback

    # SECURITY NOTE: the configuration is evaluated as Python code, so it must
    # come from a trusted source (the network definition). eval is kept
    # because the configuration syntax instantiates the selector and filter
    # classes directly.
    parsedConfig = eval('{' + config + '}')

    # Make sure the configurations are here:
    if 'selector' not in parsedConfig:
        raise ValueError('Selector configuration missing')
    if 'filters' not in parsedConfig:
        raise ValueError('Filter configuration missing')

    selector = parsedConfig['selector']
    filters = parsedConfig['filters']
    numFilters = len(filters)

    while True:
        (bottom, top, index) = queue.get()
        try:
            caffeIn = bottom[0].data[index, ...]
            try:
                # Convert to the OpenCV shape (height, width, channel):
                cvImg = np.transpose(caffeIn, (1, 2, 0))
                for fPos in selector.select(numFilters):
                    cvImg = filters[fPos].apply(cvImg)
                    # A filter may hand back a 2-D array for a single-channel
                    # image; restore the channel axis so the next filter and
                    # the transpose below still see three dimensions.
                    if cvImg.ndim == 2:
                        cvImg = cvImg[:, :, np.newaxis]
                # Convert back to the Caffe format:
                top[0].data[index, :] = np.transpose(cvImg, (2, 0, 1))
            except Exception:
                # A failing filter must not kill the worker. Report the error
                # and pass the image through unchanged so the batch stays
                # complete.
                traceback.print_exc()
                top[0].data[index, :] = caffeIn
        finally:
            # Always acknowledge the item; otherwise queue.join() in the
            # producer would block forever.
            queue.task_done()
# The Augmentation Layer applies transformations to the images | |
# on the fly. This effectively increases the size of a dataset | |
# and usually prevents overfitting. | |
class AugmentationLayer(caffe.Layer):
    """Caffe Python layer that augments each image of a batch on the fly.

    The layer's `param_str` is the body of a Python dict literal defining the
    keys 'selector' and 'filters'. Images are processed in parallel by a pool
    of worker threads, one per CPU.
    """

    def setup(self, bottom, top):
        """Validate the configuration and start the worker threads."""
        config = self.param_str

        # Fail fast on a broken configuration. The workers parse the same
        # string on their own threads; if they were the first to hit an
        # error they would simply die and forward() would block forever on
        # queue.join().
        # SECURITY NOTE: the string is evaluated as Python code and must come
        # from a trusted network definition.
        parsedConfig = eval('{' + config + '}')
        if 'selector' not in parsedConfig:
            raise ValueError('Selector configuration missing')
        if 'filters' not in parsedConfig:
            raise ValueError('Filter configuration missing')

        self.batchSize = bottom[0].data.shape[0]

        # Create worker threads:
        self.queue = Queue(maxsize=0)
        for _ in range(cpu_count()):
            thread = Thread(target=workerThread, args=(self.queue, config))
            # Daemon threads do not keep the process alive at exit.
            thread.daemon = True
            thread.start()

    def reshape(self, bottom, top):
        """Give the output the same shape as the input."""
        top[0].reshape(*bottom[0].data.shape)
        # Keep the cached batch size in step with the current input shape.
        self.batchSize = bottom[0].data.shape[0]

    def forward(self, bottom, top):
        """Queue every image of the batch and wait for the workers to finish."""
        # Read the batch size from the blob itself so a reshaped input is
        # never processed with a stale count.
        for i in range(bottom[0].data.shape[0]):
            self.queue.put((bottom, top, i))
        self.queue.join()

    def backward(self, bottom, top):
        """Backward pass of the network. Nothing to do here."""
        pass
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LeNet | |
name: "LeNet" | |
layer { | |
name: "train-data" | |
type: "Data" | |
top: "data" | |
top: "label" | |
data_param { | |
batch_size: 64 | |
} | |
include { stage: "train" } | |
} | |
layer { | |
name: "val-data" | |
type: "Data" | |
top: "data" | |
top: "label" | |
data_param { | |
batch_size: 32 | |
} | |
include { stage: "val" } | |
} | |
# On-the-fly data augmentation: reads the "data" blob and writes the
# augmented batch to "aug". Included only in the TRAIN phase.
layer { | |
name: "augmentation" | |
type: "Python" | |
bottom: "data" | |
top: "aug" | |
include { | |
phase: TRAIN | |
} | |
python_param { | |
module: "digits_python_layers" | |
layer: "AugmentationLayer" | |
# param_str is the body of a Python dict (without the surrounding braces)
# that the layer evaluates; it must define "selector" and "filters".
# Uniform(0.8) applies a random subset of the listed filters to an image
# with 80% probability.
param_str: "\"selector\": Uniform(0.8), \"filters\": [Scale(1.1),Scale(0.9, True),Rotate(30,True),Rotate(60,True),Rotate(90,True),Flip(\"v\"),Flip(\"h\"),Brightness(30),Brightness(-30,True),Contrast(0.9,True),Contrast(1.1),Sharpen()]" | |
} | |
} | |
layer { | |
name: "conv1" | |
type: "Convolution" | |
bottom: "aug" | |
top: "conv1" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
include { | |
phase: TRAIN | |
} | |
convolution_param { | |
num_output: 32 | |
kernel_size: 11 | |
stride: 1 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "conv1" | |
type: "Convolution" | |
bottom: "data" | |
top: "conv1" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
exclude { | |
phase: TRAIN | |
} | |
convolution_param { | |
num_output: 32 | |
kernel_size: 11 | |
stride: 1 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "relu1" | |
type: "ReLU" | |
bottom: "conv1" | |
top: "conv1" | |
} | |
layer { | |
name: "conv2" | |
type: "Convolution" | |
bottom: "conv1" | |
top: "conv2" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
convolution_param { | |
num_output: 32 | |
kernel_size: 11 | |
stride: 1 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "relu2" | |
type: "ReLU" | |
bottom: "conv2" | |
top: "conv2" | |
} | |
layer { | |
name: "pool1" | |
type: "Pooling" | |
bottom: "conv2" | |
top: "pool1" | |
pooling_param { | |
pool: MAX | |
kernel_size: 2 | |
} | |
} | |
layer { | |
name: "conv3" | |
type: "Convolution" | |
bottom: "pool1" | |
top: "conv3" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
convolution_param { | |
num_output: 64 | |
kernel_size: 6 | |
stride: 1 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "relu3" | |
type: "ReLU" | |
bottom: "conv3" | |
top: "conv3" | |
} | |
layer { | |
name: "conv4" | |
type: "Convolution" | |
bottom: "conv3" | |
top: "conv4" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
convolution_param { | |
num_output: 64 | |
kernel_size: 6 | |
stride: 1 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "relu4" | |
type: "ReLU" | |
bottom: "conv4" | |
top: "conv4" | |
} | |
layer { | |
name: "pool2" | |
type: "Pooling" | |
bottom: "conv4" | |
top: "pool2" | |
pooling_param { | |
pool: MAX | |
kernel_size: 2 | |
} | |
} | |
layer { | |
name: "ip1" | |
type: "InnerProduct" | |
bottom: "pool2" | |
top: "ip1" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
inner_product_param { | |
num_output: 512 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "relu5" | |
type: "ReLU" | |
bottom: "ip1" | |
top: "ip1" | |
} | |
layer { | |
name: "drop1" | |
type: "Dropout" | |
bottom: "ip1" | |
top: "ip1" | |
dropout_param { | |
dropout_ratio: 0.5 | |
} | |
} | |
layer { | |
name: "ip2" | |
type: "InnerProduct" | |
bottom: "ip1" | |
top: "ip2" | |
param { | |
lr_mult: 1 | |
} | |
param { | |
lr_mult: 2 | |
} | |
inner_product_param { | |
# Since num_output is unset, DIGITS will automatically set it to the | |
# number of classes in your dataset. | |
# Uncomment this line to set it explicitly: | |
#num_output: 10 | |
weight_filler { | |
type: "xavier" | |
} | |
bias_filler { | |
type: "constant" | |
} | |
} | |
} | |
layer { | |
name: "accuracy" | |
type: "Accuracy" | |
bottom: "ip2" | |
bottom: "label" | |
top: "accuracy" | |
include { stage: "val" } | |
} | |
layer { | |
name: "loss" | |
type: "SoftmaxWithLoss" | |
bottom: "ip2" | |
bottom: "label" | |
top: "loss" | |
exclude { stage: "deploy" } | |
} | |
layer { | |
name: "softmax" | |
type: "Softmax" | |
bottom: "ip2" | |
top: "softmax" | |
include { stage: "deploy" } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment