AlexNet TensorFlow Declaration
#CREATE CNN STRUCTURE
"""----------------------------------------------------------------------------------------------------------------------------------------------------------------"""
import tensorflow as tf  #TensorFlow 1.x (tf.layers / tf.estimator APIs)
#FLAGS (image_width, image_height, image_channels, num_of_classes) is assumed
#to be defined elsewhere in the surrounding training script, e.g. via tf.app.flags
def cnn_model_fn(features, labels, mode):
    """INPUT LAYER"""
    input_layer = tf.reshape(features["x"], [-1, FLAGS.image_width, FLAGS.image_height, FLAGS.image_channels], name="input_layer") #AlexNet uses a 227x227x3 input layer. '-1' lets TensorFlow infer the batch size
    #print(input_layer)
"""%FIRST CONVOLUTION BLOCK | |
The first convolutional layer filters the 227×227×3 input image with | |
96 kernels of size 11×11 with a stride of 4 pixels. Bias of 1.""" | |
conv1 = tf.layers.conv2d(inputs=input_layer, filters=96, kernel_size=[11, 11], strides=4, padding="valid", activation=tf.nn.relu) | |
lrn1 = tf.nn.lrn(input=conv1, depth_radius=5, bias=1.0, alpha=0.0001/5.0, beta=0.75); #Normalization layer | |
pool1_conv1 = tf.layers.max_pooling2d(inputs=lrn1, pool_size=[3, 3], strides=2) #Max Pool Layer | |
#print(pool1_conv1) | |
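    #Shape check for block one ('valid' padding: out = (in - kernel)/stride + 1):
    #  conv1: (227 - 11)/4 + 1 = 55  -> 55x55x96
    #  pool1: (55 - 3)/2 + 1   = 27  -> 27x27x96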
"""SECOND CONVOLUTION BLOCK | |
Divide the 96 channel blob input from block one into 48 and process independently""" | |
conv2 = tf.layers.conv2d(inputs=pool1_conv1, filters=256, kernel_size=[5, 5], strides=1, padding="same", activation=tf.nn.relu) | |
lrn2 = tf.nn.lrn(input=conv2, depth_radius=5, bias=1.0, alpha=0.0001/5.0, beta=0.75); #Normalization layer | |
pool2_conv2 = tf.layers.max_pooling2d(inputs=lrn2, pool_size=[3, 3], strides=2) #Max Pool Layer | |
#print(pool2_conv2) | |
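    #Shape check for block two ('same' padding preserves spatial size):
    #  conv2: 27x27x256
    #  pool2: (27 - 3)/2 + 1 = 13  -> 13x13x256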
"""THIRD CONVOLUTION BLOCK | |
Note that the third, fourth, and fifth convolution layers are connected to one | |
another without any intervening pooling or normalization layers. | |
The third convolutional layer has 384 kernels of size 3 × 3 | |
connected to the (normalized, pooled) outputs of the second convolutional layer""" | |
conv3 = tf.layers.conv2d(inputs=pool2_conv2, filters=384, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu) | |
#print(conv3) | |
#FOURTH CONVOLUTION BLOCK | |
"""%The fourth convolutional layer has 384 kernels of size 3 × 3""" | |
conv4 = tf.layers.conv2d(inputs=conv3, filters=384, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu) | |
#print(conv4) | |
#FIFTH CONVOLUTION BLOCK | |
"""%the fifth convolutional layer has 256 kernels of size 3 × 3""" | |
conv5 = tf.layers.conv2d(inputs=conv4, filters=256, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu) | |
pool3_conv5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2, padding="valid") #Max Pool Layer | |
#print(pool3_conv5) | |
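    #Shape check for blocks three to five:
    #  conv3/conv4 ('same'): 13x13x384
    #  conv5 ('same'):       13x13x256
    #  pool3: (13 - 3)/2 + 1 = 6  -> 6x6x256, which is what gets flattened below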
    #FULLY CONNECTED LAYER 1
    """The fully-connected layers have 4096 neurons each"""
    pool3_conv5_flat = tf.reshape(pool3_conv5, [-1, 6 * 6 * 256]) #output of the conv blocks is 6x6x256, so flatten it out before connecting it to a fully connected layer
    fc1 = tf.layers.dense(inputs=pool3_conv5_flat, units=4096, activation=tf.nn.relu)
    #fc1 = tf.layers.conv2d(inputs=pool3_conv5, filters=4096, kernel_size=[6, 6], strides=1, padding="valid", activation=tf.nn.relu) #the FC layer expressed as a convolution block (no need for 'pool3_conv5_flat' above)
    #print(fc1)
    #FULLY CONNECTED LAYER 2
    """since the output from above is [1x1x4096]"""
    fc2 = tf.layers.dense(inputs=fc1, units=4096, activation=tf.nn.relu)
    #fc2 = tf.layers.conv2d(inputs=fc1, filters=4096, kernel_size=[1, 1], strides=1, padding="valid", activation=tf.nn.relu)
    #print(fc2)
    #FULLY CONNECTED LAYER 3
    """since the output from above is [1x1x4096]"""
    logits = tf.layers.dense(inputs=fc2, units=FLAGS.num_of_classes, name="logits_layer")
    #fc3 = tf.layers.conv2d(inputs=fc2, filters=43, kernel_size=[1, 1], strides=1, padding="valid")
    #logits = tf.layers.dense(inputs=fc3, units=FLAGS.num_of_classes) #converts the convolutional block (tf.layers.conv2d) to a dense layer (tf.layers.dense); only needed if tf.layers.conv2d had been used to represent the FC layers
    #print(logits)
    #PASS OUTPUT OF LAST FC LAYER TO A SOFTMAX LAYER
    """convert these raw logit values into two different formats that our model function can return:
    The predicted class for each example: a class index from 0 to num_of_classes - 1.
    The probabilities for each possible target class for each example.
    tf.argmax(input=logits, axis=1): generates predictions from the logits layer. Axis 1 applies argmax across each row (over the classes).
    tf.nn.softmax(logits, name="softmax_tensor"): generates the probability distribution
    """
    predictions = {
        "classes": tf.argmax(input=logits, axis=1, name="classes_tensor"),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
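    #For example, with hypothetical 3-class logits [1.0, 2.0, 0.5]:
    #  softmax -> approx. [0.23, 0.63, 0.14]; argmax -> class 1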
    #Return the result here if we are in prediction mode rather than training
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    #CALCULATE OUR LOSS
    """For both training and evaluation, we need to define a loss function that measures how closely the
    model's predictions match the target classes. For multiclass classification, cross entropy is typically used as the loss metric."""
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=FLAGS.num_of_classes)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
    tf.summary.scalar('Loss Per Stride', loss) #log the loss value per step (testing TensorBoard)
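    #For example, with num_of_classes = 43, a label of 2 one-hot encodes to
    #[0, 0, 1, 0, ..., 0], and the cross-entropy loss for that example is
    #-log(softmax probability assigned to class 2)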
    #CONFIGURE TRAINING
    """Since the loss of the CNN is the softmax cross-entropy of the logits layer
    and our labels, let's configure our model to optimize this loss value during
    training. Here we use the Adam optimizer with a learning rate of 0.00001;
    a plain stochastic gradient descent alternative is commented out below:"""
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) #global_step is needed for a proper graph on TensorBoard
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.00005) #very small learning rate; training converges more slowly but better
        #train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    #ADD EVALUATION METRICS
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
"""-----------------------------------------------------------------------------------------------------------------------------------------------------------------"""