#####
# Modified from https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/06_CIFAR-10.ipynb
#####
import math
import os
import random
import time
import itertools
import _pickle as cPickle
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix

# Use PrettyTensor to simplify Neural Network construction.
import prettytensor as pt

seed = 7
np.random.seed(seed)

epochs = 1000
import cifar10
cifar10.maybe_download_and_extract()
class_names = cifar10.load_class_names()
print(class_names)
images_train, cls_train, labels_train = cifar10.load_training_data()
number_train = random.randrange(0,len(images_train)-9)
size_training_data = len(images_train)
images_test, cls_test, labels_test = cifar10.load_test_data()
number_test = random.randrange(0,len(images_test)-9)
print("Size of:")
print("- Training-set:\t\t{}".format(len(images_train)))
print("- Test-set:\t\t{}".format(len(images_test)))
print(images_test.shape)
print(images_train.shape)
from cifar10 import img_size, num_channels, num_classes
img_size_cropped = 24
import psutil
process = psutil.Process(os.getpid())
print(process.memory_info().rss)
######################
#placeholders for image and labels
######################
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels], name='x')
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
######################
#preprocessing actual function
######################
def pre_process_image(image, training):
    # This function takes a single image as input,
    # and a boolean whether to build the training or testing graph.
    if training:
        # For training, add the following to the TensorFlow graph.
        # Randomly crop the input image.
        image = tf.random_crop(image, size=[img_size_cropped, img_size_cropped, num_channels])
        # Randomly flip the image horizontally.
        image = tf.image.random_flip_left_right(image)
        # Randomly adjust hue, contrast, brightness and saturation.
        image = tf.image.random_hue(image, max_delta=0.05)
        image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
        image = tf.image.random_brightness(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.0, upper=2.0)
        # Some of these functions may overflow and produce pixel
        # values beyond the [0, 1] range. It is unclear from the
        # documentation of TensorFlow 0.10.0rc0 whether this is
        # intended. A simple solution is to limit the range.
        # Limit the image pixels to [0, 1] in case of overflow.
        image = tf.minimum(image, 1.0)
        image = tf.maximum(image, 0.0)
    else:
        # For testing, add the following to the TensorFlow graph.
        # Crop the input image around the centre so it is the same
        # size as images that are randomly cropped during training.
        image = tf.image.resize_image_with_crop_or_pad(image,
                                                       target_height=img_size_cropped,
                                                       target_width=img_size_cropped)
    return image
######################
#preprocessing call to function
######################
def pre_process(images, training):
    # Use TensorFlow to loop over all the input images and call
    # the function above which takes a single image as input.
    images = tf.map_fn(lambda image: pre_process_image(image, training), images)
    return images
######################
#preprocessing all images using placeholders
######################
distorted_images = pre_process(images=x, training=True)
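# Optional sanity check (a minimal sketch, not part of the original gist): run
# a few training images through the distortion pipeline and inspect the output
# shape. A tf.Session is only created further below in this script, so this is
# left commented out here.
#
#   _sess = tf.Session()
#   _d = _sess.run(distorted_images, feed_dict={x: images_train[0:9]})
#   print(_d.shape)   # expected: (9, 24, 24, 3) after cropping to 24x24
#   _sess.close()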
######################
#network define
######################
def main_network(images, training):
    # Wrap the input images as a Pretty Tensor object.
    x_pretty = pt.wrap(images)
    # Pretty Tensor uses special numbers to distinguish between
    # the training and testing phases.
    if training:
        phase = pt.Phase.train
    else:
        phase = pt.Phase.infer
    # Create the convolutional neural network using Pretty Tensor.
    # It is very similar to the previous tutorials, except for
    # the use of so-called batch normalization in the first layer.
    with pt.defaults_scope(activation_fn=tf.nn.relu, phase=phase):
        y_pred, loss = x_pretty.\
            conv2d(kernel=5, depth=64, name='layer_conv1', batch_normalize=True).\
            max_pool(kernel=2, stride=2).\
            conv2d(kernel=5, depth=64, name='layer_conv2').\
            max_pool(kernel=2, stride=2).\
            flatten().\
            fully_connected(size=256, name='layer_fc1').\
            fully_connected(size=128, name='layer_fc2').\
            softmax_classifier(num_classes=num_classes, labels=y_true)
    # Add dropout to this network? See the sketch below.
    return y_pred, loss
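# A possible answer to the dropout question above (an untested sketch, not part
# of the original gist): Pretty Tensor chains expose a phase-aware dropout()
# method that is only active when phase=pt.Phase.train, so it could be inserted
# between the fully connected layers, e.g.:
#
#   fully_connected(size=256, name='layer_fc1').\
#   dropout(keep_prob=0.5).\
#   fully_connected(size=128, name='layer_fc2').\
#   softmax_classifier(num_classes=num_classes, labels=y_true)
#
# keep_prob=0.5 is an arbitrary illustrative value.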
######################
#give the network placeholders
######################
def create_network(training):
    # Wrap the neural network in the scope named 'network'.
    # Create new variables during training, and re-use during testing.
    with tf.variable_scope('network', reuse=not training):
        # Just rename the input placeholder variable for convenience.
        images = x
        # Create TensorFlow graph for pre-processing.
        images = pre_process(images=images, training=training)
        # Create TensorFlow graph for the main processing.
        y_pred, loss = main_network(images=images, training=training)
    return y_pred, loss
######################
#save variables and optimize
######################
global_step = tf.Variable(initial_value=0,name='global_step', trainable=False)
_, loss = create_network(training=True)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=global_step)
y_pred, _ = create_network(training=False)
y_pred_cls = tf.argmax(y_pred, dimension=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
######################
#function for saving weights of the model
######################
def get_weights_variable(layer_name):
    # Retrieve an existing variable named 'weights' in the scope
    # with the given layer_name.
    # This is awkward because the TensorFlow function was
    # really intended for another purpose.
    with tf.variable_scope("network/" + layer_name, reuse=True):
        variable = tf.get_variable('weights')
    return variable
######################
#get weights of the layers
######################
weights_conv1 = get_weights_variable(layer_name='layer_conv1')
weights_conv2 = get_weights_variable(layer_name='layer_conv2')
######################
#function to get output of the layer
######################
def get_layer_output(layer_name):
    # The name of the last operation of the convolutional layer.
    # This assumes you are using ReLU as the activation-function.
    tensor_name = "network/" + layer_name + "/Relu:0"
    # Get the tensor with this name.
    tensor = tf.get_default_graph().get_tensor_by_name(tensor_name)
    return tensor
######################
#get output of the layers
######################
output_conv1 = get_layer_output(layer_name='layer_conv1')
output_conv2 = get_layer_output(layer_name='layer_conv2')
######################
#session variables and try to save
######################
session = tf.Session()
save_dir = 'checkpoints/'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
save_path = os.path.join(save_dir, 'cifar10_cnn')
try:
    print("Trying to restore last checkpoint ...")
    # Use TensorFlow to find the latest checkpoint - if any.
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
    # Try to load the data in the checkpoint.
    saver.restore(session, save_path=last_chk_path)
    # If we get to this point, the checkpoint was successfully loaded.
    print("Restored checkpoint from:", last_chk_path)
except Exception:
    # If the above failed for some reason, simply
    # initialize all the variables for the TensorFlow graph.
    print("Failed to restore checkpoint. Initializing variables instead.")
    session.run(tf.global_variables_initializer())
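# Note: this script restores checkpoints but never writes one. To persist the
# weights (a sketch; the original gist omits this step), one could call inside
# the training loop, e.g. every N iterations:
#
#   saver.save(session, save_path=save_path, global_step=global_step)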
######################
#variables for training
######################
train_batch_size = 128
batch_size = train_batch_size
######################
#overtraining and generalization functions
######################
def random_batch():
    # Number of images in the training-set.
    num_images = len(images_train)
    # Create a random index.
    idx = np.random.choice(num_images, size=train_batch_size, replace=False)
    # Use the random index to select random images and labels.
    x_batch = images_train[idx, :, :, :]
    y_batch = labels_train[idx, :]
    return x_batch, y_batch

def convert_to_single_from_one_hot(l):
    # Convert a one-hot label vector to its integer class index.
    return list(l).index(1)
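# Equivalent (and more tolerant of float one-hot vectors, where an entry may
# not compare exactly equal to 1): int(np.argmax(l)).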
######################
# next batch
######################
def next_batch(batch_number, batch_size, data, labels):
    """
    Return the minibatch with the given 1-based `batch_number` from the image
    array `data` and label array `labels`, i.e. the samples with indices
    [(batch_number - 1) * batch_size, batch_number * batch_size - 1].
    """
    decrease = 0
    idx = np.arange(len(data))  # get all possible indexes
    original_length = len(idx)
    if decrease:
        idx = idx[decrease:]
        new_length = len(idx)
        assert new_length + decrease == original_length
    # Select the slice for this minibatch. The slice end is exclusive, so no
    # `-1` is needed to get exactly `batch_size` indexes.
    idx = idx[(batch_number - 1) * batch_size:batch_number * batch_size]
    data_shuffle = [data[i] for i in idx]    # list of `batch_size` samples
    label_shuffle = [labels[i] for i in idx]  # list of `batch_size` labels
    for index in range(len(label_shuffle)):
        print("----Index number: ", index, "\t----Label: ",
              convert_to_single_from_one_hot(label_shuffle[index]),
              "\t----Image Shape", data_shuffle[index].shape)
    #print(label_shuffle)
    return data_shuffle, label_shuffle
######################
# overtrain detection
######################
def is_overtrain(training_accuracy, test_accuracy):
    # Compare the distance of the point (train acc, test acc) to the
    # diagonal ("normal") with its distance to the line x = 1 ("overtrain").
    if distance_point_line(training_accuracy, test_accuracy, "normal") < \
            distance_point_line(training_accuracy, test_accuracy, "overtrain"):
        return False
    else:
        print(" \n******** overtrain ********\n ")
        return True
######################
# batch sequence navigation
######################
def batch_navigator(training_accuracy, test_accuracy, epoch, current_batch_sequence, constant_training_accuracy, increase):
    if is_overtrain(training_accuracy, test_accuracy) or constant_training_accuracy:
        # If the training accuracy has not improved in more than X epochs,
        # increase the number of batches. Minibatch numbers are kept 1-based,
        # as next_batch() expects.
        print("******** Increasing number of batches because training accuracy is within the same window ********")
        return range(1, len(current_batch_sequence) + increase + 1)
    else:
        return current_batch_sequence
######################
# distance from the (train acc, test acc) point to a reference line
######################
def distance_point_line(point_x, point_y, source):
    if source == "normal":
        # Perpendicular distance from (x, y) to the diagonal y = x.
        return float(abs(-1 * point_x + point_y) / math.sqrt(2))
    elif source == "overtrain":
        # Horizontal distance from (x, y) to the vertical line x = 1.
        return abs(point_x - 1)
    else:
        raise ValueError("ERROR from distance_point_line: unknown source %r" % source)
def save_everything(dir_name, all_testing_accuracy, all_training_accuracy, all_loss, all_minibatches_seen, all_num_minibatches):
    assert len(all_loss) == len(all_testing_accuracy) == len(all_training_accuracy)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # Pickle each history list; using `with` ensures the files are closed.
    with open(dir_name + '/test_accuracy.p', 'wb') as f:
        cPickle.dump(all_testing_accuracy, f)
    with open(dir_name + '/train_accuracy.p', 'wb') as f:
        cPickle.dump(all_training_accuracy, f)
    with open(dir_name + '/loss.p', 'wb') as f:
        cPickle.dump(all_loss, f)
    with open(dir_name + '/minibatches_seen.p', 'wb') as f:
        cPickle.dump(all_minibatches_seen, f)
    with open(dir_name + '/num_minibatches.p', 'wb') as f:
        cPickle.dump(all_num_minibatches, f)
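# To read the histories back later (a minimal sketch; the directory name must
# match whatever was passed to save_everything):
#
#   with open('over_gen_extended_2/test_accuracy.p', 'rb') as f:
#       all_testing_accuracy = cPickle.load(f)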
######################
# function for overtraining and gen using range
######################
def optimize_using_overtraining_and_undertraining(epochs, accuracy_range, name, increase):
    # Start-time used for printing time-usage below.
    start_time = time.time()
    # USE OVERTRAINING AND GENERALIZATION HERE
    number_of_minibatches = int(size_training_data / (batch_size * 1.0))
    # Draw a random pool of train_batch_size images; minibatches are cut from it.
    images_train, labels_train = random_batch()
    training_accuracy_per_epoch = 0
    loss_per_epoch = 0
    minibatches_per_epoch = 0
    test_accuracy = 0
    all_training_accuracy = []
    all_testing_accuracy = []
    all_loss = []
    all_minibatches_seen = []
    all_num_minibatches = []
    num_minibatches_trained = 0
    previous_training_accuracy = 0
    constant_training_accuracy = False
    # What was the batch sequence in the last epoch?
    for epoch in range(epochs):
        if epoch == 0:
            current_batch_sequence = [1]
            batch_sequence = current_batch_sequence
        elif epoch > 5:
            # Training accuracy counts as constant if the five epochs before the
            # most recent one all lie within a relative accuracy_range of the
            # latest value.
            if all(previous_training_accuracy <= (accuracy_range * i) + i for i in all_training_accuracy[-6:len(all_training_accuracy) - 1]) \
                    and all(previous_training_accuracy >= abs(i - (accuracy_range * i)) for i in all_training_accuracy[-6:len(all_training_accuracy) - 1]):
                constant_training_accuracy = True
            else:
                constant_training_accuracy = False
            current_batch_sequence = batch_navigator(training_accuracy_per_epoch, test_accuracy, epoch, current_batch_sequence, constant_training_accuracy, increase)
            batch_sequence = current_batch_sequence
        else:
            current_batch_sequence = batch_navigator(training_accuracy_per_epoch, test_accuracy, epoch, current_batch_sequence, False, increase)
            batch_sequence = current_batch_sequence
        training_accuracy_per_epoch = 0
        loss_per_epoch = 0
        minibatches_per_epoch = 0
        test_accuracy = 0
        # Training pass over the current batch sequence.
        for minibatch_number in batch_sequence:
            batch_x, batch_y = next_batch(minibatch_number, batch_size, images_train, labels_train)
            feed_dict_train = {x: batch_x, y_true: batch_y}
            i_global, _ = session.run([global_step, optimizer], feed_dict=feed_dict_train)
            num_minibatches_trained += 1
            minibatches_per_epoch += 1
        # Evaluation pass: measure loss and accuracy on the training minibatches.
        # (The original gist fed images_test/labels_test here while labelling the
        # result "Train Acc"; that appears to be a bug, so the training data is
        # used instead.)
        for minibatch_number in batch_sequence:
            batch_x, batch_y = next_batch(minibatch_number, batch_size, images_train, labels_train)
            feed_dict_train = {x: batch_x, y_true: batch_y}
            loss_, acc = session.run([loss, accuracy], feed_dict=feed_dict_train)
            training_accuracy_per_epoch += acc
            loss_per_epoch += loss_
        if minibatches_per_epoch != 0:
            loss_per_epoch /= minibatches_per_epoch
            training_accuracy_per_epoch /= minibatches_per_epoch
        test_accuracy = session.run(accuracy, feed_dict={x: images_test, y_true: labels_test})
        print("Epoch = " + str(epoch) +
              "\tMinibatches seen = " + str(num_minibatches_trained) +
              "\tNum Minibatches = " + str(len(batch_sequence)) +
              "\tEpoch Loss = " + "{:.2f}".format(loss_per_epoch) +
              "\tTrain Acc = " + "{:.2f}".format(training_accuracy_per_epoch) +
              "\tTest Acc = " + "{:.2f}".format(test_accuracy))
        all_loss.append(loss_per_epoch)
        all_testing_accuracy.append(test_accuracy)
        all_training_accuracy.append(training_accuracy_per_epoch)
        all_minibatches_seen.append(num_minibatches_trained)
        all_num_minibatches.append(len(batch_sequence))
        previous_training_accuracy = all_training_accuracy[-1]
        process = psutil.Process(os.getpid())
        print(process.memory_info().rss)
    # Print the total time usage promised by the comment at the top.
    end_time = time.time()
    print("Time usage: " + str(timedelta(seconds=int(round(end_time - start_time)))))
    save_everything(name, all_testing_accuracy, all_training_accuracy, all_loss, all_minibatches_seen, all_num_minibatches)
######################
#call function
######################
optimize_using_overtraining_and_undertraining(epochs, accuracy_range=1, name="over_gen_extended_2", increase=30)