Created
April 17, 2017 19:20
-
-
Save sidgan/292edde55d6e9edd6a4b8520453017cb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### | |
# Modified from https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/06_CIFAR-10.ipynb
##### | |
import matplotlib.pyplot as plt | |
import tensorflow as tf | |
import numpy as np | |
from sklearn.metrics import confusion_matrix | |
import time | |
from datetime import timedelta | |
import math | |
import os | |
import random | |
#from itertools import * | |
import itertools | |
# Use PrettyTensor to simplify Neural Network construction. | |
import prettytensor as pt | |
# Seed NumPy's generator so NumPy-based sampling is repeatable.
# NOTE: only np.random is seeded here; the stdlib `random` module used for
# number_train / number_test below is not.
seed = 7
np.random.seed(seed)
import _pickle as cPickle

# Number of epochs passed to the training routine.
epochs = 1000

import cifar10

# Fetch the dataset if needed and read the class names.
cifar10.maybe_download_and_extract()
class_names = cifar10.load_class_names()
print(class_names)

# Training split: images, class numbers and one-hot labels.
images_train, cls_train, labels_train = cifar10.load_training_data()
size_training_data = len(images_train)
number_train = random.randrange(size_training_data - 9)

# Test split, same layout as the training split.
images_test, cls_test, labels_test = cifar10.load_test_data()
number_test = random.randrange(len(images_test) - 9)

print("Size of:")
print("- Training-set:\t\t{}".format(size_training_data))
print("- Test-set:\t\t{}".format(len(images_test)))
print(images_test.shape)
print(images_train.shape)

from cifar10 import img_size, num_channels, num_classes

# Side length of the square crop fed to the network.
img_size_cropped = 24

import psutil

# Report the resident memory of this process after loading the data.
process = psutil.Process(os.getpid())
print(process.memory_info().rss)
###################### | |
#placeholders for image and labels | |
###################### | |
# Input images: any batch size, img_size x img_size pixels, num_channels channels.
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels], name='x')
# One-hot encoded true labels, one row per image.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
# True class number per image. The axis is passed positionally because the
# `dimension=` keyword is deprecated in favour of `axis=`; the positional
# form is accepted under either name.
y_true_cls = tf.argmax(y_true, 1)
###################### | |
#preprocessing actual function | |
###################### | |
def pre_process_image(image, training):
    """Add the pre-processing ops for a single image to the TensorFlow graph.

    Args:
        image: a single image tensor.
        training: True to build the randomly distorting training branch,
            False to build the deterministic testing branch.

    Returns:
        The image tensor cropped to img_size_cropped x img_size_cropped.
    """
    if not training:
        # For testing, crop the input image around the centre so it is the
        # same size as the images that are randomly cropped during training.
        return tf.image.resize_image_with_crop_or_pad(
            image,
            target_height=img_size_cropped,
            target_width=img_size_cropped)

    # For training: random crop followed by a random horizontal flip.
    crop_shape = [img_size_cropped, img_size_cropped, num_channels]
    distorted = tf.random_crop(image, size=crop_shape)
    distorted = tf.image.random_flip_left_right(distorted)

    # Randomly adjust hue, contrast, brightness and saturation.
    distorted = tf.image.random_hue(distorted, max_delta=0.05)
    distorted = tf.image.random_contrast(distorted, lower=0.3, upper=1.0)
    distorted = tf.image.random_brightness(distorted, max_delta=0.2)
    distorted = tf.image.random_saturation(distorted, lower=0.0, upper=2.0)

    # Some of these functions may overflow and result in pixel values beyond
    # the [0, 1] range. It is unclear from the documentation of TensorFlow
    # 0.10.0rc0 whether this is intended, so limit the range explicitly.
    distorted = tf.minimum(distorted, 1.0)
    return tf.maximum(distorted, 0.0)
###################### | |
#preprocessing call to function | |
###################### | |
def pre_process(images, training):
    """Apply pre_process_image to every image of a batch tensor.

    Args:
        images: batch of images; tf.map_fn iterates over its first axis.
        training: forwarded unchanged to pre_process_image.

    Returns:
        The tensor produced by mapping pre_process_image over `images`.
    """
    def _single_image(image):
        # Bind the `training` flag so map_fn can call with one argument.
        return pre_process_image(image, training)

    return tf.map_fn(_single_image, images)
###################### | |
#preprocessing all images using placeholders | |
###################### | |
# Training-mode pre-processing of the input placeholder.
distorted_images = pre_process(x, True)
###################### | |
#network define | |
###################### | |
def main_network(images, training):
    """Build the convolutional network on top of `images` with Pretty Tensor.

    Args:
        images: pre-processed image batch tensor.
        training: True selects the Pretty Tensor training phase, False the
            inference phase.

    Returns:
        Tuple (y_pred, loss) as produced by the softmax classifier, which is
        fed the module-level `y_true` placeholder as labels.
    """
    # Pretty Tensor uses special values to distinguish training from testing.
    phase = pt.Phase.train if training else pt.Phase.infer

    # Wrap the input images as a Pretty Tensor object.
    net = pt.wrap(images)

    # ReLU is the default activation for every layer built in this scope;
    # only the first convolutional layer uses batch normalization.
    with pt.defaults_scope(activation_fn=tf.nn.relu, phase=phase):
        net = net.conv2d(kernel=5, depth=64, name='layer_conv1', batch_normalize=True)
        net = net.max_pool(kernel=2, stride=2)
        net = net.conv2d(kernel=5, depth=64, name='layer_conv2')
        net = net.max_pool(kernel=2, stride=2)
        net = net.flatten()
        net = net.fully_connected(size=256, name='layer_fc1')
        net = net.fully_connected(size=128, name='layer_fc2')
        predictions, cost = net.softmax_classifier(num_classes=num_classes, labels=y_true)

    # TODO: add dropout to this network?
    return predictions, cost
###################### | |
#give the network placeholders | |
###################### | |
def create_network(training):
    """Build pre-processing plus main network inside the 'network' scope.

    Args:
        training: True creates new variables (training graph); False re-uses
            the existing ones (testing graph).

    Returns:
        Tuple (y_pred, loss) from main_network.
    """
    # Variables are created on the training call and re-used afterwards.
    reuse_variables = not training
    with tf.variable_scope('network', reuse=reuse_variables):
        # The input placeholder goes through pre-processing first ...
        preprocessed = pre_process(images=x, training=training)
        # ... and the result feeds the main network.
        return main_network(images=preprocessed, training=training)
###################### | |
#save variables and optimize | |
###################### | |
# Step counter incremented by the optimizer; not trainable itself.
global_step = tf.Variable(initial_value=0, name='global_step', trainable=False)

# The training graph must be built first: it creates the variables that the
# testing graph below re-uses.
_, loss = create_network(training=True)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=global_step)

# Testing graph: same variables, deterministic pre-processing.
y_pred, _ = create_network(training=False)

# Predicted class number per image. The axis is passed positionally because
# the `dimension=` keyword is deprecated in favour of `axis=`.
y_pred_cls = tf.argmax(y_pred, 1)

# Fraction of images whose predicted class equals the true class.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Saver used to restore (and potentially write) checkpoints.
saver = tf.train.Saver()
###################### | |
#function for saving weights of the model | |
###################### | |
def get_weights_variable(layer_name):
    """Return the existing variable named 'weights' of the given layer.

    Args:
        layer_name: name of a layer created inside the 'network' scope.

    Returns:
        The variable found at 'network/<layer_name>/weights'.
    """
    # Re-opening the scope with reuse=True is an awkward way of looking up
    # an existing variable; the TensorFlow function was really intended for
    # another purpose.
    scope_name = "network/{}".format(layer_name)
    with tf.variable_scope(scope_name, reuse=True):
        return tf.get_variable('weights')
###################### | |
#get weights of the layers | |
###################### | |
# Weight variables of the two convolutional layers.
weights_conv1, weights_conv2 = (
    get_weights_variable(layer_name=conv_layer)
    for conv_layer in ('layer_conv1', 'layer_conv2')
)
###################### | |
#function to get output of the layer | |
###################### | |
def get_layer_output(layer_name):
    """Return the output tensor of the named layer from the default graph.

    The tensor is looked up as 'network/<layer_name>/Relu:0', which assumes
    the layer uses Relu as its activation function.
    """
    relu_tensor_name = "network/{}/Relu:0".format(layer_name)
    graph = tf.get_default_graph()
    return graph.get_tensor_by_name(relu_tensor_name)
###################### | |
#get output of the layers | |
###################### | |
# Output tensors of the two convolutional layers.
output_conv1, output_conv2 = (
    get_layer_output(layer_name=conv_layer)
    for conv_layer in ('layer_conv1', 'layer_conv2')
)
###################### | |
#session variables and try to save | |
###################### | |
session = tf.Session()

# Directory and base path for checkpoints.
save_dir = 'checkpoints/'
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, 'cifar10_cnn')

try:
    print("Trying to restore last checkpoint ...")
    # Use TensorFlow to find the latest checkpoint - if any.
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
    # Try and load the data in the checkpoint.
    saver.restore(session, save_path=last_chk_path)
    # If we get to this point, the checkpoint was successfully loaded.
    print("Restored checkpoint from:", last_chk_path)
except Exception:
    # Best-effort restore: any ordinary failure (e.g. no checkpoint yet)
    # falls back to fresh initialization. `Exception` rather than a bare
    # `except` so Ctrl-C / SystemExit are not swallowed.
    print("Failed to restore checkpoint. Initializing variables instead.")
    session.run(tf.global_variables_initializer())
###################### | |
#variables for training | |
###################### | |
# Number of samples per minibatch; both names are used by the code below.
batch_size = train_batch_size = 128
###################### | |
#overtraining and generalization functions | |
###################### | |
def random_batch():
    """Draw train_batch_size distinct random samples from the training set.

    Returns:
        Tuple (x_batch, y_batch) taken from the module-level images_train
        and labels_train at the same randomly chosen indices.
    """
    # Sample indices without replacement so no image appears twice.
    chosen = np.random.choice(len(images_train), size=train_batch_size, replace=False)
    return images_train[chosen, :, :, :], labels_train[chosen, :]
def convert_to_single_from_one_hot(l):
    """Return the position of the first element equal to 1 in `l`.

    Raises:
        ValueError: if no element of `l` equals 1.
    """
    entries = list(l)
    return entries.index(1)
###################### | |
# next batch | |
###################### | |
def next_batch(batch_number, batch_size, data, labels, verbose=True):
    """Return the `batch_number`-th consecutive minibatch of `data` and `labels`.

    Batch numbers are 1-based: batch k covers the index range
    [(k - 1) * batch_size, k * batch_size - 1], both ends inclusive.
    Batch 0 wraps around like a negative Python index and selects the last
    `batch_size` samples, so callers that count from 0 still receive data.

    Args:
        batch_number: which minibatch to return (1-based, see above).
        batch_size: number of samples per minibatch.
        data: sliceable sequence of samples (list or array).
        labels: sliceable sequence of one-hot labels aligned with `data`.
        verbose: when True (default) print index, label and image shape of
            every returned sample, as the original implementation did.

    Returns:
        Tuple (data_batch, label_batch) of two lists. They are shorter than
        `batch_size`, possibly empty, when the range runs past the data.
    """
    stop = batch_number * batch_size
    start = stop - batch_size
    # A stop of 0 would yield an empty slice; None means "through the end".
    window = slice(start, stop if stop != 0 else None)

    data_shuffle = list(data[window])
    label_shuffle = list(labels[window])

    if verbose:
        for index, (sample, label) in enumerate(zip(data_shuffle, label_shuffle)):
            print("----Index number: ", index, "\t----Label: ",
                  convert_to_single_from_one_hot(label),
                  "\t----Image Shape", sample.shape)

    return data_shuffle, label_shuffle
###################### | |
# | |
###################### | |
def is_overtrain(training_accuracy, test_accuracy):
    """Report whether the accuracy point lies closer to the overtrain line.

    The point (training_accuracy, test_accuracy) is compared against the
    "normal" and "overtrain" reference lines of distance_point_line.

    Returns:
        False when the point is strictly closer to the "normal" line,
        True otherwise (a notice is printed in that case).
    """
    to_normal = distance_point_line(training_accuracy, test_accuracy, "normal")
    to_overtrain = distance_point_line(training_accuracy, test_accuracy, "overtrain")

    overtrained = not (to_normal < to_overtrain)
    if overtrained:
        print(" \n******** overtrain ********\n ")
    return overtrained
###################### | |
# | |
###################### | |
def batch_navigator(training_accuracy, test_accuracy, epoch, current_batch_sequence, constant_training_accuracy, increase):
    """Decide which minibatch numbers the next epoch should use.

    Args:
        training_accuracy: training accuracy of the previous epoch.
        test_accuracy: test accuracy of the previous epoch.
        epoch: current epoch number (accepted for interface compatibility,
            not used in the decision).
        current_batch_sequence: batch numbers used in the previous epoch.
        constant_training_accuracy: True when the caller judged the training
            accuracy to have stayed within the same window.
        increase: how many additional minibatches to add when growing.

    Returns:
        `current_batch_sequence` unchanged, or a range of 1-based batch
        numbers that is `increase` longer than the current sequence.
    """
    if is_overtrain(training_accuracy, test_accuracy) or constant_training_accuracy:
        # If the model overtrains, or the training accuracy has not improved
        # over several epochs, increase the number of batches.
        print("******** Increasing number of batches because training accuracy is within the same window ********")
        grown_length = len(current_batch_sequence) + increase
        # Batch numbers are 1-based (the first epoch uses [1]), so start the
        # range at 1 instead of 0.
        return range(1, grown_length + 1)
    return current_batch_sequence
###################### | |
# | |
###################### | |
def distance_point_line(point_x, point_y, source):
    """Return the distance from the point (point_x, point_y) to a reference line.

    Args:
        point_x: x coordinate of the point.
        point_y: y coordinate of the point.
        source: "normal" for the diagonal y = x, or "overtrain" for the
            vertical line x = 1.

    Returns:
        The distance as a float.

    Raises:
        ValueError: if `source` is neither "normal" nor "overtrain". The
            previous implementation returned an error string here, which
            callers would then have compared against a number.
    """
    if source == "normal":
        # Distance to the line -x + y = 0.
        return float(abs(-1 * point_x + point_y) / math.sqrt(2))
    if source == "overtrain":
        # Distance to the vertical line x = 1.
        return float(abs(point_x - 1))
    raise ValueError("ERROR from distance_point_line: unknown source {!r}".format(source))
def save_everything(dir_name, all_testing_accuracy, all_training_accuracy, all_loss, all_minibatches_seen, all_num_minibatches):
    """Pickle the per-epoch training statistics into `dir_name`.

    One file is written per series: test_accuracy.p, train_accuracy.p,
    loss.p, minibatches_seen.p and num_minibatches.p. The directory is
    created if it does not exist yet.

    Args:
        dir_name: output directory.
        all_testing_accuracy: per-epoch test accuracies.
        all_training_accuracy: per-epoch training accuracies.
        all_loss: per-epoch losses.
        all_minibatches_seen: cumulative minibatch counts per epoch.
        all_num_minibatches: number of minibatches used in each epoch.

    Raises:
        AssertionError: if loss, test accuracy and training accuracy series
            differ in length.
    """
    assert len(all_loss) == len(all_testing_accuracy) == len(all_training_accuracy)

    os.makedirs(dir_name, exist_ok=True)

    series_by_file = (
        ('test_accuracy.p', all_testing_accuracy),
        ('train_accuracy.p', all_training_accuracy),
        ('loss.p', all_loss),
        ('minibatches_seen.p', all_minibatches_seen),
        ('num_minibatches.p', all_num_minibatches),
    )
    for file_name, series in series_by_file:
        # `with` closes each file even if pickling fails; the original
        # left all five handles open.
        with open(os.path.join(dir_name, file_name), 'wb') as handle:
            cPickle.dump(series, handle)
###################### | |
# function for overtraining and gen using range | |
###################### | |
def _training_accuracy_is_flat(latest, history, accuracy_range):
    """Return True if `latest` lies inside the relative window of every value in `history`."""
    return all(
        abs(past - accuracy_range * past) <= latest <= past + accuracy_range * past
        for past in history
    )


def optimize_using_overtraining_and_undertraining(epochs, accuracy_range, name, increase):
    """Train the network while adapting the number of minibatches per epoch.

    The first epoch trains on batch number 1 only. Before each later epoch,
    batch_navigator decides whether to keep the current batch sequence or
    to grow it by `increase` batches. After the last epoch the collected
    statistics are written with save_everything.

    Fixes relative to the previous version:
      * the full module-level training set is used; previously it was
        shadowed by a single random batch, so every batch number beyond
        the first produced an empty feed;
      * "Train Acc" and the epoch loss are measured on training batches,
        not on test batches;
      * the batch sequence is capped at the number of minibatches the
        training set holds, and empty batches are skipped.

    Args:
        epochs: number of epochs to run.
        accuracy_range: relative half-width of the window used to decide
            that the training accuracy has stopped changing.
        name: output directory handed to save_everything.
        increase: number of minibatches added whenever the sequence grows.
    """
    # Batch numbers beyond this have no data behind them.
    max_minibatches = max(1, size_training_data // batch_size)

    training_accuracy_per_epoch = 0
    test_accuracy = 0
    previous_training_accuracy = 0
    num_minibatches_trained = 0
    current_batch_sequence = [1]

    all_training_accuracy = []
    all_testing_accuracy = []
    all_loss = []
    all_minibatches_seen = []
    all_num_minibatches = []

    for epoch in range(epochs):
        if epoch > 0:
            # The plateau test compares the latest accuracy with the five
            # epochs before it, so it only applies from the seventh epoch.
            constant_training_accuracy = epoch > 5 and _training_accuracy_is_flat(
                previous_training_accuracy,
                all_training_accuracy[-6:-1],
                accuracy_range)
            # The navigator sees the previous epoch's accuracies.
            current_batch_sequence = batch_navigator(
                training_accuracy_per_epoch, test_accuracy, epoch,
                current_batch_sequence, constant_training_accuracy, increase)
            # Never request more batches than the training set contains.
            current_batch_sequence = current_batch_sequence[:max_minibatches]
        batch_sequence = current_batch_sequence

        # Training pass over the selected minibatches.
        for minibatch_number in batch_sequence:
            batch_x, batch_y = next_batch(minibatch_number, batch_size, images_train, labels_train)
            if len(batch_x) == 0:
                # Nothing to feed for this batch number.
                continue
            session.run([global_step, optimizer], feed_dict={x: batch_x, y_true: batch_y})
            num_minibatches_trained += 1

        # Measure loss and accuracy on the same training minibatches.
        loss_per_epoch = 0
        training_accuracy_per_epoch = 0
        evaluated_minibatches = 0
        for minibatch_number in batch_sequence:
            batch_x, batch_y = next_batch(minibatch_number, batch_size, images_train, labels_train)
            if len(batch_x) == 0:
                continue
            loss_, acc = session.run([loss, accuracy], feed_dict={x: batch_x, y_true: batch_y})
            training_accuracy_per_epoch += acc
            loss_per_epoch += loss_
            evaluated_minibatches += 1
        if evaluated_minibatches != 0:
            loss_per_epoch /= evaluated_minibatches
            training_accuracy_per_epoch /= evaluated_minibatches

        # NOTE(review): the whole test set is evaluated in a single run;
        # split it into batches if memory becomes a problem.
        test_accuracy = session.run(accuracy, feed_dict={x: images_test, y_true: labels_test})

        print("Epoch = " + str(epoch) +
              "\tMinibatches seen = " + str(num_minibatches_trained) +
              "\tNum Minibatches = " + str(len(batch_sequence)) +
              "\tEpoch Loss = " + "{:.2f}".format(loss_per_epoch) +
              "\tTrain Acc = " + "{:.2f}".format(training_accuracy_per_epoch) +
              "\tTest Acc = " + "{:.2f}".format(test_accuracy))

        all_loss.append(loss_per_epoch)
        all_testing_accuracy.append(test_accuracy)
        all_training_accuracy.append(training_accuracy_per_epoch)
        all_minibatches_seen.append(num_minibatches_trained)
        all_num_minibatches.append(len(batch_sequence))
        previous_training_accuracy = all_training_accuracy[-1]

    # Report resident memory after training, then persist the statistics.
    process = psutil.Process(os.getpid())
    print(process.memory_info().rss)
    save_everything(name, all_testing_accuracy, all_training_accuracy, all_loss, all_minibatches_seen, all_num_minibatches)
###################### | |
#call function | |
###################### | |
# Run the training routine with the module-level epoch count.
optimize_using_overtraining_and_undertraining(
    epochs,
    accuracy_range=1,
    name="over_gen_extended_2",
    increase=30,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment