# coding: utf-8

# ### Homework 4
# #### Problem 3 -- Autoencoder
# ###### John Evans
# ###### 4/26/18

# ##### Imports

# In[ ]:

from tqdm import tqdm
import numpy as np
import cv2
import sys
import os

# Suppress TensorFlow outputs
if len(sys.argv) > 2:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[2]
else:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# comment out to use GPU
#os.environ['CUDA_VISIBLE_DEVICES'] = 'all'

# add path to libcuda.so.1
#os.environ['LD_LIBRARY_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['USE_CUDA_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['LIBRARY_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['PATH'] = '/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/nvidia/lib64:$PATH'

from sklearn.model_selection import train_test_split
import tensorflow as tf
# ##### Data preparation functions

# In[ ]:

# Import images from files, skipping any file OpenCV cannot read
def import_images(wd, num_channels):
    images = []
    for file in tqdm(os.listdir(wd)):
        if num_channels == 1:
            # flag 0 = cv2.IMREAD_GRAYSCALE
            image = cv2.imread(os.path.join(wd, file), 0)
        else:
            image = cv2.imread(os.path.join(wd, file))
        if image is not None:
            images.append(image)
    return images
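
# Example usage (the path here is hypothetical): grayscale-load every
# readable file in a directory of daisy photos.
# daisies = import_images('./flowers/daisy', num_channels=1)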

# Process images (scale the long side down to img_size, then zero-pad to square)
def process_images(images, img_size, num_channels):
    min_dim = img_size
    new_images = []
    for im in tqdm(images):
        old_size = im.shape[:2]  # (height, width); ignore the channel axis
        ratio = float(min_dim) / max(old_size)
        new_size = tuple([int(x * ratio) for x in old_size])
        im = cv2.resize(im, (new_size[1], new_size[0]))  # cv2.resize takes (width, height)
        delta_w = min_dim - new_size[1]
        delta_h = min_dim - new_size[0]
        top, bottom = delta_h // 2, delta_h - (delta_h // 2)
        left, right = delta_w // 2, delta_w - (delta_w // 2)
        color = [0, 0, 0]
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        new_images.append(im)
    return new_images
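
# In[ ]:

# Quick sanity check (illustrative only, not part of the assignment): a
# 60x100 grayscale dummy image should come back square at 128x128 after
# the resize + zero-padding above.
_dummy = np.zeros((60, 100), dtype=np.uint8)
assert process_images([_dummy], 128, 1)[0].shape[:2] == (128, 128)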
# ##### Layer and model functions

# In[ ]:

# Convolution Layer
def convolution_layer(name, data, kernel_shape, strides=[1, 1, 1, 1]):
    with tf.name_scope(name):
        W = tf.get_variable(name='w_' + name, shape=kernel_shape, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name, shape=[kernel_shape[3]], initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        out = tf.nn.conv2d(data, W, strides=strides, padding='SAME')
        out = tf.nn.bias_add(out, b)
        return tf.nn.relu(out)

# Pooling Layer
def pooling_layer(name, data, kernel_shape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    with tf.name_scope(name):
        return tf.nn.max_pool(data, ksize=kernel_shape, strides=strides, padding='SAME')

# Dropout Layer
def dropout_layer(name, data, keep_rate):
    with tf.name_scope(name):
        return tf.nn.dropout(data, keep_rate)

# Fully Connected Layer (flattens its input before the matmul)
def fully_connected_layer(name, data, nodes):
    with tf.name_scope(name):
        input_size = int(np.prod(data.shape[1:]))
        W = tf.get_variable(name='w_' + name, shape=[input_size, nodes], initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name, shape=[nodes], initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        data = tf.reshape(data, [-1, input_size])
        return tf.nn.relu(tf.add(tf.matmul(data, W), b))

# Deconvolution Layer
def deconvolution_layer(name, data, num_outputs, kernel_shape, strides=[1, 1]):
    with tf.name_scope(name):
        kwargs = {'num_outputs': num_outputs, 'kernel_size': kernel_shape, 'stride': strides, 'padding': 'SAME',
                  'weights_initializer': tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
                  'biases_initializer': tf.contrib.layers.xavier_initializer(uniform=False), 'activation_fn': tf.nn.relu}
        return tf.contrib.layers.conv2d_transpose(data, **kwargs)

# Upsample Layer
def upsample_layer(name, data, scale_factor=[2, 2]):
    with tf.name_scope(name):
        size = [int(data.shape[1] * scale_factor[0]), int(data.shape[2] * scale_factor[1])]
        return tf.image.resize_bilinear(data, size=size, align_corners=False)
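
# In[ ]:

# Illustrative sanity check, not part of the network: a 2x2 max pool halves
# the spatial dimensions and the bilinear upsample doubles them back, so
# pool -> upsample round-trips a [batch, 128, 128, 25] tensor. TensorFlow's
# static shapes let us verify this without running a session.
_probe = tf.placeholder(tf.float32, [None, 128, 128, 25])
_pooled = pooling_layer('probe_pool', _probe)   # -> [None, 64, 64, 25]
_up = upsample_layer('probe_up', _pooled)       # -> [None, 128, 128, 25]
assert _up.shape.as_list() == [None, 128, 128, 25]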

# In[ ]:

# Network model from layers
def model(name, data, img_size=128):
    """
    img_size must be even.
    We want a dimensionality reduction from 16384 to 4096.
    Layers:
    reshape1 --> 128, 128 (16384)
    conv --> kernel size: (5,5), n_filters: 25 (kept small so that it runs fast)
    pool --> 64, 64, 25
    dropout1 --> keep rate 0.8
    reshape2 --> 64*64*25
    fc1 --> 64*64*25, 64*64*5
    dropout2 --> keep rate 0.8
    fc2 --> 64*64*5, 64*64 --> output is the encoded representation
    fc3 --> 64*64, 64*64*5
    dropout3 --> keep rate 0.8
    fc4 --> 64*64*5, 64*64*25
    dropout4 --> keep rate 0.8
    reshape3 --> 64, 64, 25
    deconv --> kernel size: (5,5), n_filters: 25
    upsample --> 128, 128, 25
    fc5 --> 128*128*25, 128*128
    """
    assert img_size % 2 == 0
    red_img_size = img_size // 2
    with tf.name_scope(name):
        # Encode data
        reshape1 = tf.reshape(data, shape=[-1, img_size, img_size, 1])
        conv = convolution_layer('conv', reshape1, [5, 5, 1, 25])
        pool = pooling_layer('pool', conv)
        dropout1 = dropout_layer('dropout1', pool, 0.8)
        reshape2 = tf.reshape(dropout1, shape=[-1, 25*red_img_size**2])
        fc1 = fully_connected_layer('fc1', reshape2, 5*red_img_size**2)
        dropout2 = dropout_layer('dropout2', fc1, 0.8)
        fc2 = fully_connected_layer('fc2', dropout2, red_img_size**2)
        # Decode encoded data
        fc3 = fully_connected_layer('fc3', fc2, 5*red_img_size**2)
        dropout3 = dropout_layer('dropout3', fc3, 0.8)
        fc4 = fully_connected_layer('fc4', dropout3, 25*red_img_size**2)
        dropout4 = dropout_layer('dropout4', fc4, 0.8)
        reshape3 = tf.reshape(dropout4, shape=[-1, red_img_size, red_img_size, 25])
        deconv = deconvolution_layer('deconv', reshape3, 25, [5, 5])
        upsample = upsample_layer('upsample', deconv)
        fc5 = fully_connected_layer('fc5', upsample, img_size**2)
        # Get reconstruction error after the encoding/decoding process;
        # flatten the input so it matches the shape of fc5
        with tf.name_scope('cost'):
            flat_data = tf.reshape(data, [-1, img_size**2])
            cost = tf.reduce_mean(tf.square(tf.subtract(fc5, flat_data)))
    return fc5, cost
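
# In[ ]:

# Worked check of the bottleneck (illustrative): with the default
# img_size = 128, the input has 128**2 = 16384 values and the encoder
# output fc2 has (128 // 2)**2 = 4096 units, the reduction promised in
# the docstring.
assert 128 ** 2 == 16384 and (128 // 2) ** 2 == 4096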
# ##### Training functions
#
# (NEEDS TO BE REWRITTEN)

# In[ ]:

def next_batch(images, labels, start, batch_size):
    end = start + batch_size
    if end > len(images):
        # Wrap around to the start of the data once the batch overruns it
        start = 0
        end = batch_size
        assert batch_size <= len(images)
    return end, images[start:end], labels[start:end]
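
# Illustrative check of the wraparound (not part of training): with 5 items
# and batch_size 3, a start index of 4 overruns the array, so next_batch
# resets and returns the first batch [0:3].
_imgs = np.arange(5)
_lbls = np.arange(5)
_end, _bx, _by = next_batch(_imgs, _lbls, 4, 3)
assert _end == 3 and list(_bx) == [0, 1, 2]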

def train_network(x, n_epochs=5, gpu_mem_limit=None):
    prediction, cost = model('ConvAutoEnc', x)
    with tf.name_scope('opt'):
        optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Create a summary to monitor cost tensor
    tf.summary.scalar("cost", cost)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    if gpu_mem_limit is None:
        kwargs = {}
    else:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_mem_limit)
        kwargs = {'config': tf.ConfigProto(gpu_options=gpu_options)}

    with tf.Session(**kwargs) as sess:
        sess.run(tf.global_variables_initializer())

        # create log writer object
        logs_path = './tf_logs'  # assumed TensorBoard log directory; adjust as needed
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

        train_start = 0
        #test_start = 0
        for epoch in tqdm(range(n_epochs)):
            avg_cost = 0
            n_batches = int(len(train_data) / batch_size)
            # Loop over all batches
            for i in tqdm(range(n_batches)):
                # The autoencoder reconstructs its input, so the labels go unused
                train_start, batch_x, _ = next_batch(train_data, train_labels, train_start, batch_size)
                #test_start, x_valid_batch, y_valid_batch = next_batch(test_data, test_labels, test_start, batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_x})
                # Compute average loss
                avg_cost += c / n_batches
                # write log
                writer.add_summary(summary, epoch * n_batches + i)
            # Display logs per epoch step
            print('Epoch', epoch + 1, '/', n_epochs, 'cost:', avg_cost)
        print('Optimization Finished')
        print('Cost:', cost.eval({x: test_data}))
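
# Example invocation (hypothetical settings, shown commented out because x is
# defined further down): cap TensorFlow at half of the GPU memory and train
# for 10 epochs instead of the default 5.
# train_network(x, n_epochs=10, gpu_mem_limit=0.5)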

# ##### Build and train network

# In[ ]:

validation_size = 0.15
img_size = 128
num_channels = 1
items = []
labels = []
types = ["daisy", "dandelion", "rose", "sunflower", "tulip"]
num_classes = len(types)
label_map = {"daisy": [1,0,0,0,0], "dandelion": [0,1,0,0,0], "rose": [0,0,1,0,0], "sunflower": [0,0,0,1,0], "tulip": [0,0,0,0,1]}

# Make sure the image directory argument ends with a trailing slash
if not sys.argv[1].endswith('/'):
    sys.argv[1] = ''.join([sys.argv[1], '/'])

for t in types:
    images = import_images(''.join([sys.argv[1], t]), num_channels)
    images = process_images(images, img_size, num_channels)
    for i in images:
        items.append(i)
        labels.append(label_map[t])

images = np.array(items)
images = np.reshape(images, (len(images), img_size**2, num_channels))
labels = np.array(labels)

# Split training and testing
train_data, test_data, train_labels, test_labels = train_test_split(images, labels, test_size=validation_size)

batch_size = 100

x = tf.placeholder(tf.float32, [None, img_size**2, num_channels], name='InputData')
y = tf.placeholder(tf.float32, [None, num_classes], name='LabelData')

train_network(x)
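
# How to run (sketch; the script filename here is hypothetical): the first
# argument is the directory containing the five flower subfolders, and the
# optional second argument is the TF_CPP_MIN_LOG_LEVEL value, e.g.
#   python autoencoder.py ./flowers/ 2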