Hinton autoencoder for Theano and TensorFlow benchmarking
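The script below benchmarks a Hinton-style deep autoencoder (layer sizes 784-1000-500-250-30-250-500-1000-784, optionally widened by a factor) on MNIST. For each backend it measures graph compilation / session setup time and the average per-iteration cost of plain SGD training, over a grid of batch sizes and width factors.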
from __future__ import print_function
import numpy as np
import os
import gzip
import sys
import time
from collections import OrderedDict

floatX = "float32"
def download_data(folder):
    """Download the pickled MNIST dataset into `folder` if not already present."""
    if not os.path.exists(folder):
        os.mkdir(folder)
    file_name = "mnist.pkl.gz"
    file_path = os.path.join(folder, file_name)
    if sys.version_info.major < 3:
        import urllib2 as urllib
    else:
        import urllib.request as urllib
    if not os.path.exists(file_path):
        # Note: deeplearning.net has been unreliable in recent years; a mirror
        # of mnist.pkl.gz may be needed if this URL no longer resolves.
        url = "http://deeplearning.net/data/mnist/mnist.pkl.gz"
        proxy = os.environ.get('HTTP_PROXY', None)
        if proxy is not None:
            proxy = urllib.ProxyHandler({'http': proxy})
            opener = urllib.build_opener(proxy)
            urllib.install_opener(opener)
        with open(file_path, 'wb') as f:
            f.write(urllib.urlopen(url).read())
def load_data(folder):
    """Load MNIST and return (images, labels) for the combined train + validation splits."""
    file_name = "mnist.pkl.gz"
    file_path = os.path.join(folder, file_name)
    with gzip.open(file_path, 'rb') as f:
        if sys.version_info.major < 3:
            import cPickle as pickle
            data = pickle.load(f)
        else:
            import pickle
            # The pickle was written by Python 2, so decode byte strings as latin1
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            data = u.load()
    # Cast images to floatX and targets to int8
    images = np.concatenate((data[0][0].astype(floatX),
                             data[1][0].astype(floatX)),
                            axis=0)
    labels = np.concatenate((data[0][1].astype(np.int8),
                             data[1][1].astype(np.int8)),
                            axis=0)
    return images, labels
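# A quick sanity check for the loader above (hypothetical, not part of the
# benchmark itself): the concatenated train + validation splits of
# mnist.pkl.gz should give 60000 flattened 28x28 images. Uncomment to verify:
#
#   download_data("mnist_hinton")
#   images, labels = load_data("mnist_hinton")
#   assert images.shape == (60000, 784) and labels.shape == (60000,)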
def main_theano(batch_size, factor, burnout, epochs, period=1):
    import theano
    import theano.tensor as T
    download_data("mnist_hinton")
    images, labels = load_data("mnist_hinton")
    # Layer sizes follow Hinton's 784-1000-500-250-30 autoencoder, mirrored
    # for the decoder and scaled by `factor`
    d = [784, factor * 1000, factor * 500, factor * 250, factor * 30,
         factor * 250, factor * 500, factor * 1000, 784]
    learning_rate = 0.01
    data_in = T.matrix(name="Input", dtype=floatX)
    params = list()
    for i in range(1, 9):
        params.append(theano.shared(np.random.randn(d[i-1], d[i]).astype(floatX) / 100.0,
                                    name='W_' + str(i)))
        params.append(theano.shared(np.zeros(d[i]).astype(floatX), name='b_' + str(i)))
    # Input layer
    h = T.tanh(T.dot(data_in, params[0]) + params[1])
    # Hidden layers
    for i in range(1, 7):
        h = T.tanh(T.dot(h, params[2*i]) + params[2*i+1])
    # Output layer reconstructs the input through a sigmoid
    h = T.nnet.sigmoid(T.dot(h, params[14]) + params[15])
    error = T.nnet.binary_crossentropy(h, data_in)
    loss = error.sum() / np.asarray(batch_size, dtype=floatX)
    # Plain SGD updates
    grads = T.grad(loss, params)
    updates = OrderedDict()
    for p, g in zip(params, grads):
        updates[p] = p - learning_rate * g
    start_time = time.time()
    func = theano.function([data_in], loss, updates=updates)
    compile_time = float(1000*(time.time() - start_time))
    # `vals` records the loss every `period` iterations (kept for inspection)
    vals = np.zeros(epochs // period)
    num_images = images.shape[0]
    for i in range(epochs + burnout):
        if i == burnout:
            start_time = time.time()
        # Cycle through the data set one minibatch per iteration
        ind = (i % (num_images // batch_size)) * batch_size
        data = images[ind:ind+batch_size]
        if i >= burnout and (i + 1 - burnout) % period == 0:
            vals[(i - burnout) // period] = func(data)
        else:
            func(data)
    # Average milliseconds per iteration, matching the units used in main_tf
    overall = float(1000*(time.time() - start_time)) / float(epochs)
    return overall, compile_time
def main_tf(batch_size, factor, burnout, epochs, period=1):
    import tensorflow as tf
    download_data("mnist_hinton")
    images, labels = load_data("mnist_hinton")
    d = [784, factor * 1000, factor * 500, factor * 250, factor * 30,
         factor * 250, factor * 500, factor * 1000, 784]
    learning_rate = 0.01
    data_in = tf.placeholder(tf.float32, [None, 784])
    params = list()
    for i in range(1, 9):
        params.append(tf.Variable(np.random.randn(d[i-1], d[i]).astype(floatX) / 100.0,
                                  name='W_' + str(i)))
        params.append(tf.Variable(np.zeros(d[i]).astype(floatX), name='b_' + str(i)))
    # Input layer
    h = tf.nn.tanh(tf.matmul(data_in, params[0]) + params[1])
    # Hidden layers
    for i in range(1, 7):
        h = tf.nn.tanh(tf.matmul(h, params[2*i]) + params[2*i+1])
    # Output layer: keep the logits, since the loss applies the sigmoid itself
    h = tf.matmul(h, params[14]) + params[15]
    error = tf.nn.sigmoid_cross_entropy_with_logits(h, data_in)
    loss = tf.reduce_sum(error) / np.asarray(batch_size, dtype=floatX)
    tf.scalar_summary("loss", loss)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    start_time = time.time()
    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)
    compile_time = float(1000*(time.time() - start_time))
    vals = np.zeros(epochs // period)
    num_images = images.shape[0]
    for i in range(epochs + burnout):
        if i == burnout:
            start_time = time.time()
        # Cycle through the data set one minibatch per iteration
        ind = (i % (num_images // batch_size)) * batch_size
        data = images[ind:ind+batch_size]
        if i >= burnout and (i + 1 - burnout) % period == 0:
            _, vals[(i - burnout) // period] = sess.run([train_step, loss], feed_dict={data_in: data})
        else:
            sess.run([train_step, loss], feed_dict={data_in: data})
    # Average milliseconds per iteration
    overall = float(1000*(time.time() - start_time)) / float(epochs)
    sess.close()
    from tensorflow.python.framework import ops
    ops.reset_default_graph()
    return overall, compile_time
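# Note on API vintage: the calls above target the TensorFlow 0.x API this
# benchmark was written against. Under TF 1.x the equivalents are
# tf.summary.scalar, tf.global_variables_initializer, and
# tf.nn.sigmoid_cross_entropy_with_logits(labels=..., logits=...); TF 2.x
# would additionally require the tf.compat.v1 graph/session style.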
if __name__ == "__main__":
    # Usage: [backend] [repeats] [burnout] [epochs]
    if len(sys.argv) > 5:
        print("Expecting no more than 4 arguments")
    backend = 'theano'
    repeats = 100
    burnout = 100
    epochs = 500
    if len(sys.argv) > 1:
        backend = sys.argv[1]
    if len(sys.argv) > 2:
        repeats = int(sys.argv[2])
    if len(sys.argv) > 3:
        burnout = int(sys.argv[3])
    if len(sys.argv) > 4:
        epochs = int(sys.argv[4])
    batch_size_grid = [1000, 5000, 10000]
    factor_grid = [1, 5, 10]
    run_times = np.zeros((3, 3, repeats))
    compile_times = np.zeros((3, 3, repeats))
    if backend == 'theano':
        run_func = main_theano
    else:
        run_func = main_tf
    for b, batch_size in enumerate(batch_size_grid):
        for f, factor in enumerate(factor_grid):
            print("Running for batch size", batch_size, "and factor", factor)
            for i in range(repeats):
                run_times[b, f, i], compile_times[b, f, i] = run_func(batch_size, factor, burnout, epochs)
                print("Run:", run_times[b, f, i], compile_times[b, f, i])
    np.savez(backend + "_times", run_times=run_times, compile_times=compile_times)
    run_mean = np.mean(run_times, axis=2)
    run_std = np.std(run_times, axis=2)
    compile_mean = np.mean(compile_times, axis=2)
    compile_std = np.std(compile_times, axis=2)
    print("Run Means:")
    print(run_mean)
    print("Run Stds:")
    print(run_std)
    print("Compile Means:")
    print(compile_mean)
    print("Compile Stds:")
    print(compile_std)
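For reference, a typical invocation might look like the following (assuming the gist is saved as hinton_benchmark.py, a hypothetical filename). The positional arguments are backend, repeats, burnout and epochs, and the timing arrays are written to theano_times.npz or tf_times.npz:

    python hinton_benchmark.py theano 10 100 500
    python hinton_benchmark.py tf 10 100 500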