@TakuTsuzuki
Last active August 29, 2015 14:06
5-layer denoising autoencoder (greedy layer-wise training on MNIST)
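This gist bundles three Python 2 scripts: the 5-layer stacked denoising autoencoder and its greedy layer-wise training loop (first file), a decoding script that renders what each layer's hidden units reconstruct in pixel space (second file), and a small utils module for loading MNIST and tiling weight images (third file). In outline, the training scheme implemented below is (pseudocode only, not part of the gist):

# codes = training images
# for each of the 5 layers:
#     train the layer as a denoising autoencoder on `codes`
#     codes = layer.encode(codes)   # inputs for the next layer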
import numpy
import argparse
import cPickle as pickle
import utils
class Autoencoder3(object):
    def __init__(self, n_visible = 784, n_hidden1 = 784,
                 n_hidden2 = 784, n_hidden3 = 784,
                 n_hidden4 = 784, n_hidden5 = 784,
                 W1 = None, W2 = None, b1 = None, b2 = None,    # layer 1
                 W3 = None, W4 = None, b3 = None, b4 = None,    # layer 2
                 W5 = None, W6 = None, b5 = None, b6 = None,    # layer 3
                 W7 = None, W8 = None, b7 = None, b8 = None,    # layer 4
                 W9 = None, W10 = None, b9 = None, b10 = None,  # layer 5
                 noise = 0.0, alpha = 0.1, untied = False):
        self.rng = numpy.random.RandomState(1)
        # One uniform initialization range per layer: sqrt(6 / (fan_in + fan_out + 1)).
        r = numpy.sqrt(6. / (n_hidden1 + n_visible + 1))
        l = numpy.sqrt(6. / (n_hidden2 + n_hidden1 + 1))
        m = numpy.sqrt(6. / (n_hidden3 + n_hidden2 + 1))
        n = numpy.sqrt(6. / (n_hidden4 + n_hidden3 + 1))
        o = numpy.sqrt(6. / (n_hidden5 + n_hidden4 + 1))
        # Encoder weights (odd-numbered); use the supplied matrices when given.
        self.W1 = self.random_init(r, (n_hidden1, n_visible)) if W1 is None else W1
        self.W3 = self.random_init(l, (n_hidden2, n_hidden1)) if W3 is None else W3
        self.W5 = self.random_init(m, (n_hidden3, n_hidden2)) if W5 is None else W5
        self.W7 = self.random_init(n, (n_hidden4, n_hidden3)) if W7 is None else W7
        self.W9 = self.random_init(o, (n_hidden5, n_hidden4)) if W9 is None else W9
        # Decoder weights (even-numbered): tied to the encoder transposes unless `untied`.
        if W2 is None:
            W2 = self.random_init(r, (n_visible, n_hidden1)) if untied else self.W1.T
        if W4 is None:
            W4 = self.random_init(l, (n_hidden1, n_hidden2)) if untied else self.W3.T
        if W6 is None:
            W6 = self.random_init(m, (n_hidden2, n_hidden3)) if untied else self.W5.T
        if W8 is None:
            W8 = self.random_init(n, (n_hidden3, n_hidden4)) if untied else self.W7.T
        if W10 is None:
            W10 = self.random_init(o, (n_hidden4, n_hidden5)) if untied else self.W9.T
        self.W2 = W2
        self.W4 = W4
        self.W6 = W6
        self.W8 = W8
        self.W10 = W10
        # Biases: odd-numbered for the encoders, even-numbered for the decoders.
        self.b1 = numpy.zeros(n_hidden1) if b1 is None else b1
        self.b2 = numpy.zeros(n_visible) if b2 is None else b2
        self.b3 = numpy.zeros(n_hidden2) if b3 is None else b3
        self.b4 = numpy.zeros(n_hidden1) if b4 is None else b4
        self.b5 = numpy.zeros(n_hidden3) if b5 is None else b5
        self.b6 = numpy.zeros(n_hidden2) if b6 is None else b6
        self.b7 = numpy.zeros(n_hidden4) if b7 is None else b7
        self.b8 = numpy.zeros(n_hidden3) if b8 is None else b8
        self.b9 = numpy.zeros(n_hidden5) if b9 is None else b9
        self.b10 = numpy.zeros(n_hidden4) if b10 is None else b10
        self.n_visible = n_visible
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_hidden3 = n_hidden3
        self.n_hidden4 = n_hidden4
        self.n_hidden5 = n_hidden5
        self.alpha = alpha
        self.noise = noise
        self.untied = untied
    def random_init(self, r, size):
        return numpy.array(self.rng.uniform(low = -r, high = r, size = size))

    def sigmoid(self, x):
        return 1. / (1. + numpy.exp(-x))

    def sigmoid_prime(self, x):
        # x is expected to already be a sigmoid activation.
        return x * (1. - x)

    def corrupt(self, x, noise):
        # Masking noise: each input unit is zeroed with probability `noise`.
        return self.rng.binomial(size = x.shape, n = 1, p = 1.0 - noise) * x

    def encode(self, W, x, b):
        return self.sigmoid(numpy.dot(W, x) + b)

    def decode(self, W, y, b):
        return self.sigmoid(numpy.dot(W, y) + b)

    def get_cost(self, x, z):
        # Cross-entropy reconstruction cost.
        eps = 1e-10
        return -numpy.sum(x * numpy.log(z + eps) + (1. - x) * numpy.log(1. - z + eps))
    def get_cost_and_grad1(self, x_batch, dnum):
        # Cost and gradients for the first denoising autoencoder (W1/W2, b1/b2).
        cost = 0.
        grad_W1 = numpy.zeros(self.W1.shape)
        grad_W2 = numpy.zeros(self.W2.shape)
        grad_b1 = numpy.zeros(self.b1.shape)
        grad_b2 = numpy.zeros(self.b2.shape)
        for x in x_batch:
            tilde_x = self.corrupt(x, self.noise)
            p = self.encode(self.W1, tilde_x, self.b1)
            y = self.decode(self.W2, p, self.b2)
            cost += self.get_cost(x, y)
            # With a sigmoid output and cross-entropy cost the output delta
            # reduces to (y - x).
            delta1 = - (x - y)
            if self.untied:
                grad_W2 += numpy.outer(delta1, p)
            else:
                # Tied weights: the decoder gradient is folded into W1.
                grad_W1 += numpy.outer(delta1, p).T
            grad_b2 += delta1
            delta2 = numpy.dot(self.W2.T, delta1) * self.sigmoid_prime(p)
            grad_W1 += numpy.outer(delta2, tilde_x)
            grad_b1 += delta2
        cost /= len(x_batch)
        grad_W1 /= len(x_batch)
        grad_W2 /= len(x_batch)
        grad_b1 /= len(x_batch)
        grad_b2 /= len(x_batch)
        return cost, grad_W1, grad_W2, grad_b1, grad_b2
    def get_cost_and_grad2(self, x_batch, dnum):
        cost = 0.
        grad_W3 = numpy.zeros(self.W3.shape)
        grad_W4 = numpy.zeros(self.W4.shape)
        grad_b3 = numpy.zeros(self.b3.shape)
        grad_b4 = numpy.zeros(self.b4.shape)
        for x in x_batch:
            tilde_x = self.corrupt(x, self.noise)
            p = self.encode(self.W3, tilde_x, self.b3)
            y = self.decode(self.W4, p, self.b4)
            cost += self.get_cost(x, y)
            delta1 = - (x - y)
            if self.untied:
                grad_W4 += numpy.outer(delta1, p)
            else:
                grad_W3 += numpy.outer(delta1, p).T
            grad_b4 += delta1
            delta2 = numpy.dot(self.W4.T, delta1) * self.sigmoid_prime(p)
            grad_W3 += numpy.outer(delta2, tilde_x)
            grad_b3 += delta2
        cost /= len(x_batch)
        grad_W3 /= len(x_batch)
        grad_W4 /= len(x_batch)
        grad_b3 /= len(x_batch)
        grad_b4 /= len(x_batch)
        return cost, grad_W3, grad_W4, grad_b3, grad_b4

    def get_cost_and_grad3(self, x_batch, dnum):
        cost = 0.
        grad_W5 = numpy.zeros(self.W5.shape)
        grad_W6 = numpy.zeros(self.W6.shape)
        grad_b5 = numpy.zeros(self.b5.shape)
        grad_b6 = numpy.zeros(self.b6.shape)
        for x in x_batch:
            tilde_x = self.corrupt(x, self.noise)
            p = self.encode(self.W5, tilde_x, self.b5)
            y = self.decode(self.W6, p, self.b6)
            cost += self.get_cost(x, y)
            delta1 = - (x - y)
            if self.untied:
                grad_W6 += numpy.outer(delta1, p)
            else:
                grad_W5 += numpy.outer(delta1, p).T
            grad_b6 += delta1
            delta2 = numpy.dot(self.W6.T, delta1) * self.sigmoid_prime(p)
            grad_W5 += numpy.outer(delta2, tilde_x)
            grad_b5 += delta2
        cost /= len(x_batch)
        grad_W5 /= len(x_batch)
        grad_W6 /= len(x_batch)
        grad_b5 /= len(x_batch)
        grad_b6 /= len(x_batch)
        return cost, grad_W5, grad_W6, grad_b5, grad_b6

    def get_cost_and_grad4(self, x_batch, dnum):
        cost = 0.
        grad_W7 = numpy.zeros(self.W7.shape)
        grad_W8 = numpy.zeros(self.W8.shape)
        grad_b7 = numpy.zeros(self.b7.shape)
        grad_b8 = numpy.zeros(self.b8.shape)
        for x in x_batch:
            tilde_x = self.corrupt(x, self.noise)
            p = self.encode(self.W7, tilde_x, self.b7)
            y = self.decode(self.W8, p, self.b8)
            cost += self.get_cost(x, y)
            delta1 = - (x - y)
            if self.untied:
                grad_W8 += numpy.outer(delta1, p)
            else:
                grad_W7 += numpy.outer(delta1, p).T
            grad_b8 += delta1
            delta2 = numpy.dot(self.W8.T, delta1) * self.sigmoid_prime(p)
            grad_W7 += numpy.outer(delta2, tilde_x)
            grad_b7 += delta2
        cost /= len(x_batch)
        grad_W7 /= len(x_batch)
        grad_W8 /= len(x_batch)
        grad_b7 /= len(x_batch)
        grad_b8 /= len(x_batch)
        return cost, grad_W7, grad_W8, grad_b7, grad_b8

    def get_cost_and_grad5(self, x_batch, dnum):
        cost = 0.
        grad_W9 = numpy.zeros(self.W9.shape)
        grad_W10 = numpy.zeros(self.W10.shape)
        grad_b9 = numpy.zeros(self.b9.shape)
        grad_b10 = numpy.zeros(self.b10.shape)
        for x in x_batch:
            tilde_x = self.corrupt(x, self.noise)
            p = self.encode(self.W9, tilde_x, self.b9)
            y = self.decode(self.W10, p, self.b10)
            cost += self.get_cost(x, y)
            delta1 = - (x - y)
            if self.untied:
                grad_W10 += numpy.outer(delta1, p)
            else:
                grad_W9 += numpy.outer(delta1, p).T
            grad_b10 += delta1
            delta2 = numpy.dot(self.W10.T, delta1) * self.sigmoid_prime(p)
            grad_W9 += numpy.outer(delta2, tilde_x)
            grad_b9 += delta2
        cost /= len(x_batch)
        grad_W9 /= len(x_batch)
        grad_W10 /= len(x_batch)
        grad_b9 /= len(x_batch)
        grad_b10 /= len(x_batch)
        return cost, grad_W9, grad_W10, grad_b9, grad_b10
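
    # Note: get_cost_and_grad1 .. get_cost_and_grad5 differ only in which
    # (W_encoder, b_encoder, W_decoder, b_decoder) quadruple they use, so a
    # single parameterized helper would remove the duplication.  Sketch only
    # (this method and its name are not part of the original gist):
    #
    #   def _cost_and_grad(self, W_enc, b_enc, W_dec, b_dec, x_batch):
    #       # same body as get_cost_and_grad1 with W1/b1/W2/b2 replaced by the
    #       # arguments; returns (cost, grad_W_enc, grad_W_dec, grad_b_enc, grad_b_dec)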
    def train1(self, X, epochs1 = 14, batch_size = 20, alpha = 0.1):
        batch_num = len(X) / batch_size
        self.alpha = alpha
        print "1st learning"
        for epoch in range(epochs1):
            total_cost = 0.0
            for i in range(batch_num):
                batch = X[i*batch_size : (i+1)*batch_size]
                cost, gradW1, gradW2, gradb1, gradb2 = self.get_cost_and_grad1(batch, len(X))
                total_cost += cost
                self.W1 -= self.alpha * gradW1
                self.W2 -= self.alpha * gradW2
                self.b1 -= self.alpha * gradb1
                self.b2 -= self.alpha * gradb2
                grad_sum = gradW1.sum() + gradW2.sum() + gradb1.sum() + gradb2.sum()
            #print epoch,
            print (1. / batch_num) * total_cost
        return self.W1, self.W2, self.b1, self.b2

    def train2(self, X, epochs2 = 20, batch_size = 20, alpha = 0.1):
        batch_num = len(X) / batch_size
        self.alpha = alpha
        print "2nd learning"
        for epoch in range(epochs2):
            total_cost = 0.0
            for i in range(batch_num):
                batch = X[i*batch_size : (i+1)*batch_size]
                cost, gradW3, gradW4, gradb3, gradb4 = self.get_cost_and_grad2(batch, len(X))
                total_cost += cost
                self.W3 -= self.alpha * gradW3
                self.W4 -= self.alpha * gradW4
                self.b3 -= self.alpha * gradb3
                self.b4 -= self.alpha * gradb4
                grad_sum = gradW3.sum() + gradW4.sum() + gradb3.sum() + gradb4.sum()
            #print epoch,
            print (1. / batch_num) * total_cost
        return self.W3, self.W4, self.b3, self.b4

    def train3(self, X, epochs3 = 13, batch_size = 20, alpha = 0.1):
        batch_num = len(X) / batch_size
        self.alpha = alpha
        print "3rd learning"
        for epoch in range(epochs3):
            total_cost = 0.0
            for i in range(batch_num):
                batch = X[i*batch_size : (i+1)*batch_size]
                cost, gradW5, gradW6, gradb5, gradb6 = self.get_cost_and_grad3(batch, len(X))
                total_cost += cost
                self.W5 -= self.alpha * gradW5
                self.W6 -= self.alpha * gradW6
                self.b5 -= self.alpha * gradb5
                self.b6 -= self.alpha * gradb6
                grad_sum = gradW5.sum() + gradW6.sum() + gradb5.sum() + gradb6.sum()
            #print epoch,
            print (1. / batch_num) * total_cost
        return self.W5, self.W6, self.b5, self.b6

    def train4(self, X, epochs4 = 12, batch_size = 20, alpha = 0.1):
        batch_num = len(X) / batch_size
        self.alpha = alpha
        print "4th learning"
        for epoch in range(epochs4):
            total_cost = 0.0
            for i in range(batch_num):
                batch = X[i*batch_size : (i+1)*batch_size]
                cost, gradW7, gradW8, gradb7, gradb8 = self.get_cost_and_grad4(batch, len(X))
                total_cost += cost
                self.W7 -= self.alpha * gradW7
                self.W8 -= self.alpha * gradW8
                self.b7 -= self.alpha * gradb7
                self.b8 -= self.alpha * gradb8
                grad_sum = gradW7.sum() + gradW8.sum() + gradb7.sum() + gradb8.sum()
            #print epoch,
            print (1. / batch_num) * total_cost
        return self.W7, self.W8, self.b7, self.b8

    def train5(self, X, epochs5 = 11, batch_size = 20, alpha = 0.1):
        batch_num = len(X) / batch_size
        self.alpha = alpha
        print "5th learning"
        for epoch in range(epochs5):
            total_cost = 0.0
            for i in range(batch_num):
                batch = X[i*batch_size : (i+1)*batch_size]
                cost, gradW9, gradW10, gradb9, gradb10 = self.get_cost_and_grad5(batch, len(X))
                total_cost += cost
                self.W9 -= self.alpha * gradW9
                self.W10 -= self.alpha * gradW10
                self.b9 -= self.alpha * gradb9
                self.b10 -= self.alpha * gradb10
                grad_sum = gradW9.sum() + gradW10.sum() + gradb9.sum() + gradb10.sum()
            #print epoch,
            print (1. / batch_num) * total_cost
        return self.W9, self.W10, self.b9, self.b10
    def reshape_b(self, x, Y):
        # Tile the 1-D bias x until it has at least as many rows as Y has
        # samples, then transpose, so it can be added to a (dim, n_samples)
        # matrix of encodings.
        while len(x) < len(Y):
            x = numpy.vstack([x, x])
        x = x[:len(Y)]
        x = x.T
        return x

    def dump_weights(self, save_path):
        with open(save_path, 'w') as f:
            d = {
                "W1" : self.W1,
                "W2" : self.W2,
                "W3" : self.W3,
                "W4" : self.W4,
                "W5" : self.W5,
                "W6" : self.W6,
                "W7" : self.W7,
                "W8" : self.W8,
                "W9" : self.W9,
                "W10" : self.W10,
                "b1" : self.b1,
                "b2" : self.b2,
                "b3" : self.b3,
                "b4" : self.b4,
                "b5" : self.b5,
                "b6" : self.b6,
                "b7" : self.b7,
                "b8" : self.b8,
                "b9" : self.b9,
                "b10" : self.b10,
                "n_visible" : self.n_visible,
                "n_hidden1" : self.n_hidden1,
                "n_hidden2" : self.n_hidden2,
                "n_hidden3" : self.n_hidden3,
                "n_hidden4" : self.n_hidden4,
                "n_hidden5" : self.n_hidden5,
            }
            pickle.dump(d, f)

    def visualize_weights(self, X):
        tile_size = (int(numpy.sqrt(X[0].size)), int(numpy.sqrt(X[0].size)))
        panel_shape = (int(numpy.sqrt(X.shape[0])), int(numpy.sqrt(X.shape[0])))
        return utils.visualize_weights(X, panel_shape, tile_size)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("n_hidden1", type = int, default = 100)
parser.add_argument("n_hidden2", type = int, default = 81 )
parser.add_argument("n_hidden3", type = int, default = 64 )
parser.add_argument("n_hidden4", type = int, default = 49 )
parser.add_argument("n_hidden5", type = int, default = 36 )
parser.add_argument("-e1", "--epochs1", type = int, default = 14)
parser.add_argument("-e2", "--epochs2", type = int, default = 20)
parser.add_argument("-e3", "--epochs3", type = int, default = 13)
parser.add_argument("-e4", "--epochs4", type = int, default = 12)
parser.add_argument("-e5", "--epochs5", type = int, default = 11)
parser.add_argument("-b", "--batch_size", type = int, default = 20)
parser.add_argument("-n", "--noise", type=float, choices=[i/10. for i in xrange(11)], default = 0.0)
parser.add_argument("-a", "--alpha", type = float, choices=[i/400. for i in xrange(401)], default = 0.1)
parser.add_argument('-o', '--output', type = unicode)
parser.add_argument('-v', '--visualize', action = "store_true")
parser.add_argument('-u', '--untied', action = "store_true")
args = parser.parse_args()
train_data, test_data, valid_data = utils.load_data()
ae = Autoencoder3(n_hidden1 = args.n_hidden1, n_hidden2 = args.n_hidden2, n_hidden3 = args.n_hidden3,
n_hidden4 = args.n_hidden4, n_hidden5 = args.n_hidden5,
noise = args.noise, untied = args.untied)
try:
ae.train1(train_data[0], epochs1 = args.epochs1, alpha = args.alpha, batch_size = args.batch_size)
ae.b1 = ae.reshape_b(ae.b1,train_data[0])
Y = ae.encode(ae.W1, train_data[0].T, ae.b1)
ae.train2( Y.T , epochs2 = args.epochs2, alpha = args.alpha, batch_size = args.batch_size)
ae.b3 = ae.reshape_b(ae.b3,train_data[0])
Y = ae.encode(ae.W3, Y, ae.b3)
ae.train3( Y.T , epochs3 = args.epochs3, alpha = args.alpha, batch_size = args.batch_size)
ae.b5 = ae.reshape_b(ae.b5,train_data[0])
Y = ae.encode(ae.W5, Y, ae.b5)
ae.train4( Y.T , epochs4 = args.epochs4, alpha = args.alpha, batch_size = args.batch_size)
ae.b7 = ae.reshape_b(ae.b7,train_data[0])
Y = ae.encode(ae.W7, Y, ae.b7)
ae.train5( Y.T , epochs5 = args.epochs5, alpha = args.alpha, batch_size = args.batch_size)
except KeyboardInterrupt:
exit()
pass
save_name = args.output
if save_name == None:
save_name = '%sh1%d_h2%d_h3%d_h4%d_h5%d_e1%d_e2%d_e3%d_e4%d_e5%d_b%d_n%d_a1%d'%(
'untied_' if args.untied else 'tied_',
args.n_hidden1,
args.n_hidden2,
args.n_hidden3,
args.n_hidden4,
args.n_hidden5,
args.epochs1,
args.epochs2,
args.epochs3,
args.epochs4,
args.epochs5,
args.batch_size,
args.noise*100,
args.alpha,
)
ae.dump_weights(save_name + "5" + '.pkl')
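
# Example invocation (the file name stacked_dae.py is an assumption; the five
# positional arguments are the hidden-layer sizes). With these values the
# default output name matches the weight file hard-coded in the decoding
# script below:
#
#   python stacked_dae.py 169 100 64 49 36 -e1 15 -e2 15 -e3 15 -e4 15 -e5 15 -n 0.4 -a 0.1
#
# dump_weights then writes the pickled parameters to <save_name>5.pkl.

# ---- second file in the gist: decode / visualize the learned layers ----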
import numpy
import argparse
import cPickle as pickle
import utils
def sigmoid(x):
    return 1. / (1. + numpy.exp(-x))

def visualize_weights(X):
    tile_size = (int(numpy.sqrt(X[0].size)), int(numpy.sqrt(X[0].size)))
    panel_shape = (int(numpy.sqrt(X.shape[0])), int(numpy.sqrt(X.shape[0])))
    return utils.visualize_weights(X, panel_shape, tile_size)
def load():
    # Hard-coded weight file produced by the training script above.
    with open('tied_h1169_h2100_h364_h449_h536_e115_e215_e315_e415_e515_b20_n40_a105.pkl', 'r') as f:
        w = pickle.load(f)
    W2 = w['W2']
    W4 = w['W4']
    W6 = w['W6']
    W8 = w['W8']
    W10 = w['W10']
    b2 = w['b2']
    b4 = w['b4']
    b6 = w['b6']
    b8 = w['b8']
    b10 = w['b10']
    n_visible = w['n_visible']
    n_hidden1 = w['n_hidden1']
    n_hidden2 = w['n_hidden2']
    n_hidden3 = w['n_hidden3']
    n_hidden4 = w['n_hidden4']
    n_hidden5 = w['n_hidden5']
    return W2, W4, W6, W8, W10, b2, b4, b6, b8, b10, n_visible, n_hidden1, n_hidden2, n_hidden3, n_hidden4, n_hidden5
def decode_image(W2, W4, W6, W8, W10, b2, b4, b6, b8, b10,
                 n_hidden1, n_hidden2, n_hidden3, n_hidden4, n_hidden5):
    # Tile each decoder bias into 2**9 = 512 copies (hard-coded: assumes no
    # layer has more than 512 units), transpose, and keep one column per
    # hidden unit so the bias lines up with the identity-matrix "codes" below.
    for i in range(0, 9):
        b2 = numpy.vstack([b2, b2])
        b4 = numpy.vstack([b4, b4])
        b6 = numpy.vstack([b6, b6])
        b8 = numpy.vstack([b8, b8])
        b10 = numpy.vstack([b10, b10])
    b2 = b2.T
    b4 = b4.T
    b6 = b6.T
    b8 = b8.T
    b10 = b10.T
    b2 = b2[:, :n_hidden1]
    b4 = b4[:, :n_hidden2]
    b6 = b6[:, :n_hidden3]
    b8 = b8[:, :n_hidden4]
    b10 = b10[:, :n_hidden5]
    # Column j of E_k activates only hidden unit j of layer k; decoding it
    # back through the stack shows that unit's filter in input space.
    E1 = numpy.eye(n_hidden1)
    E2 = numpy.eye(n_hidden2)
    E3 = numpy.eye(n_hidden3)
    E4 = numpy.eye(n_hidden4)
    E5 = numpy.eye(n_hidden5)
    # Decode the 1st layer.
    decode1 = sigmoid(numpy.dot(W2, E1) + b2).T
    # Decode the 2nd layer.
    b2 = b2[:, :n_hidden2]
    X = sigmoid(numpy.dot(W4, E2) + b4)
    decode2 = sigmoid(numpy.dot(W2, X) + b2).T
    # Decode the 3rd layer.
    b2 = b2[:, :n_hidden3]
    b4 = b4[:, :n_hidden3]
    X = sigmoid(numpy.dot(W6, E3) + b6)
    X = sigmoid(numpy.dot(W4, X) + b4)
    decode3 = sigmoid(numpy.dot(W2, X) + b2).T
    # Decode the 4th layer.
    b2 = b2[:, :n_hidden4]
    b4 = b4[:, :n_hidden4]
    b6 = b6[:, :n_hidden4]
    X = sigmoid(numpy.dot(W8, E4) + b8)
    X = sigmoid(numpy.dot(W6, X) + b6)
    X = sigmoid(numpy.dot(W4, X) + b4)
    decode4 = sigmoid(numpy.dot(W2, X) + b2).T
    # Decode the 5th layer.
    b2 = b2[:, :n_hidden5]
    b4 = b4[:, :n_hidden5]
    b6 = b6[:, :n_hidden5]
    b8 = b8[:, :n_hidden5]
    X = sigmoid(numpy.dot(W10, E5) + b10)
    X = sigmoid(numpy.dot(W8, X) + b8)
    X = sigmoid(numpy.dot(W6, X) + b6)
    X = sigmoid(numpy.dot(W4, X) + b4)
    decode5 = sigmoid(numpy.dot(W2, X) + b2).T
    return decode1, decode2, decode3, decode4, decode5
if __name__ == "__main__":
W2,W4,W6,W8,W10,b2,b4,b6,b8,b10,n_visible,n_hidden1,n_hidden2,n_hidden3,n_hidden4,n_hidden5 = load()
decode1,decode2,decode3,decode4,decode5 =decode_image(W2,W4,W6,W8,W10,b2,b4,b6,b8,b10,n_visible,n_hidden1,n_hidden2,n_hidden3,n_hidden4,n_hidden5)
img1 = visualize_weights(decode1)
img1.save("decode1" + ".bmp")
img2 = visualize_weights(decode2)
img2.save("decode2" + ".bmp")
img3 = visualize_weights(decode3)
img3.save("decode3" + ".bmp")
img4 = visualize_weights(decode4)
img4.save("decode4" + ".bmp")
img5 = visualize_weights(decode5)
img5.save("decode5" + ".bmp")
import numpy
import cPickle as pickle
import gzip
import Image
def load_data():
    # Load the MNIST pickle; each split is an (images, labels) pair.
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        tr, te, vl = pickle.load(f)
    return tr, te, vl

def visualize_weights(weights, panel_shape, tile_size):
    # Lay the rows of `weights` out as tiles on a panel_shape grid, with a
    # one-pixel margin between tiles, and return a PIL image.
    def scale(x):
        eps = 1e-8
        x = x.copy()
        x -= x.min()
        x *= 1.0 / (x.max() + eps)
        return 255.0 * x

    margin_y = numpy.zeros(tile_size[0])
    margin_x = numpy.zeros((tile_size[0] + 1) * panel_shape[0])
    image = margin_x.copy()
    for y in range(panel_shape[1]):
        foo = [numpy.c_[scale(x.reshape(tile_size)), margin_y]
               for x in weights[y * panel_shape[0]:(y + 1) * panel_shape[0]]]
        tmp = numpy.hstack(foo)
        tmp = numpy.vstack([tmp, margin_x])
        image = numpy.vstack([image, tmp])
    img = Image.fromarray(image)
    img = img.convert('RGB')
    return img
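
# Usage sketch (shapes are assumptions matching the scripts above): each row of
# a (169, 784) weight matrix is reshaped to a 28x28 tile and laid out on a
# 13x13 panel.
#
#   W = numpy.random.randn(169, 784)
#   img = visualize_weights(W, (13, 13), (28, 28))
#   img.save('weights.bmp')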