[Python] Autoencoder with chainer [Relu, 1000units, Dropout:activate]
# The code on this page is based on the following Chainer example. Thanks!
# https://github.com/pfnet/chainer/tree/master/examples/mnist/train_mnist.py
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_mldata
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F
import sys, time, math

plt.style.use('ggplot')
# draw an image of a handwritten digit
def draw_digit_ae(data, n, row, col, _type):
    size = 28
    plt.subplot(row, col, n)
    Z = data.reshape(size, size)   # convert from vector to 28x28 matrix
    Z = Z[::-1, :]                 # flip vertically
    plt.xlim(0, 28)
    plt.ylim(0, 28)
    plt.pcolor(Z)
    plt.title("type=%s" % (_type), size=8)
    plt.gray()
    plt.tick_params(labelbottom="off")
    plt.tick_params(labelleft="off")
# minibatch size for one step of stochastic gradient descent
batchsize = 100

# number of training epochs
n_epoch = 30

# number of hidden units
n_units = 1000

# whether to add noise to the inputs (denoising autoencoder)
noised = False
# Download the MNIST handwritten digit data
# (cached at $HOME/scikit_learn_data/mldata/mnist-original.mat)
print 'fetch MNIST dataset'
mnist = fetch_mldata('MNIST original')
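# --- note added for this writeup (not part of the original gist) ---
# fetch_mldata has been removed from recent scikit-learn releases. On newer
# versions the same data can be loaded with fetch_openml instead, e.g.:
#   from sklearn.datasets import fetch_openml
#   mnist = fetch_openml('mnist_784', version=1)
# (depending on the scikit-learn version, as_frame=False may be needed so
# that mnist.data comes back as a NumPy array). The rest of the script only
# uses mnist.data and mnist.target.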
# mnist.data : 70,000 samples of 784-dimensional vectors
mnist.data = mnist.data.astype(np.float32)
mnist.data /= 255     # normalize pixel values to [0, 1]

# mnist.target : ground-truth labels (not used by the autoencoder below)
mnist.target = mnist.target.astype(np.int32)
# Use the first N samples for training and the rest for testing.
# The reconstruction targets are a clean copy taken before any noise is added.
N = 60000
y_train, y_test = np.split(mnist.data.copy(), [N])
N_test = y_test.shape[0]
if noised:
    # Add noise: zero out a random noise_ratio fraction of the pixels of each
    # input image (the targets above stay clean, so this becomes a denoising
    # autoencoder)
    noise_ratio = 0.2
    for data in mnist.data:
        perm = np.random.permutation(mnist.data.shape[1])[:int(mnist.data.shape[1]*noise_ratio)]
        data[perm] = 0.0

x_train, x_test = np.split(mnist.data, [N])
# Autoencoder model: 784-dimensional input, 784-dimensional output, 2 layers
model = FunctionSet(l1=F.Linear(784, n_units),
                    l2=F.Linear(n_units, 784))
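# Note (added commentary, not part of the original gist): the encoder (l1)
# and decoder (l2) weights are learned independently here; no weight tying
# between the two layers is applied.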
# Neural net architecture
def forward(x_data, y_data, train=True):
    x, t = Variable(x_data), Variable(y_data)
    y = F.dropout(F.relu(model.l1(x)), train=train)
    x_hat = F.dropout(model.l2(y), train=train)
    # use mean squared error as the reconstruction loss
    return F.mean_squared_error(x_hat, t)
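# --- illustrative sketch added for this writeup (not part of the original
# gist) --- After training, the same two layers can be used to reconstruct a
# single image with dropout disabled (train=False); the visualization code
# further below does this inline. The helper name `reconstruct` is
# hypothetical.
def reconstruct(x_vec):
    x = Variable(x_vec.reshape(1, 784).astype(np.float32))
    h = F.dropout(F.relu(model.l1(x)), train=False)
    return model.l2(h).data.reshape(784)   # reconstructed 784-dim image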
# Setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model.collect_parameters())

l1_W = []
l2_W = []
l1_b = []
l2_b = []
train_loss = []
test_loss = []
test_mean_loss = []
prev_loss = -1
loss_std = 0
loss_rate = []
# Learning loop
for epoch in xrange(1, n_epoch+1):
    print 'epoch', epoch
    start_time = time.clock()

    # training
    perm = np.random.permutation(N)
    sum_loss = 0
    for i in xrange(0, N, batchsize):
        x_batch = x_train[perm[i:i+batchsize]]
        y_batch = y_train[perm[i:i+batchsize]]

        optimizer.zero_grads()
        loss = forward(x_batch, y_batch)
        loss.backward()
        optimizer.update()

        train_loss.append(loss.data)
        sum_loss += float(cuda.to_cpu(loss.data)) * batchsize

    print '\ttrain mean loss={} '.format(sum_loss / N)

    # evaluation
    sum_loss = 0
    for i in xrange(0, N_test, batchsize):
        x_batch = x_test[i:i+batchsize]
        y_batch = y_test[i:i+batchsize]

        loss = forward(x_batch, y_batch, train=False)
        test_loss.append(loss.data)
        sum_loss += float(cuda.to_cpu(loss.data)) * batchsize

    loss_val = sum_loss / N_test
    print '\ttest mean loss={}'.format(loss_val)

    if epoch == 1:
        loss_std = loss_val
        loss_rate.append(100)
    else:
        print '\tratio :%.3f' % (loss_val/loss_std * 100)
        loss_rate.append(loss_val/loss_std * 100)

    if prev_loss >= 0:
        diff = loss_val - prev_loss
        ratio = diff/prev_loss * 100
        print '\timpr rate:%.3f' % (-ratio)

    prev_loss = sum_loss / N_test
    test_mean_loss.append(loss_val)
    # keep a per-epoch snapshot of the weights (copied, so later updates do
    # not overwrite earlier snapshots in place)
    l1_W.append(model.l1.W.copy())
    l2_W.append(model.l2.W.copy())

    end_time = time.clock()
    print "\ttime = %.3f" % (end_time-start_time)
# Draw mean loss graph
plt.style.use('ggplot')
plt.figure(figsize=(10,7))
plt.plot(test_mean_loss, lw=1)
plt.title("")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.show()
# Visualize inputs and their reconstructions
plt.style.use('fivethirtyeight')
plt.figure(figsize=(15,25))

num = 100
cnt = 0

ans_list = []
pred_list = []
for idx in np.random.permutation(N_test)[:num]:
    xxx = x_test[idx].astype(np.float32)
    h1 = F.dropout(F.relu(model.l1(Variable(xxx.reshape(1,784)))), train=False)
    y = model.l2(h1)

    cnt += 1
    ans_list.append(x_test[idx])
    pred_list.append(y)
cnt = 0
for i in range(int(num/10)):
    for j in range(10):
        img_no = i*10 + j
        pos = (2*i)*10 + j
        draw_digit_ae(ans_list[img_no], pos+1, 20, 10, "ans")
    for j in range(10):
        img_no = i*10 + j
        pos = (2*i+1)*10 + j
        draw_digit_ae(pred_list[img_no].data, pos+1, 20, 10, "pred")
plt.show()
# Visualize W(1), the encoder weights
plt.style.use('fivethirtyeight')

# draw digit images
def draw_digit_w1(data, n, i, length):
    size = 28
    plt.subplot(int(math.ceil(length/15.0)), 15, n)
    Z = data.reshape(size, size)   # convert from vector to 28x28 matrix
    Z = Z[::-1, :]                 # flip vertically
    plt.xlim(0, size)
    plt.ylim(0, size)
    plt.pcolor(Z)
    plt.title("%d" % i, size=9)
    plt.gray()
    plt.tick_params(labelbottom="off")
    plt.tick_params(labelleft="off")
plt.figure(figsize=(15,70))
cnt = 1
for i in range(len(l1_W[9])):
    # size the subplot grid by the number of hidden units being drawn,
    # not by the 784-pixel length of each weight vector
    draw_digit_w1(l1_W[9][i], cnt, i, len(l1_W[9]))
    cnt += 1
plt.show()
# Visualize W(2).T, the decoder weights (transposed)
plt.style.use('fivethirtyeight')

# draw digit images
def draw_digit2(data, i, length):
    size = 28
    plt.subplot(int(math.ceil(length/15.0))+1, 15, i+1)
    Z = data.reshape(size, size)   # convert from vector to 28x28 matrix
    Z = Z[::-1, :]                 # flip vertically
    plt.xlim(0, 27)
    plt.ylim(0, 27)
    plt.pcolor(Z)
    plt.title("%d" % i, size=9)
    plt.gray()
    plt.tick_params(labelbottom="off")
    plt.tick_params(labelleft="off")
W_T = np.array(l2_W[9]).T
plt.figure(figsize=(15,30))
for i in range(W_T.shape[0]):
    draw_digit2(W_T[i], i, W_T.shape[0])
plt.show()