How to tune deep learning hyperparameters with hyperopt.
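The pattern hyperopt expects: an objective function that takes a dict of sampled parameters and returns a scalar to minimize, a search space built from hp expressions, and a call to fmin with a search algorithm such as TPE. A minimal sketch of that loop on a toy quadratic (the names objective and x are illustrative only, not part of the script below):

from hyperopt import fmin, tpe, hp

def objective(params):
    # hyperopt minimizes whatever scalar this function returns
    x = params['x']
    return (x - 3.0) ** 2

space = {'x': hp.uniform('x', -10, 10)}
best = fmin(objective, space, algo=tpe.suggest, max_evals=50)
print(best)  # close to {'x': 3.0}

The full script below applies the same pattern, with the scalar objective computed from the validation loss of an MNIST training run in Chainer.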
try:
    import matplotlib
    matplotlib.use('Agg')
except ImportError:
    pass

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from hyperopt import fmin, tpe, hp
from hyperopt.pyll import scope


# Network definition: an MLP whose depth (3-5 layers) and per-layer widths
# are themselves hyperparameters.
class MLP(chainer.Chain):

    def __init__(self, n_units1, n_units2, n_units3, n_units4, n_out,
                 layer_num, activate):
        super(MLP, self).__init__(
            l1=L.Linear(None, n_units1),
            l2=L.Linear(None, n_units2),
            l3=L.Linear(None, n_units3),
            l4=L.Linear(None, n_units4),
            lfinal=L.Linear(None, n_out),
        )
        self.layer_num = layer_num
        if activate == 'relu':
            self.act = F.relu
        else:
            self.act = F.sigmoid

    def __call__(self, x):
        # Apply only as many hidden layers as layer_num asks for.
        h1 = self.act(self.l1(x))
        h2 = self.act(self.l2(h1))
        if self.layer_num == 3:
            return self.lfinal(h2)
        h3 = self.act(self.l3(h2))
        if self.layer_num == 4:
            return self.lfinal(h3)
        h4 = self.act(self.l4(h3))
        return self.lfinal(h4)


def main(params):
    epoch = 40
    gpu = 0  # set to -1 to train on the CPU
    n_out = 10
    batchsize = 100
    n_units1 = params['n_units1']
    n_units2 = params['n_units2']
    n_units3 = params['n_units3']
    n_units4 = params['n_units4']
    layer_num = params['layer_num']
    activate = params['activate']
    optimizer_name = params['optimizer_name']
    lr = params['lr']

    model = L.Classifier(MLP(n_units1, n_units2, n_units3, n_units4, n_out,
                             layer_num, activate))
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()
        model.to_gpu()

    # Setup an optimizer (lr is only used by MomentumSGD; Adam and AdaDelta
    # keep their default settings)
    if optimizer_name == 'Adam':
        optimizer = chainer.optimizers.Adam()
    elif optimizer_name == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta()
    else:
        optimizer = chainer.optimizers.MomentumSGD(lr=lr)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out='result')

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()

    # Pull the validation-loss history back out of the first PlotReport
    # (registered under the name 'PlotReport'). This pokes at Trainer
    # internals: _data maps each reported key to (iteration, value) pairs.
    valid_data = trainer._extensions['PlotReport'].extension._data
    loss_data = [data for i, data in valid_data['validation/main/loss']]

    # Objective for hyperopt: the sum of the 10 best epoch losses, which is
    # less noisy than the loss of any single epoch.
    best10_loss = sorted(loss_data)[:10]
    return sum(best10_loss)


if __name__ == '__main__':
    # To run a single training with fixed parameters instead of a search:
    # params = {'n_units1': 200,
    #           'n_units2': 200,
    #           'n_units3': 200,
    #           'n_units4': 200,
    #           'layer_num': 3,
    #           'activate': 'relu',
    #           'optimizer_name': 'Adam',
    #           'lr': 0.01,
    #           }
    # main(params)

    # hp.quniform draws floats, so wrap it in scope.int wherever an integer
    # (unit count, layer count) is required.
    space = {'n_units1': scope.int(hp.quniform('n_units1', 100, 300, 50)),
             'n_units2': scope.int(hp.quniform('n_units2', 100, 300, 50)),
             'n_units3': scope.int(hp.quniform('n_units3', 100, 300, 50)),
             'n_units4': scope.int(hp.quniform('n_units4', 100, 300, 50)),
             'layer_num': scope.int(hp.quniform('layer_num', 3, 5, 1)),
             'activate': hp.choice('activate', ('relu', 'sigmoid')),
             'optimizer_name': hp.choice('optimizer_name',
                                         ('Adam', 'AdaDelta', 'MomentumSGD')),
             'lr': hp.uniform('lr', 0.005, 0.02),
             }
    best = fmin(main, space, algo=tpe.suggest, max_evals=20)
    print("best parameters", best)