Created
November 10, 2016 09:36
-
-
Save taotao54321/e0291b0a76aa8bdb75f29fe3510ddee9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Multi-layer perceptron (MLP).
# One hidden layer added between the input and output layers (PRML
# apparently calls this a 2-layer network).
# Excluding bias: 2 input units, 2 hidden units, 1 output unit.
# Tested on AND, OR, XOR.
# Trained by gradient descent.
#
# Uses the error function E = max(0, -y*u2). As long as it is
# differentiable, the same approach as in the SLP case should work.
#
# The hidden layer's activation function and the learning rate are given
# on the command line. A sigmoid with learning rate 0.1 seems to learn
# reliably. For reference the activation list also includes the identity
# function, but with it the output unit is just a linear combination of
# the inputs, so XOR cannot be learned (it is not linearly separable).
import sys
import numpy as np
class Sigmoid:
    """Logistic sigmoid activation: sigma(x) = 1 / (1 + exp(-x))."""

    def __call__(self, x):
        return 1. / (1. + np.exp(-x))

    def deriv(self, x):
        """Derivative sigma(x) * (1 - sigma(x))."""
        s = self(x)
        return s * (1. - s)
class Tanh:
    """Hyperbolic tangent activation."""

    def __call__(self, x):
        return np.tanh(x)

    def deriv(self, x):
        """Derivative 1 - tanh(x)^2."""
        t = self(x)
        return 1. - t**2
class Square:
    """Square activation: f(x) = x^2 (included mainly for experimentation)."""

    def __call__(self, x):
        return x * x

    def deriv(self, x):
        """Derivative 2x."""
        return 2. * x
class ReLU:
    """Rectified linear unit: f(x) = max(0, x), written mask-style so it
    works element-wise on numpy arrays as well as scalars."""

    def __call__(self, x):
        positive = x > 0
        return x * positive

    def deriv(self, x):
        # Subgradient: 1 where x > 0, else 0 (0 taken at x == 0).
        positive = x > 0
        return 1. * positive
class Identity:
    """Identity activation: f(x) = x. With this activation the whole net
    collapses to a linear model, so XOR cannot be learned."""

    def __call__(self, x):
        return x

    def deriv(self, x):
        # Constant derivative of f(x) = x.
        return 1
class MLP:
    """Two-layer perceptron: 2 inputs (+bias), 2 hidden units (+bias), 1 output.

    Weights are drawn uniformly from [-1, 1). All-zero initialization would
    make every hidden unit identical (equivalent to an SLP), so XOR could
    not be learned.
    """

    def __init__(self, activation):
        self.activation = activation   # hidden-layer activation (callable with .deriv)
        self.w1 = np.random.uniform(-1.0, 1.0, (2, 3))  # hidden weights (2 units x [bias, x1, x2])
        self.w2 = np.random.uniform(-1.0, 1.0, 3)       # output weights ([bias, h1, h2])

    def learn(self, x, y, eta):
        """One gradient-descent step on a single example (x, y in {-1, +1}).

        Uses E = max(0, -y*u2): if the example is already classified
        correctly nothing changes; otherwise descend on -y*u2.
        """
        act = self.activation
        hidden_in = self.w1.dot(x)
        hidden_out = np.insert(act(hidden_in), 0, 1)  # prepend bias unit
        out_in = self.w2.dot(hidden_out)
        if self._activate_output(out_in) == y:
            return
        # Gradients of E = -y*u2 (both computed BEFORE any weight update):
        #   dE/dw2 = -y * o1
        #   dE/dw1[i] = -y * w2[i+1] * act'(u1[i]) * x
        grad_out = -y * hidden_out
        grad_hidden = -y * np.array([
            self.w2[1] * act.deriv(hidden_in[0]) * x,
            self.w2[2] * act.deriv(hidden_in[1]) * x,
        ])
        self.w2 -= eta * grad_out
        self.w1 -= eta * grad_hidden

    def test(self, x):
        """Classify input x, returning +1 or -1."""
        hidden_out = np.insert(self.activation(self.w1.dot(x)), 0, 1)
        return self._activate_output(self.w2.dot(hidden_out))

    def _activate_output(self, u):
        # Hard sign threshold on the output unit; ties (u == 0) map to +1.
        return 1 if u >= 0 else -1
def test(name, xs, ys, activation, eta):
    """Train a fresh MLP on the dataset (xs, ys), then print the learned
    weights and the network's output for every input pattern."""
    nn = MLP(activation)
    print("#------------------------------------------------------------")
    print('# Learning "{}" (eta={})'.format(name, eta))
    # Fixed number of epochs; one learn() call per training example.
    for _ in range(10000):
        for x, y in zip(xs, ys):
            nn.learn(x, y, eta)
    print("w1 = {}".format(nn.w1))
    print("w2 = {}".format(nn.w2))
    print()
    print('# Test "{}"'.format(name))
    for x in xs:
        print("{} -> {}".format(x, nn.test(x)))
    print()
XS = tuple(map(np.array, ( | |
(1, 0, 0), | |
(1, 0, 1), | |
(1, 1, 0), | |
(1, 1, 1), | |
))) | |
YS_AND = ( -1, -1, -1, 1 ) | |
YS_OR = ( -1, 1, 1, 1 ) | |
YS_XOR = ( -1, 1, 1, -1 ) | |
# Maps the command-line <activation> argument to a ready-made activation
# object (each one is callable and provides a deriv() method).
ACTIVATION_MAP = {
    "sigmoid" : Sigmoid(),
    "tanh" : Tanh(),
    "square" : Square(),
    "relu" : ReLU(),
    "identity" : Identity(),
}
def error(msg):
    """Abort the program, printing msg to stderr (exit status 1)."""
    raise SystemExit(msg)
def usage():
    """Print the command-line usage message and exit."""
    error("Usage: mlp <activation> <eta>")
def main():
    """Entry point: parse <activation> <eta> from argv, then train and
    test the network on AND, OR, and XOR.

    Exits with the usage message on any invalid argument instead of
    letting a KeyError (unknown activation) or ValueError (non-numeric
    eta) escape as a traceback.
    """
    if len(sys.argv) != 3:
        usage()
    name = sys.argv[1]
    if name not in ACTIVATION_MAP:
        usage()
    activation = ACTIVATION_MAP[name]
    try:
        eta = float(sys.argv[2])
    except ValueError:
        usage()
    test("AND", XS, YS_AND, activation, eta)
    test("OR", XS, YS_OR, activation, eta)
    test("XOR", XS, YS_XOR, activation, eta)

if __name__ == "__main__": main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment