taotao54321 · November 10, 2016 09:36
diff --git a/mlp.py b/mlp.py
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # 多層パーセプトロン(Multi Layer Perceptron)
 # 入力層、出力層に隠れ層を1つ追加(PRMLではこれを2層と呼ぶらしい)
 # bias抜きで入力層ユニット数2, 隠れ層ユニット数2, 出力層ユニット数1
 # AND, OR, XOR でテスト

 # 勾配降下法により学習
 #
 # 誤差関数 E = max(0, -y*u2) を使用。要は微分できればいいのだからSLPの
 # 時と同じでいけるはず
 #
 # 隠れ層の活性化関数、および学習率はコマンドラインから指定する。シグモ
 # イド関数で学習率 0.1 にすると安定して学習できるっぽい。なお、参考ま
 # でに活性化関数リストに恒等関数も含めているが、これを使うと出力層のユ
 # ニット値は結局入力値の線型結合になるので XOR は学習できない(線形分離
 # 不能だから)。

 import sys

 import numpy as np


 class Sigmoid:
    """Logistic sigmoid activation, 1 / (1 + exp(-x)).

    Accepts scalars and NumPy arrays; arrays are processed elementwise.
    """

    def __call__(self, x):
        """Return the sigmoid of *x*."""
        return self._apply(x)

    def deriv(self, x):
        """Return the derivative of the sigmoid evaluated at *x*."""
        value = self._apply(x)
        return value * (1. - value)

    def _apply(self, x):
        # 1 / (1 + exp(-x)) rewritten as exp(-log(1 + exp(-x))).
        # np.logaddexp(0, -x) evaluates log(1 + exp(-x)) without forming
        # exp(-x) by itself, so a strongly negative x no longer overflows
        # (and no longer emits a RuntimeWarning); the result just
        # underflows to 0.
        return np.exp(-np.logaddexp(0., -x))

 class Tanh:
    """Hyperbolic tangent activation (elementwise on NumPy arrays)."""

    def _apply(self, x):
        return np.tanh(x)

    def __call__(self, x):
        """Return tanh(x)."""
        return self._apply(x)

    def deriv(self, x):
        """Return the derivative of tanh at *x*, i.e. 1 - tanh(x)**2."""
        tanh_x = self._apply(x)
        return 1. - tanh_x**2

 class Square:
    """Quadratic activation f(x) = x * x."""

    def __call__(self, x):
        """Return *x* multiplied by itself."""
        squared = x * x
        return squared

    def deriv(self, x):
        """Return the derivative of x * x, which is 2 * x."""
        slope = 2. * x
        return slope

 class ReLU:
    """Rectified linear unit: passes positive inputs, zeroes the rest."""

    def __call__(self, x):
        """Return *x* where x > 0 and zero elsewhere."""
        is_positive = x > 0
        return x * is_positive

    def deriv(self, x):
        """Return 1.0 where x > 0 and 0.0 elsewhere (0.0 at x == 0)."""
        is_positive = x > 0
        return 1. * is_positive

 class Identity:
    """Identity activation f(x) = x."""

    def deriv(self, x):
        """Return the constant slope 1, whatever *x* is."""
        return 1

    def __call__(self, x):
        """Return *x* unchanged."""
        return x


 class MLP:
    """Perceptron with one hidden layer and one sign-thresholded output.

    The hidden pre-activation is ``w1.dot(x)``. A constant 1 is prepended
    to the hidden activations before the output weights ``w2`` are applied,
    so ``w2[0]`` acts as the output bias. No bias is added to the input
    here; callers supply it inside ``x`` (the sample data in this file
    uses a leading 1 for that).

    Attributes:
        activation: Hidden-layer activation; a callable that also offers
            ``deriv(u)``.
        w1: Hidden-layer weights, shape ``(hidden_size, input_size)``.
        w2: Output weights, shape ``(hidden_size + 1,)``.
    """

    def __init__(self, activation, input_size=3, hidden_size=2):
        """Create a network with weights drawn uniformly from [-1, 1).

        Args:
            activation: Hidden-layer activation object.
            input_size: Length of every input vector (default 3).
            hidden_size: Number of hidden units (default 2).
        """
        self.activation = activation
        # Author's note: initialising every weight to 0 makes the network
        # no better than a single-layer perceptron and XOR is then not
        # learned correctly, hence the random initialisation.
        self.w1 = np.random.uniform(-1.0, 1.0, (hidden_size, input_size))
        self.w2 = np.random.uniform(-1.0, 1.0, hidden_size + 1)

    def learn(self, x, y, eta):
        """Update the weights once from a single training example.

        Nothing changes when the current prediction already equals *y*.

        Args:
            x: Input vector of length ``input_size``.
            y: Target label, +1 or -1.
            eta: Learning rate.
        """
        u1, o1, u2 = self._forward(x)
        if self._activate_output(u2) == y:
            return

        # Misclassified: one gradient-descent step on E = -y * u2.
        #   dE/dw2      = -y * o1
        #   dE/dw1[j]   = -y * w2[j + 1] * activation.deriv(u1[j]) * x
        # np.outer builds all rows of dE/dw1 at once, for any hidden size.
        grad2 = -y * o1
        grad1 = -y * np.outer(self.w2[1:] * self.activation.deriv(u1), x)
        self.w2 -= eta * grad2
        self.w1 -= eta * grad1

    def test(self, x):
        """Return the predicted label (+1 or -1) for input vector *x*."""
        _, _, u2 = self._forward(x)
        return self._activate_output(u2)

    def _forward(self, x):
        """Return (u1, o1, u2): hidden input, hidden output with the
        constant 1 prepended, and the output pre-activation."""
        u1 = self.w1.dot(x)
        o1 = np.insert(self.activation(u1), 0, 1)
        u2 = self.w2.dot(o1)
        return u1, o1, u2

    def _activate_output(self, u):
        """Threshold the output pre-activation: 1 if u >= 0, else -1."""
        return 1 if u >= 0 else -1


 def test(name, xs, ys, activation, eta, epochs=10000):
    """Train a fresh MLP on one labelled data set and print the outcome.

    Prints the learned weights, then the network's prediction for every
    input in *xs*.

    Args:
        name: Label used in the printed headings.
        xs: Sequence of input vectors.
        ys: Target labels, one per entry of *xs*.
        activation: Hidden-layer activation handed to MLP.
        eta: Learning rate handed to MLP.learn.
        epochs: Number of passes over the data set (default 10000).
    """
    nn = MLP(activation)

    print("#------------------------------------------------------------")
    print("# Learning \"{}\" (eta={})".format(name, eta))
    for _ in range(epochs):
        for x, y in zip(xs, ys):
            nn.learn(x, y, eta)
    print("w1 = {}".format(nn.w1))
    print("w2 = {}".format(nn.w2))
    print()

    print("# Test \"{}\"".format(name))
    for x in xs:
        print("{} -> {}".format(x, nn.test(x)))
    print()


 # Training inputs. Every row is (1, a, b): a constant leading 1 followed
 # by one of the four 0/1 input pairs.
 XS = tuple(
    np.array(row)
    for row in (
        (1, 0, 0),
        (1, 0, 1),
        (1, 1, 0),
        (1, 1, 1),
    )
 )

 # Target labels (+1 / -1) for the rows of XS, one tuple per function.
 YS_AND = (-1, -1, -1, 1)
 YS_OR = (-1, 1, 1, 1)
 YS_XOR = (-1, 1, 1, -1)

 # Command-line activation name -> activation instance.
 ACTIVATION_MAP = dict(
    sigmoid=Sigmoid(),
    tanh=Tanh(),
    square=Square(),
    relu=ReLU(),
    identity=Identity(),
 )

 def error(msg):
    """Stop the program by raising SystemExit carrying *msg*."""
    raise SystemExit(msg)

 def usage():
    """Abort via error() with the expected command-line syntax."""
    message = "Usage: mlp <activation> <eta>"
    error(message)

 def main():
    """Read ``<activation> <eta>`` from the command line and run the demos.

    Exits through usage() when the argument count is wrong, and through
    error() when the activation name is not a key of ACTIVATION_MAP or
    eta cannot be parsed as a float. Otherwise trains and prints results
    for AND, OR and XOR.
    """
    if len(sys.argv) != 3:
        usage()
    name, eta_text = sys.argv[1:]

    # Report bad arguments with a short message instead of a traceback.
    if name not in ACTIVATION_MAP:
        error("Unknown activation \"{}\" (choose from: {})".format(
            name, ", ".join(sorted(ACTIVATION_MAP))))
    activation = ACTIVATION_MAP[name]

    try:
        eta = float(eta_text)
    except ValueError:
        error("Invalid eta \"{}\": expected a number".format(eta_text))

    test("AND", XS, YS_AND, activation, eta)
    test("OR",  XS, YS_OR,  activation, eta)
    test("XOR", XS, YS_XOR, activation, eta)

 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	# 多層パーセプトロン(Multi Layer Perceptron)
	# 入力層、出力層に隠れ層を1つ追加(PRMLではこれを2層と呼ぶらしい)
	# bias抜きで入力層ユニット数2, 隠れ層ユニット数2, 出力層ユニット数1
	# AND, OR, XOR でテスト

	# 勾配降下法により学習
	#
	# 誤差関数 E = max(0, -y*u2) を使用。要は微分できればいいのだからSLPの
	# 時と同じでいけるはず
	#
	# 隠れ層の活性化関数、および学習率はコマンドラインから指定する。シグモ
	# イド関数で学習率 0.1 にすると安定して学習できるっぽい。なお、参考ま
	# でに活性化関数リストに恒等関数も含めているが、これを使うと出力層のユ
	# ニット値は結局入力値の線型結合になるので XOR は学習できない(線形分離
	# 不能だから)。

	import sys

	import numpy as np


	class Sigmoid:
	    """Logistic sigmoid activation, 1 / (1 + exp(-x)).

	    Accepts scalars and NumPy arrays; arrays are processed elementwise.
	    """

	    def __call__(self, x):
	        """Return the sigmoid of *x*."""
	        return self._apply(x)

	    def deriv(self, x):
	        """Return the derivative of the sigmoid evaluated at *x*."""
	        value = self._apply(x)
	        return value * (1. - value)

	    def _apply(self, x):
	        # 1 / (1 + exp(-x)) rewritten as exp(-log(1 + exp(-x))).
	        # np.logaddexp(0, -x) evaluates log(1 + exp(-x)) without forming
	        # exp(-x) by itself, so a strongly negative x no longer overflows
	        # (and no longer emits a RuntimeWarning); the result just
	        # underflows to 0.
	        return np.exp(-np.logaddexp(0., -x))

	class Tanh:
	    """Hyperbolic tangent activation (elementwise on NumPy arrays)."""

	    def __call__(self, x):
	        """Return tanh(x)."""
	        return self._apply(x)

	    def deriv(self, x):
	        """Return the derivative of tanh at *x*, i.e. 1 - tanh(x)**2."""
	        value = self._apply(x)
	        return 1. - value**2

	    def _apply(self, x):
	        return np.tanh(x)

	class Square:
	    """Quadratic activation f(x) = x * x."""

	    def __call__(self, x):
	        """Return *x* multiplied by itself."""
	        return x * x

	    def deriv(self, x):
	        """Return the derivative of x * x, which is 2 * x."""
	        return 2. * x

	class ReLU:
	    """Rectified linear unit: passes positive inputs, zeroes the rest."""

	    def __call__(self, x):
	        """Return *x* where x > 0 and zero elsewhere."""
	        return x * (x > 0)

	    def deriv(self, x):
	        """Return 1.0 where x > 0 and 0.0 elsewhere (0.0 at x == 0)."""
	        return 1. * (x > 0)

	class Identity:
	    """Identity activation f(x) = x."""

	    def __call__(self, x):
	        """Return *x* unchanged."""
	        return x

	    def deriv(self, x):
	        """Return the constant slope 1, whatever *x* is."""
	        return 1


	class MLP:
	    """Perceptron with one hidden layer and one sign-thresholded output.

	    The hidden pre-activation is ``w1.dot(x)``. A constant 1 is prepended
	    to the hidden activations before the output weights ``w2`` are
	    applied, so ``w2[0]`` acts as the output bias. No bias is added to the
	    input here; callers supply it inside ``x`` (the sample data in this
	    file uses a leading 1 for that).

	    Attributes:
	        activation: Hidden-layer activation; a callable that also offers
	            ``deriv(u)``.
	        w1: Hidden-layer weights, shape ``(hidden_size, input_size)``.
	        w2: Output weights, shape ``(hidden_size + 1,)``.
	    """

	    def __init__(self, activation, input_size=3, hidden_size=2):
	        """Create a network with weights drawn uniformly from [-1, 1).

	        Args:
	            activation: Hidden-layer activation object.
	            input_size: Length of every input vector (default 3).
	            hidden_size: Number of hidden units (default 2).
	        """
	        self.activation = activation
	        # Author's note: initialising every weight to 0 makes the network
	        # no better than a single-layer perceptron and XOR is then not
	        # learned correctly, hence the random initialisation.
	        self.w1 = np.random.uniform(-1.0, 1.0, (hidden_size, input_size))
	        self.w2 = np.random.uniform(-1.0, 1.0, hidden_size + 1)

	    def learn(self, x, y, eta):
	        """Update the weights once from a single training example.

	        Nothing changes when the current prediction already equals *y*.

	        Args:
	            x: Input vector of length ``input_size``.
	            y: Target label, +1 or -1.
	            eta: Learning rate.
	        """
	        u1, o1, u2 = self._forward(x)
	        if self._activate_output(u2) == y:
	            return

	        # Misclassified: one gradient-descent step on E = -y * u2.
	        #   dE/dw2      = -y * o1
	        #   dE/dw1[j]   = -y * w2[j + 1] * activation.deriv(u1[j]) * x
	        # np.outer builds all rows of dE/dw1 at once, for any hidden size.
	        grad2 = -y * o1
	        grad1 = -y * np.outer(self.w2[1:] * self.activation.deriv(u1), x)
	        self.w2 -= eta * grad2
	        self.w1 -= eta * grad1

	    def test(self, x):
	        """Return the predicted label (+1 or -1) for input vector *x*."""
	        _, _, u2 = self._forward(x)
	        return self._activate_output(u2)

	    def _forward(self, x):
	        """Return (u1, o1, u2): hidden input, hidden output with the
	        constant 1 prepended, and the output pre-activation."""
	        u1 = self.w1.dot(x)
	        o1 = np.insert(self.activation(u1), 0, 1)
	        u2 = self.w2.dot(o1)
	        return u1, o1, u2

	    def _activate_output(self, u):
	        """Threshold the output pre-activation: 1 if u >= 0, else -1."""
	        return 1 if u >= 0 else -1


	def test(name, xs, ys, activation, eta, epochs=10000):
	    """Train a fresh MLP on one labelled data set and print the outcome.

	    Prints the learned weights, then the network's prediction for every
	    input in *xs*.

	    Args:
	        name: Label used in the printed headings.
	        xs: Sequence of input vectors.
	        ys: Target labels, one per entry of *xs*.
	        activation: Hidden-layer activation handed to MLP.
	        eta: Learning rate handed to MLP.learn.
	        epochs: Number of passes over the data set (default 10000).
	    """
	    nn = MLP(activation)

	    print("#------------------------------------------------------------")
	    print("# Learning \"{}\" (eta={})".format(name, eta))
	    for _ in range(epochs):
	        for x, y in zip(xs, ys):
	            nn.learn(x, y, eta)
	    print("w1 = {}".format(nn.w1))
	    print("w2 = {}".format(nn.w2))
	    print()

	    print("# Test \"{}\"".format(name))
	    for x in xs:
	        print("{} -> {}".format(x, nn.test(x)))
	    print()


	# Training inputs. Every row is (1, a, b): a constant leading 1 followed
	# by one of the four 0/1 input pairs.
	XS = tuple(
	    np.array(row)
	    for row in (
	        (1, 0, 0),
	        (1, 0, 1),
	        (1, 1, 0),
	        (1, 1, 1),
	    )
	)

	# Target labels (+1 / -1) for the rows of XS, one tuple per function.
	YS_AND = (-1, -1, -1, 1)
	YS_OR = (-1, 1, 1, 1)
	YS_XOR = (-1, 1, 1, -1)

	# Command-line activation name -> activation instance.
	ACTIVATION_MAP = dict(
	    sigmoid=Sigmoid(),
	    tanh=Tanh(),
	    square=Square(),
	    relu=ReLU(),
	    identity=Identity(),
	)

	def error(msg):
	    """Stop the program through sys.exit, passing *msg* as its argument."""
	    sys.exit(msg)

	def usage():
	    """Abort via error() with the expected command-line syntax."""
	    error("Usage: mlp <activation> <eta>")

	def main():
	    """Read ``<activation> <eta>`` from the command line and run the demos.

	    Exits through usage() when the argument count is wrong, and through
	    error() when the activation name is not a key of ACTIVATION_MAP or
	    eta cannot be parsed as a float. Otherwise trains and prints results
	    for AND, OR and XOR.
	    """
	    if len(sys.argv) != 3:
	        usage()
	    name, eta_text = sys.argv[1:]

	    # Report bad arguments with a short message instead of a traceback.
	    if name not in ACTIVATION_MAP:
	        error("Unknown activation \"{}\" (choose from: {})".format(
	            name, ", ".join(sorted(ACTIVATION_MAP))))
	    activation = ACTIVATION_MAP[name]

	    try:
	        eta = float(eta_text)
	    except ValueError:
	        error("Invalid eta \"{}\": expected a number".format(eta_text))

	    test("AND", XS, YS_AND, activation, eta)
	    test("OR", XS, YS_OR, activation, eta)
	    test("XOR", XS, YS_XOR, activation, eta)

	if __name__ == "__main__":
	    main()