TonyMooori · January 20, 2016 11:18
diff --git a/SOM_MNIST.py b/SOM_MNIST.py
 #coding:utf-8
 import matplotlib.pyplot as plt
 import numpy as np
 import cv2
 import random
 from sklearn.datasets import fetch_mldata
 from sklearn.decomposition import RandomizedPCA


 """
 参考:
 [1]自己組織化特徴マップ（SOM） 
 http://www.sist.ac.jp/~kanakubo/research/neuro/selforganizingmap.html

 [2]Pythonで逐次型自己組織化マップ - 理系大学生がPythonとJavaで色々頑張るブログ
 http://emoson.hateblo.jp/entry/2015/02/16/034632

 [3]マインドウエア総研 | 技術情報 | SOMデータマイニング解説
 http://mindware-jp.com/basic/SOM_for_datamining.html
 """

 class SOM:
     """Sequential (online) self-organizing map on a square grid.

     The map has ``n_side * n_side`` units. After ``fit`` has run,
     ``self.weight`` holds one weight vector per unit; unit ``i`` sits at
     grid row ``i // n_side`` and grid column ``i % n_side``.
     """

     def __init__(self, n_side, n_learn=1000, learning_rate=0.5):
         """Store the hyper-parameters of the map.

         n_side:         side length of the square output grid
         n_learn:        number of training iterations
         learning_rate:  constant scaling every update
                         (the constant c of reference [1])
         """
         self.n_side = n_side
         self.n_learn = n_learn
         self.learning_rate = learning_rate
         self.n_weight = self.n_side * self.n_side

     def fit(self, input_vector):
         """Train the map and store the result in ``self.weight``.

         input_vector:   training samples, array-like of shape
                         (n_samples, n_features)

         The samples are visited in a random order that is drawn once per
         call and cycled through if ``n_learn`` exceeds the number of
         samples. The iteration index is printed at every step.

         Raises ValueError if ``input_vector`` is not a non-empty 2-D array.
         """
         samples = np.asarray(input_vector)
         # Reject bad input up front: an empty array would otherwise fail at
         # ``t % n_input`` and a 1-D array at ``shape[1]``.
         if samples.ndim != 2 or samples.shape[0] == 0:
             raise ValueError(
                 "input_vector must be a non-empty 2-D array of shape "
                 "(n_samples, n_features), got shape %s" % (samples.shape,)
             )
         n_input, n_vector = samples.shape

         # points[i] holds the grid coordinates of unit i, scaled into [0, 1)
         points = np.array([divmod(i, self.n_side) for i in range(self.n_weight)])
         points = points / (1.0 * self.n_side)

         # All weight vectors start at zero
         self.weight = np.zeros((self.n_weight, n_vector))

         # Random visiting order of the samples
         random_index = np.arange(n_input)
         np.random.shuffle(random_index)

         for t in range(self.n_learn):
             print(t)

             # Decays linearly from 1 down to 1 / n_learn; it never reaches 0,
             # so the division by alpha below is safe.
             alpha = 1.0 - float(t) / self.n_learn

             # Sample used for this step
             vec = samples[random_index[t % n_input]]

             # Difference between the sample and every weight vector
             diff = vec - self.weight

             # Winner: the unit whose weight vector is closest to the sample
             winner_index = np.argmin(np.linalg.norm(diff, axis=1))

             # Grid distance of every unit to the winner
             dist = np.linalg.norm(points - points[winner_index], axis=1)

             # Neighbourhood function: larger for units near the winner, and
             # both its height and its width shrink together with alpha.
             h = self.learning_rate * alpha * np.exp(-(dist / alpha) ** 2)

             # Pull every weight vector toward the sample, scaled by h
             self.weight += h[:, np.newaxis] * diff

 if __name__ == "__main__":
     # Load the MNIST images and their labels
     mnist = fetch_mldata('MNIST original', data_home="..\\")
     imgs = mnist.data
     label = mnist.target

     # Pick 20000 of the images at random as training input
     index = np.arange(len(imgs))
     np.random.shuffle(index)
     input_vector = imgs[index[:20000]]

     # Build and train a map with n_side x n_side units
     n_side = 10
     som = SOM(n_side, n_learn=5000, learning_rate=0.75)
     som.fit(input_vector)

     # View each learned weight vector as a 28x28 image on the unit grid
     output_imgs = som.weight
     output_imgs = output_imgs.reshape(n_side, n_side, 28, 28)

     # Lay the images out in grid order as one large picture: swapping the
     # two middle axes puts (grid row, pixel row) before (grid column,
     # pixel column), so a plain reshape yields the tiled image.
     tile = output_imgs.transpose(0, 2, 1, 3).reshape(n_side * 28, n_side * 28)

     # Invert black and white
     tile = np.abs(255 - tile).astype(np.uint8)

     # Save the picture
     cv2.imwrite("tile.png", tile)
	#coding:utf-8
	import matplotlib.pyplot as plt
	import numpy as np
	import cv2
	import random
	from sklearn.datasets import fetch_mldata
	from sklearn.decomposition import RandomizedPCA


	"""
	参考:
	[1]自己組織化特徴マップ（SOM）
	http://www.sist.ac.jp/~kanakubo/research/neuro/selforganizingmap.html

	[2]Pythonで逐次型自己組織化マップ - 理系大学生がPythonとJavaで色々頑張るブログ
	http://emoson.hateblo.jp/entry/2015/02/16/034632

	[3]マインドウエア総研 | 技術情報 | SOMデータマイニング解説
	http://mindware-jp.com/basic/SOM_for_datamining.html
	"""

	class SOM:
	    """Sequential (online) self-organizing map on a square grid.

	    The map has ``n_side * n_side`` units. After ``fit`` has run,
	    ``self.weight`` holds one weight vector per unit; unit ``i`` sits at
	    grid row ``i // n_side`` and grid column ``i % n_side``.
	    """

	    def __init__(self, n_side, n_learn=1000, learning_rate=0.5):
	        """Store the hyper-parameters of the map.

	        n_side:         side length of the square output grid
	        n_learn:        number of training iterations
	        learning_rate:  constant scaling every update
	                        (the constant c of reference [1])
	        """
	        self.n_side = n_side
	        self.n_learn = n_learn
	        self.learning_rate = learning_rate
	        self.n_weight = self.n_side * self.n_side

	    def fit(self, input_vector):
	        """Train the map and store the result in ``self.weight``.

	        input_vector:   training samples, array-like of shape
	                        (n_samples, n_features)

	        The samples are visited in a random order that is drawn once per
	        call and cycled through if ``n_learn`` exceeds the number of
	        samples. The iteration index is printed at every step.

	        Raises ValueError if ``input_vector`` is not a non-empty 2-D array.
	        """
	        samples = np.asarray(input_vector)
	        # Reject bad input up front: an empty array would otherwise fail at
	        # ``t % n_input`` and a 1-D array at ``shape[1]``.
	        if samples.ndim != 2 or samples.shape[0] == 0:
	            raise ValueError(
	                "input_vector must be a non-empty 2-D array of shape "
	                "(n_samples, n_features), got shape %s" % (samples.shape,)
	            )
	        n_input, n_vector = samples.shape

	        # points[i] holds the grid coordinates of unit i, scaled into [0, 1)
	        points = np.array([divmod(i, self.n_side) for i in range(self.n_weight)])
	        points = points / (1.0 * self.n_side)

	        # All weight vectors start at zero
	        self.weight = np.zeros((self.n_weight, n_vector))

	        # Random visiting order of the samples
	        random_index = np.arange(n_input)
	        np.random.shuffle(random_index)

	        for t in range(self.n_learn):
	            print(t)

	            # Decays linearly from 1 down to 1 / n_learn; it never reaches 0,
	            # so the division by alpha below is safe.
	            alpha = 1.0 - float(t) / self.n_learn

	            # Sample used for this step
	            vec = samples[random_index[t % n_input]]

	            # Difference between the sample and every weight vector
	            diff = vec - self.weight

	            # Winner: the unit whose weight vector is closest to the sample
	            winner_index = np.argmin(np.linalg.norm(diff, axis=1))

	            # Grid distance of every unit to the winner
	            dist = np.linalg.norm(points - points[winner_index], axis=1)

	            # Neighbourhood function: larger for units near the winner, and
	            # both its height and its width shrink together with alpha.
	            h = self.learning_rate * alpha * np.exp(-(dist / alpha) ** 2)

	            # Pull every weight vector toward the sample, scaled by h
	            self.weight += h[:, np.newaxis] * diff

	if __name__ == "__main__":
	    # Load the MNIST images and their labels
	    mnist = fetch_mldata('MNIST original', data_home="..\\")
	    imgs = mnist.data
	    label = mnist.target

	    # Pick 20000 of the images at random as training input
	    index = np.arange(len(imgs))
	    np.random.shuffle(index)
	    input_vector = imgs[index[:20000]]

	    # Build and train a map with n_side x n_side units
	    n_side = 10  # side length of the grid
	    som = SOM(n_side, n_learn=5000, learning_rate=0.75)
	    som.fit(input_vector)

	    # Fetch the learned weight vectors
	    output_imgs = som.weight

	    # Lay the 28x28 images out in grid order as one large picture.
	    # The `*` operators below had been lost, leaving undefined names
	    # such as `n_side28` and `x28`.
	    output_imgs = output_imgs.reshape(n_side, n_side, 28, 28)
	    tile = np.zeros((n_side * 28, n_side * 28))
	    for x in range(n_side):
	        for y in range(n_side):
	            tile[(x * 28):(x * 28 + 28), (y * 28):(y * 28 + 28)] = output_imgs[x, y]

	    # Invert black and white
	    tile = np.abs(255 - tile).astype(np.uint8)

	    # Save the picture
	    cv2.imwrite("tile.png", tile)