Created
January 20, 2016 11:18
-
-
Save TonyMooori/99d1f7a868ca100341c7 to your computer and use it in GitHub Desktop.
MNISTの自己組織化マップを作成するプログラム
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import cv2 | |
import random | |
from sklearn.datasets import fetch_mldata | |
from sklearn.decomposition import RandomizedPCA | |
""" | |
References:
[1] Self-Organizing Feature Map (SOM)
http://www.sist.ac.jp/~kanakubo/research/neuro/selforganizingmap.html
[2] Sequential (online) self-organizing map in Python - a science undergraduate's Python and Java blog
http://emoson.hateblo.jp/entry/2015/02/16/034632
[3] Mindware Research Institute | Technical information | SOM data mining explained
http://mindware-jp.com/basic/SOM_for_datamining.html
""" | |
class SOM:
    """Online (sample-by-sample) self-organizing map on a square grid.

    The map has ``n_side * n_side`` units. After ``fit`` the learned weight
    vectors are available as ``self.weight`` with shape
    ``(n_side * n_side, n_features)``; row ``i`` belongs to the grid cell
    ``(i // n_side, i % n_side)``.
    """

    def __init__(self, n_side, n_learn=1000, learning_rate=0.5, verbose=True):
        """
        n_side: side length of the square output grid.
        n_learn: number of training iterations (one sample per iteration).
        learning_rate: learning constant, corresponding to the constant c
            in reference [1] of this file's reference list.
        verbose: when true (the default, matching the historical behavior),
            print the iteration index on every training step.
        """
        self.n_side = n_side
        self.n_learn = n_learn
        self.learning_rate = learning_rate
        self.verbose = verbose
        self.n_weight = self.n_side * self.n_side

    def fit(self, input_vector):
        """
        Train the map and store the result in ``self.weight``.

        input_vector: 2-D array-like of shape (n_samples, n_features).

        Raises ValueError if ``input_vector`` is not 2-D or has no samples.
        """
        samples = np.asarray(input_vector)
        if samples.ndim != 2:
            raise ValueError(
                "input_vector must be 2-D (n_samples, n_features), "
                "got %d dimension(s)" % samples.ndim
            )
        n_input, n_vector = samples.shape
        if n_input == 0:
            raise ValueError("input_vector must contain at least one sample")

        # points[i] holds the grid coordinates of unit i, scaled into [0, 1).
        points = np.array(
            [divmod(i, self.n_side) for i in range(self.n_weight)]
        ) / (1.0 * self.n_side)

        # All weight vectors start at zero.
        self.weight = np.zeros((self.n_weight, n_vector))

        # Samples are visited in one fixed random order, cycled if
        # n_learn exceeds the number of samples.
        random_index = np.arange(n_input)
        np.random.shuffle(random_index)

        for t in range(self.n_learn):
            if self.verbose:
                print(t)

            # Decays linearly from 1 towards 0; it scales both the step size
            # and the neighborhood width. It never reaches 0 because
            # t < n_learn, so the division below is safe.
            alpha = 1.0 - float(t) / self.n_learn

            vec = samples[random_index[t % n_input]]

            # Difference between the sample and every weight vector.
            diff = vec - self.weight

            # The winner is the unit whose weight is closest to the sample.
            winner_index = np.argmin(np.linalg.norm(diff, axis=1))

            # Grid distance of every unit from the winner.
            dist = np.linalg.norm(points - points[winner_index], axis=1)

            # Neighborhood function: larger for units closer to the winner.
            h = self.learning_rate * alpha * np.exp(-(dist / alpha) ** 2)

            # Pull each weight towards the sample, scaled by its neighborhood
            # value.
            self.weight += h[:, np.newaxis] * diff
if __name__ == "__main__":
    # Load the MNIST images and their labels.
    mnist = fetch_mldata('MNIST original', data_home="..\\")
    imgs = mnist.data
    label = mnist.target

    # Pick 20000 images at random as the training set.
    index = np.arange(len(imgs))
    np.random.shuffle(index)
    input_vector = imgs[index[:20000]]

    # Build and train the SOM.
    n_side = 10  # side length of the output grid
    som = SOM(n_side, n_learn=5000, learning_rate=0.75)
    som.fit(input_vector)

    # One 28x28 image per grid cell, indexed as [grid_x, grid_y, row, col].
    output_imgs = som.weight.reshape(n_side, n_side, 28, 28)

    # Lay the cell images out in grid order as one large picture: moving the
    # pixel-row axis next to grid_x makes the final reshape place cell (x, y)
    # at rows x*28..x*28+27 and columns y*28..y*28+27.
    tile = output_imgs.transpose(0, 2, 1, 3).reshape(n_side * 28, n_side * 28)

    # Invert black and white.
    tile = np.abs(255 - tile).astype(np.uint8)

    # Save the picture.
    cv2.imwrite("tile.png", tile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment