Last active
January 25, 2016 01:46
-
-
Save y-mitsui/0ea1317fe74eef14583b to your computer and use it in GitHub Desktop.
多重コレスポンデンス分析ライブラリ(https://github.com/esafak/mca)の使用例
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env python | |
| # -*- coding:utf-8 -*- | |
| """ | |
| 多重コレスポンデンス分析ライブラリ(https://github.com/esafak/mca)の使用例 | |
| """ | |
| import numpy as np | |
| import numpy.random as random | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import mca | |
| def sampling(num_sample): | |
| # x:個体 | |
| # y1 = x + e, e ~ N(0,1) | |
| # y2 = y1 + e, e ~ N(0,1) | |
| # noise ~ N(0,1) | |
| x = np.linspace(0,10,num_sample) | |
| y1 = x + random.randn(num_sample) | |
| y2 = y1 + random.randn(num_sample) | |
| noise = random.randn(num_sample) | |
| return (x,y1,y2,noise) | |
| # sampling | |
| x, y1, y2, noise = sampling(10000) | |
| # dicsrite | |
| sample = pd.DataFrame({"x":x, "y1":y1,"y2":y2, "noise":noise}) | |
| sample["x"] = sample["x"].apply(lambda row:0 if row < 3. else 1 if row < 6. else 2) | |
| sample["y1"] = sample["y1"].apply(lambda row:int(row < 6.)) | |
| sample["y2"] = sample["y2"].apply(lambda row:int(row < 6.)) | |
| sample["noise"] = sample["noise"].apply(lambda row:int(row < 0.)) | |
| # 相互情報量を求める。必要であればこれを元に特徴選択をする | |
| x_prob = sample.x.value_counts() / sample.x.shape[0] | |
| print "================== Mutual Information of features ==================" | |
| for feature in ["y1","y2","noise"]: | |
| cross_prob = pd.crosstab(sample.x, sample[feature]) / sample.x.shape[0] | |
| feature_prob = sample[feature].value_counts() / sample[feature].shape[0] | |
| mi = 0. | |
| for x_idx in x_prob.index: | |
| for feature_idx in feature_prob.index: | |
| prob2d = 1e-5 if cross_prob.loc[x_idx,feature_idx]==0. else cross_prob.loc[x_idx,feature_idx] | |
| mi += prob2d * np.log(prob2d / (x_prob[x_idx] * feature_prob[feature_idx])) | |
| print "%s:%f"%(feature,mi) | |
| print "===================================================================\n" | |
| # make cross table | |
| crossed = pd.DataFrame(columns=["y1_0","y1_1","y2_0","y2_1","noise_0","noise_1"]) | |
| for i in range(3): | |
| row = [0] * 6 | |
| indivisual_sample = sample[sample.x==i] | |
| stat = indivisual_sample.y1.value_counts() | |
| row[0] = stat.get(0) if stat.get(0) != None else 0 | |
| row[1] = stat.get(1) if stat.get(1) != None else 0 | |
| stat = indivisual_sample.y2.value_counts() | |
| row[2] = stat.get(0) if stat.get(0) != None else 0 | |
| row[3] = stat.get(1) if stat.get(1) != None else 0 | |
| stat = indivisual_sample.noise.value_counts() | |
| row[4] = stat.get(0) if stat.get(0) != None else 0 | |
| row[5] = stat.get(1) if stat.get(1) != None else 0 | |
| df_addition_row = pd.DataFrame([row]) | |
| df_addition_row.columns =["y1_0","y1_1","y2_0","y2_1","noise_0","noise_1"] | |
| df_addition_row.index =["indivisual" + str(i)] | |
| crossed = pd.concat([crossed,df_addition_row],axis=0) | |
| print "========================== Cross Table ============================" | |
| print crossed | |
| print "===================================================================\n" | |
| # mca | |
| mca_counts = mca.MCA(crossed,benzecri=False) | |
| indivisuals = mca_counts.fs_r(N=2) | |
| features = mca_counts.fs_c(N=2) | |
| # plot. Be careful difference of x range and y range | |
| # この例では実質的に第1軸目しか使われなかった為,第1軸目の距離だけに着目すれば良い | |
| plt.scatter(indivisuals[:,0], indivisuals[:,1], c='g',marker='o') | |
| labels = crossed.index | |
| for label,x,y in zip(labels,indivisuals[:,0],indivisuals[:,1]): | |
| plt.annotate(label,xy = (x, y)) | |
| plt.scatter(features[:,0], features[:,1], c='r',marker='^') | |
| labels = crossed.columns | |
| for label,x,y in zip(labels,features[:,0],features[:,1]): | |
| plt.annotate(label,xy = (x, y)) | |
| plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment