Created
March 12, 2018 14:49
-
-
Save EnsekiTT/301889ce5f55767b7ba7112ceb3ba93a to your computer and use it in GitHub Desktop.
FIML(完全情報最尤推定)の実験
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# References | |
# https://github.com/kamadak/fiml-py | |
# http://koumurayama.com/koujapanese/missing_data.pdf | |
# http://fixxman.hatenablog.com/entry/2015/10/10/224955 | |
import numpy as np | |
import scipy as sp | |
import scipy.optimize | |
def fiml(data): | |
size, dim = data.shape | |
mean0 = np.zeros(dim) | |
cov0 = np.eye(dim) | |
params0 = np.empty(dim + dim * (dim + 1) // 2) | |
params0[:dim] = mean0 | |
params0[dim:] = cov0[np.tril_indices(dim)] | |
obsmap = ~np.isnan(data) | |
sortedidx = sorted(range(data.shape[0]), key=lambda i: list(obsmap[i])) | |
blocks = [[sortedidx[0]]] | |
for idx, prev in zip(sortedidx[1:], sortedidx[:-1]): | |
if (obsmap[prev] == obsmap[idx]).all(): | |
blocks[-1].append(idx) | |
else: | |
blocks.append([idx]) | |
data_blocks = [(obsmap[b[0]], data[b][:, obsmap[b[0]]]) for b in blocks] | |
result = sp.optimize.fmin_slsqp( | |
_obj_func, params0, args=(dim, data_blocks), disp=True, iter=1000) | |
mean = result[0:dim] | |
cov = np.empty((dim, dim)) | |
ii, jj = np.tril_indices(dim) | |
cov[ii, jj] = result[dim:] | |
cov[jj, ii] = result[dim:] | |
return mean, cov | |
def _obj_func(params, dim, data_blocks): | |
mean = params[0:dim] | |
cov = np.empty((dim,dim)) | |
ii, jj = np.tril_indices(dim) | |
cov[ii, jj] = params[dim:] | |
cov[jj, ii] = params[dim:] | |
if (np.linalg.eigvalsh(cov) < 0).any(): | |
# XXX Returning inf is not a good idea, because many solvers | |
# cannot cope with it. | |
return np.inf | |
objval = 0.0 | |
for obs, obs_data in data_blocks: | |
obs_mean = mean[obs] | |
obs_cov = cov[obs][:, obs] | |
objval += _log_likelihood_composed(obs_data, obs_mean, obs_cov) | |
return -objval | |
# Composite function of _log_likelihood() and _pdf_normal(). | |
def _log_likelihood_composed(x, mean, cov): | |
xshift = x - mean | |
size = x.shape[0] if x.ndim == 2 else 1 | |
t1 = x.shape[-1] * np.log(2*np.pi) | |
sign, logdet = np.linalg.slogdet(cov) | |
t2 = logdet | |
t3 = xshift.dot(np.linalg.inv(cov)) * xshift | |
return -0.5 * ((t1 + t2) * size + t3.sum()) | |
data = np.array([ | |
[3, 83, np.nan], | |
[4, 85, np.nan], | |
[5, 95, np.nan], | |
[2, 96, np.nan], | |
[5, 103, 128], | |
[3, 104, 102], | |
[2, 109, 111], | |
[6, 112, 113], | |
[3, 115, 117], | |
[3, 116, 133]], dtype=np.float) | |
# 正規化 | |
mean_list = [] | |
var_list = [] | |
for i in range(data.shape[1]): | |
if ~(~np.isnan(data[:,i]).any()): | |
continue | |
mean_list.append(data[:,i].mean()) | |
var_list.append(data[:,i].var()) | |
data[:,i] = (data[:,i] - mean_list[i]) / var_list[i] | |
# 実行するとこ | |
mean, cov = fiml(data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment