Created
September 9, 2017 10:10
-
-
Save musyoku/c4d59fc66bc39c66c8061cca2d966dda to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import chainer | |
| import numpy as np | |
| train, test = chainer.datasets.get_mnist() | |
| dataset = train._datasets[0] * 10 - 5 | |
| num_data = dataset.shape[0] | |
| def calculate_parameter(): | |
| mean = np.mean(dataset, axis=0) | |
| var = np.var(dataset, axis=0) | |
| return mean, var | |
| # 平均と分散を逐次的に計算するアルゴリズム | |
| # https://mathwords.net/tikujiheikin | |
| def _calculate_sample_mean_var(iteration=100, batchsize=64): | |
| mean = 0 | |
| var = 0 | |
| for i in range(iteration): | |
| indices = np.random.randint(0, num_data, size=batchsize) | |
| samples = dataset.take(indices, axis=0) | |
| sample_mean = np.mean(samples, axis=0) | |
| sample_var = np.var(samples, axis=0) | |
| scale = batchsize / ((i + 1) * batchsize) | |
| new_mean = (i * mean + sample_mean) * scale | |
| new_var = (i * (var + mean ** 2) + (sample_var + sample_mean ** 2)) * scale - new_mean ** 2 | |
| mean = new_mean | |
| var = new_var | |
| return mean, var | |
| # 漸化式バージョン | |
| # 精度が良い | |
| # https://qpp.bitbucket.io/post/variance/ | |
| def _recurrence_calculate_sample_mean_var(iteration=100, batchsize=64): | |
| mean = 0 | |
| nvar = 0 # 分散にnを掛けたもの | |
| for i in range(iteration): | |
| indices = np.random.randint(0, num_data, size=batchsize) | |
| samples = dataset.take(indices, axis=0) | |
| sample_sum = np.sum(samples, axis=0) | |
| sample_squared_sum = np.sum(samples ** 2, axis=0) | |
| new_mean = mean + (sample_sum - batchsize * mean) / ((i + 1) * batchsize) | |
| new_nvar = nvar + sample_squared_sum - sample_sum * (new_mean + mean) + batchsize * new_mean * mean | |
| mean = new_mean | |
| nvar = new_nvar | |
| return mean, nvar / ((i + 1) * batchsize) # nで割ると分散に戻る | |
| # 標本平均・標本分散を返す | |
| def calculate_sample_mean_var(iteration=100, batchsize=64, recurrence=True): | |
| if recurrence: | |
| return _recurrence_calculate_sample_mean_var(iteration, batchsize) | |
| return _calculate_sample_mean_var(iteration, batchsize) | |
| # 標本平均・不偏分散を返す | |
| def calculate_mean_and_unbiased_var(iteration=100, batchsize=64, recurrence=True): | |
| mean, var = calculate_sample_mean_var(iteration, batchsize, recurrence) | |
| n = iteration * batchsize | |
| scale = n / (n - 1) | |
| return mean, scale * var | |
| def main(): | |
| seed = np.random.randint(0, 20170909) | |
| population_mean, population_var = calculate_parameter() | |
| np.random.seed(seed) | |
| sample_mean, sample_var = calculate_sample_mean_var(recurrence=False) | |
| _, unbiased_var = calculate_mean_and_unbiased_var(recurrence=False) | |
| print("naive") | |
| print(" error(mean)") | |
| print(" sample", np.mean(abs(population_mean - sample_mean))) | |
| print(" error(var)") | |
| print(" sample", np.mean(abs(population_var - sample_var))) | |
| print(" unbiased", np.mean(abs(population_var - unbiased_var))) | |
| np.random.seed(seed) | |
| sample_mean, sample_var = calculate_sample_mean_var(recurrence=True) | |
| _, unbiased_var = calculate_mean_and_unbiased_var(recurrence=True) | |
| print("recurrence") | |
| print(" error(mean)") | |
| print(" sample", np.mean(abs(population_mean - sample_mean))) | |
| print(" error(var)") | |
| print(" sample", np.mean(abs(population_var - sample_var))) | |
| print(" unbiased", np.mean(abs(population_var - unbiased_var))) | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment