Compute discrepancy for an elementary perturbation of a LHS - For statsmodels
from functools import lru_cache

import numpy as np
import copy
import openturns as ot
from statsmodels.tools import sequences

dim = 2
n_samples = 20

# Initial design: a Sobol' sequence in [0, 1]^dim and its centered discrepancy
doe_init = np.array(ot.SobolSequence(dim).generate(n_samples))
disc_init = sequences.discrepancy(doe_init)
print(f'Initial discrepancy: {disc_init}')

# Elementary perturbation: exchange one coordinate between two points,
# then recompute the discrepancy from scratch for reference
doe = copy.deepcopy(doe_init)
row_1, row_2, col = 5, 7, 1
doe[row_1, col], doe[row_2, col] = doe[row_2, col], doe[row_1, col]
print(f'Full computation: {sequences.discrepancy(doe)}')
# @profile
def perturb_discrepancy(sample, i1, i2, k, disc, bounds=None):
    """Centered discrepancy after an elementary perturbation of a LHS.

    An elementary perturbation consists of an exchange of coordinates between
    two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
    this operation conserves the LHS properties.

    Parameters
    ----------
    sample : array_like (n_samples, k_vars)
        The sample (before permutation) to compute the discrepancy from.
    i1 : int
        The first line of the elementary permutation.
    i2 : int
        The second line of the elementary permutation.
    k : int
        The column of the elementary permutation.
    disc : float
        Centered discrepancy of the design before permutation.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
        Desired range of transformed data. The transformation applies the
        bounds to the sample and not to the theoretical space, the unit cube.
        Thus the min and max values of the sample will coincide with the
        bounds.

    Returns
    -------
    discrepancy : float
        Centered discrepancy.

    References
    ----------
    [1] Jin et al. "An efficient algorithm for constructing optimal design
        of computer experiments", Journal of Statistical Planning and
        Inference, 2005.
    """
    sample = np.asarray(sample)
    n_samples = sample.shape[0]

    # Sample scaling from bounds to the unit hypercube
    if bounds is not None:
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = (sample - min_) / (max_ - min_)

    z_ij = sample - 0.5

    alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
    beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))

    def g_i(i):
        return np.prod(1 + abs(z_ij[i, :]))

    def h_i(i):
        return np.prod(1 + 0.5 * abs(z_ij[i, :]) - 0.5 * (z_ij[i, :] ** 2))

    # @lru_cache(maxsize=None)
    # def c_ij_iter(i, j):
    #     # print(i, j)
    #     if i != j:
    #         # Eq (19)
    #         c_ = 0.5 * (2 + abs(z_ij[i, :]) + abs(z_ij[j, :]) -
    #                     abs(z_ij[i, :] - z_ij[j, :]))
    #         c_ = np.prod(c_) / (n_samples ** 2)
    #     else:
    #         # Eq (20)
    #         c_ = g_i(i, z_ij) / (n_samples ** 2) - 2 * h_i(i, z_ij) / n_samples
    #     return c_

    # Eq (25), typo in the article: g is missing
    c_i1i1 = ((g_i(i1) * alpha) / (n_samples ** 2) -
              2 * alpha * beta * h_i(i1) / n_samples)
    # Eq (26), typo in the article: n ** 2
    c_i2i2 = ((g_i(i2) / ((n_samples ** 2) * alpha)) -
              (2 * h_i(i2) / (n_samples * alpha * beta)))

    # c_ij matrix of the design before the perturbation
    c_ij = []
    # Eq (19)
    for i in range(n_samples):
        c_ij.append(1. / n_samples ** 2 *
                    np.prod(0.5 * (2. + abs(z_ij[i, :]) + abs(z_ij) -
                                   abs(z_ij[i, :] - z_ij)), axis=1))
    c_ij = np.array(c_ij)

    # Eq (20)
    diag = (1. / n_samples ** 2 * np.prod(1 + abs(z_ij), axis=1) -
            2. / n_samples * np.prod(1. + 0.5 * abs(z_ij) - 0.5 * z_ij ** 2,
                                     axis=1))
    idx = np.diag_indices(n_samples)
    c_ij[idx] = diag

    # Eq (22), typo in the article in the denominator: i2 -> i1
    num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
           abs(z_ij[i2, k] - z_ij[:, k]))
    denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
             abs(z_ij[i1, k] - z_ij[:, k]))
    gamma = num / denum

    # Eq (23)
    c_i1j = gamma * c_ij[i1, :]
    # Eq (24)
    c_i2j = c_ij[i2, :] / gamma

    # Sum over all rows except i1 and i2
    sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
    mask = np.ones(n_samples, dtype=bool)
    mask[[i1, i2]] = False
    sum_ = np.sum(sum_[mask])

    disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] +
               2 * sum_)

    return disc_ep
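
# Illustrative sketch: how `perturb_discrepancy` can drive an exchange-based
# search for a lower-discrepancy design, in the spirit of Jin et al. [1].
# The helper below is hypothetical (not part of the benchmark) and is not
# called here; it only accepts coordinate swaps that decrease the discrepancy.
def optimize_lhs_by_exchange(sample, n_iters=1000, seed=None):
    rng = np.random.RandomState(seed)
    sample = np.array(sample, copy=True)
    n, d = sample.shape
    disc = sequences.discrepancy(sample)
    for _ in range(n_iters):
        # Propose a random elementary perturbation
        i1, i2 = rng.choice(n, size=2, replace=False)
        k = rng.randint(d)
        disc_try = perturb_discrepancy(sample, i1, i2, k, disc)
        if disc_try < disc:
            # Accept the exchange and keep the updated discrepancy
            sample[i1, k], sample[i2, k] = sample[i2, k], sample[i1, k]
            disc = disc_try
    return sample, disc
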
from numba import jit, njit


def perturb_discrepancy_numba(sample, i1, i2, k, disc, bounds=None):
    """Centered discrepancy after an elementary perturbation of a LHS.

    An elementary perturbation consists of an exchange of coordinates between
    two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
    this operation conserves the LHS properties.

    Parameters
    ----------
    sample : array_like (n_samples, k_vars)
        The sample (before permutation) to compute the discrepancy from.
    i1 : int
        The first line of the elementary permutation.
    i2 : int
        The second line of the elementary permutation.
    k : int
        The column of the elementary permutation.
    disc : float
        Centered discrepancy of the design before permutation.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
        Desired range of transformed data. The transformation applies the
        bounds to the sample and not to the theoretical space, the unit cube.
        Thus the min and max values of the sample will coincide with the
        bounds.

    Returns
    -------
    discrepancy : float
        Centered discrepancy.

    References
    ----------
    [1] Jin et al. "An efficient algorithm for constructing optimal design
        of computer experiments", Journal of Statistical Planning and
        Inference, 2005.
    """
    sample = np.asarray(sample)
    n_samples, n_dims = sample.shape

    # Sample scaling from bounds to the unit hypercube
    if bounds is not None:
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = (sample - min_) / (max_ - min_)

    z_ij = sample - 0.5

    alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
    beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))

    # @jit(nopython=True)
    def g_i(i, z_ij):
        g_ = 1.
        for dim in range(n_dims):
            g_ *= 1 + abs(z_ij[i, dim])
        return g_

    # @jit(nopython=True)
    def h_i(i, z_ij):
        h_ = 1.
        for dim in range(n_dims):
            h_ *= 1 + 0.5 * abs(z_ij[i, dim]) - 0.5 * (z_ij[i, dim] ** 2)
        return h_

    @jit  # (nopython=True)  # , parallel=True
    def mat_c_ij(c_ij, z_ij, n_samples, n_dims):
        for i in range(n_samples):
            for j in range(n_samples):
                if i != j:
                    # Eq (19)
                    c_ = 1.
                    for dim in range(n_dims):
                        c_ = c_ * 0.5 * (2 + abs(z_ij[i][dim]) +
                                         abs(z_ij[j][dim]) -
                                         abs(z_ij[i][dim] - z_ij[j][dim]))
                    c_ij[i][j] = c_ / (n_samples ** 2)
                else:
                    # Eq (20)
                    g_ = 1.
                    for dim in range(n_dims):
                        g_ = g_ * (1 + abs(z_ij[i][dim]))
                    h_ = 1.
                    for dim in range(n_dims):
                        h_ = h_ * (1 + 0.5 * abs(z_ij[i][dim]) -
                                   0.5 * (z_ij[i][dim] ** 2))
                    c_ij[i][j] = g_ / (n_samples ** 2) - 2 * h_ / n_samples
        return c_ij

    # Eq (22), typo in the article in the denominator: i2 -> i1
    num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
           abs(z_ij[i2, k] - z_ij[:, k]))
    denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
             abs(z_ij[i1, k] - z_ij[:, k]))
    gamma = num / denum

    # Eq (25), typo in the article: g is missing
    c_i1i1 = ((g_i(i1, z_ij) * alpha) / (n_samples ** 2) -
              2 * alpha * beta * h_i(i1, z_ij) / n_samples)
    # Eq (26), typo in the article: n ** 2
    c_i2i2 = ((g_i(i2, z_ij) / ((n_samples ** 2) * alpha)) -
              (2 * h_i(i2, z_ij) / (n_samples * alpha * beta)))

    c_ij = np.zeros((n_samples, n_samples)).tolist()
    c_ij = mat_c_ij(c_ij, z_ij.tolist(), n_samples, n_dims)
    c_ij = np.array(c_ij)

    # Eq (23)
    c_i1j = gamma * c_ij[i1, :]
    # Eq (24)
    c_i2j = c_ij[i2, :] / gamma

    # Sum over all rows except i1 and i2
    sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
    mask = np.ones(n_samples, dtype=bool)
    mask[[i1, i2]] = False
    sum_ = np.sum(sum_[mask])

    disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] +
               2 * sum_)

    return disc_ep

# import time
# i_time = time.time()
disc_new = perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)

# i_time = time.time()
disc_new2 = perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)

print(f'Perturbation computation: {disc_new}')
print(f'Perturbation numba computation: {disc_new2}')
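
# Illustrative sanity check: both incremental updates should agree with the
# full recomputation on the perturbed design `doe` done above.
print('Consistency with full computation:',
      np.allclose([disc_new, disc_new2], sequences.discrepancy(doe)))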

import timeit

full_time = timeit.repeat('sequences.discrepancy(doe)',
                          number=20, repeat=1,
                          setup="from __main__ import sequences, doe, np")
full_time = np.mean(full_time)
print(f'Time full: {full_time} s')

perm_time = timeit.repeat('perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)',
                          number=20, repeat=1,
                          setup="from __main__ import perturb_discrepancy, doe_init, row_1, row_2, col, disc_init, np")
perm_time = np.mean(perm_time)
print(f'Time permutation: {perm_time} s')

perm_numba_time = timeit.repeat('perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)',
                                number=20, repeat=1,
                                setup="from __main__ import perturb_discrepancy_numba, doe_init, row_1, row_2, col, disc_init, np")
perm_numba_time = np.mean(perm_numba_time)
print(f'Time numba permutation: {perm_numba_time} s')

# Ratios of the perturbation-based updates to the full computation time
print(perm_time / full_time)
print(perm_numba_time / full_time)