Skip to content

Instantly share code, notes, and snippets.

@tupui
Created January 28, 2019 22:34
Show Gist options
  • Save tupui/58470ee1098b260bbed35af8667306ed to your computer and use it in GitHub Desktop.
Save tupui/58470ee1098b260bbed35af8667306ed to your computer and use it in GitHub Desktop.
Compute discrepancy for an elementary perturbation of a LHS - For statsmodels
from functools import lru_cache
import numpy as np
import copy
import openturns as ot
from statsmodels.tools import sequences
dim = 2
n_samples = 20
doe_init = np.array(ot.SobolSequence(dim).generate(n_samples))
disc_init = sequences.discrepancy(doe_init)
print(f'Initial discrepancy: {disc_init}')
doe = copy.deepcopy(doe_init)
row_1, row_2, col = 5, 7, 1
doe[row_1, col], doe[row_2, col] = doe[row_2, col], doe[row_1, col]
print(f'Full computation: {sequences.discrepancy(doe)}')
# @profile
def perturb_discrepancy(sample, i1, i2, k, disc, bounds=None):
"""Centered discrepancy after and elementary perturbation on a LHS.
An elementary perturbation consists of an exange of coordinates between
two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
this operation conserve the LHS properties.
Parameters
----------
sample : array_like (n_samples, k_vars)
The sample (before permutation) to compute the discrepancy from.
i1 : int
The first line of the elementary permutation.
i2 : int
The second line of the elementary permutation.
k : int
The column of the elementary permutation.
disc : float
Centered discrepancy of the design before permutation.
bounds : tuple or array_like ([min, k_vars], [max, k_vars])
Desired range of transformed data. The transformation apply the bounds
on the sample and not the theoretical space, unit cube. Thus min and
max values of the sample will coincide with the bounds.
Returns
-------
discrepancy : float
Centered discrepancy.
References
----------
[1] Jin et al. "An efficient algorithm for constructing optimal design
of computer experiments", Journal of Statistical Planning and
Inference, 2005.
"""
sample = np.asarray(sample)
n_samples = sample.shape[0]
# Sample scaling from bounds to unit hypercube
if bounds is not None:
min_ = bounds.min(axis=0)
max_ = bounds.max(axis=0)
sample = (sample - min_) / (max_ - min_)
z_ij = sample - 0.5
alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))
def g_i(i):
return np.prod(1 + abs(z_ij[i, :]))
def h_i(i):
return np.prod(1 + 0.5 * abs(z_ij[i, :]) - 0.5 * (z_ij[i, :] ** 2))
# @lru_cache(maxsize=None)
# def c_ij_iter(i, j):
# # print(i, j)
# if i != j:
# # Eq (19)
# c_ = 0.5 * (2 + abs(z_ij[i, :]) + abs(z_ij[j, :]) -
# abs(z_ij[i, :] - z_ij[j, :]))
# c_ = np.prod(c_) / (n_samples ** 2)
# else:
# # Eq (20)
# c_ = g_i(i, z_ij) / (n_samples ** 2) - 2 * h_i(i, z_ij) / n_samples
# return c_
# Eq (25), typo in the article g is missing
c_i1i1 = ((g_i(i1) * alpha) / (n_samples ** 2) -
2 * alpha * beta * h_i(i1) / n_samples)
# Eq (26), typo in the article n ** 2
c_i2i2 = ((g_i(i2) / ((n_samples ** 2) * alpha)) -
(2 * h_i(i2) / (n_samples * alpha * beta)))
# c_ij
c_ij = []
# Eq (19)
for i in range(n_samples):
c_ij.append(1. / n_samples ** 2 * np.prod(0.5 * (2. + abs(z_ij[i, :]) + abs(z_ij) - abs(z_ij[i, :] - z_ij)), axis=1))
c_ij = np.array(c_ij)
# Eq (20)
diag = 1. / n_samples ** 2 * np.prod(1 + abs(z_ij), axis=1) - 2. / n_samples * np.prod(1. + 0.5 * abs(z_ij) - 0.5 * z_ij ** 2, axis=1)
idx = np.diag_indices(n_samples)
c_ij[idx] = diag
# Eq (22), typo in the article in the denominator i2 -> i1
num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
abs(z_ij[i2, k] - z_ij[:, k]))
denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
abs(z_ij[i1, k] - z_ij[:, k]))
gamma = num / denum
# Eq (23)
c_i1j = gamma * c_ij[i1, :]
# Eq (24)
c_i2j = c_ij[i2, :] / gamma
sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
mask = np.ones(n_samples, dtype=bool)
mask[[i1, i2]] = False
sum_ = sum(sum_[mask])
disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] + 2 * sum_)
return disc_ep
from numba import jit, njit
def perturb_discrepancy_numba(sample, i1, i2, k, disc, bounds=None):
"""Centered discrepancy after and elementary perturbation on a LHS.
An elementary perturbation consists of an exchange of coordinates between
two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
this operation conserves the LHS properties.
Parameters
----------
sample : array_like (n_samples, k_vars)
The sample (before permutation) to compute the discrepancy from.
i1 : int
The first line of the elementary permutation.
i2 : int
The second line of the elementary permutation.
k : int
The column of the elementary permutation.
disc : float
Centered discrepancy of the design before permutation.
bounds : tuple or array_like ([min, k_vars], [max, k_vars])
Desired range of transformed data. The transformation apply the bounds
on the sample and not the theoretical space, unit cube. Thus min and
max values of the sample will coincide with the bounds.
Returns
-------
discrepancy : float
Centered discrepancy.
References
----------
[1] Jin et al. "An efficient algorithm for constructing optimal design
of computer experiments", Journal of Statistical Planning and
Inference, 2005.
"""
sample = np.asarray(sample)
n_samples, n_dims = sample.shape
# Sample scaling from bounds to unit hypercube
if bounds is not None:
min_ = bounds.min(axis=0)
max_ = bounds.max(axis=0)
sample = (sample - min_) / (max_ - min_)
z_ij = sample - 0.5
alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))
# @jit(nopython=True)
def g_i(i, z_ij):
g_ = 1.
for dim in range(n_dims):
g_ *= 1 + abs(z_ij[i, dim])
return g_
# @jit(nopython=True)
def h_i(i, z_ij):
h_ = 1.
for dim in range(n_dims):
h_ *= 1 + 0.5 * abs(z_ij[i, dim]) - 0.5 * (z_ij[i, dim] ** 2)
return h_
@jit#(nopython=True) #, parallel=True
def mat_c_ij(c_ij, z_ij, n_samples, n_dims):
for i in range(n_samples):
for j in range(n_samples):
if i != j:
# Eq (19)
c_ = 1.
for dim in range(n_dims):
c_ = c_ * 0.5 * (2 + abs(z_ij[i][dim]) + abs(z_ij[j][dim]) - abs(z_ij[i][dim] - z_ij[j][dim]))
c_ij[i][j] = c_ / (n_samples ** 2)
else:
# Eq (20)
g_ = 1.
for dim in range(n_dims):
g_ = g_ * (1 + abs(z_ij[i][dim]))
h_ = 1.
for dim in range(n_dims):
h_ = h_ * (1 + 0.5 * abs(z_ij[i][dim]) - 0.5 * (z_ij[i][dim] ** 2))
c_ij[i][j] = g_ / (n_samples ** 2) - 2 * h_ / n_samples
return c_ij
# Eq (22), typo in the article in the denominator i2 -> i1
num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
abs(z_ij[i2, k] - z_ij[:, k]))
denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
abs(z_ij[i1, k] - z_ij[:, k]))
gamma = num / denum
# Eq (25), typo in the article g is missing
c_i1i1 = ((g_i(i1, z_ij) * alpha) / (n_samples ** 2) -
2 * alpha * beta * h_i(i1, z_ij) / n_samples)
# Eq (26), typo in the article n ** 2
c_i2i2 = ((g_i(i2, z_ij) / ((n_samples ** 2) * alpha)) -
(2 * h_i(i2, z_ij) / (n_samples * alpha * beta)))
c_ij = np.zeros((n_samples, n_samples)).tolist()
c_ij = mat_c_ij(c_ij, z_ij.tolist(), n_samples, n_dims)
c_ij = np.array(c_ij)
# Eq (23)
c_i1j = gamma * c_ij[i1, :]
# Eq (24)
c_i2j = c_ij[i2, :] / gamma
sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
mask = np.ones(n_samples, dtype=bool)
mask[[i1, i2]] = False
sum_ = sum(sum_[mask])
disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] + 2 * sum_)
return disc_ep
# import time
# i_time = time.time()
disc_new = perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)
# i_time = time.time()
disc_new2 = perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)
print(f'Perturbation computation: {disc_new}')
print(f'Perturbation numba computation: {disc_new2}')
import timeit
full_time = timeit.repeat('sequences.discrepancy(doe)',
number=20, repeat=1,
setup="from __main__ import sequences, doe, np")
full_time = np.mean(full_time)
print(f'Time full: {full_time} s')
perm_time = timeit.repeat('perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)',
number=20, repeat=1,
setup="from __main__ import perturb_discrepancy, doe_init, row_1, row_2, col, disc_init, np")
perm_time = np.mean(perm_time)
print(f'Time permuation: {perm_time} s')
perm_numba_time = timeit.repeat('perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)',
number=20, repeat=1,
setup="from __main__ import perturb_discrepancy_numba, doe_init, row_1, row_2, col, disc_init, np")
perm_numba_time = np.mean(perm_numba_time)
print(f'Time numba permuation: {perm_numba_time} s')
print(perm_time / full_time)
print(perm_numba_time / full_time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment