Compute discrepancy for an elementary perturbation of a LHS - For statsmodels
from functools import lru_cache

import numpy as np
import copy
import openturns as ot
from statsmodels.tools import sequences

dim = 2
n_samples = 20

# Initial design: a Sobol' sequence in [0, 1]^dim and its centered discrepancy
doe_init = np.array(ot.SobolSequence(dim).generate(n_samples))
disc_init = sequences.discrepancy(doe_init)
print(f'Initial discrepancy: {disc_init}')

# Elementary perturbation: exchange one coordinate between two points,
# then recompute the discrepancy from scratch for reference
doe = copy.deepcopy(doe_init)
row_1, row_2, col = 5, 7, 1
doe[row_1, col], doe[row_2, col] = doe[row_2, col], doe[row_1, col]
print(f'Full computation: {sequences.discrepancy(doe)}')
# @profile
def perturb_discrepancy(sample, i1, i2, k, disc, bounds=None):
    """Centered discrepancy after an elementary perturbation of a LHS.

    An elementary perturbation consists of an exchange of coordinates between
    two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
    this operation conserves the LHS properties.

    Parameters
    ----------
    sample : array_like (n_samples, k_vars)
        The sample (before permutation) to compute the discrepancy from.
    i1 : int
        The first line of the elementary permutation.
    i2 : int
        The second line of the elementary permutation.
    k : int
        The column of the elementary permutation.
    disc : float
        Centered discrepancy of the design before permutation.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
        Desired range of transformed data. The transformation applies the
        bounds to the sample and not to the theoretical space, the unit cube.
        Thus the min and max values of the sample will coincide with the
        bounds.

    Returns
    -------
    discrepancy : float
        Centered discrepancy.

    References
    ----------
    [1] Jin et al. "An efficient algorithm for constructing optimal design
        of computer experiments", Journal of Statistical Planning and
        Inference, 2005.
    """
    sample = np.asarray(sample)
    n_samples = sample.shape[0]

    # Sample scaling from bounds to the unit hypercube
    if bounds is not None:
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = (sample - min_) / (max_ - min_)

    z_ij = sample - 0.5

    alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
    beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))

    def g_i(i):
        return np.prod(1 + abs(z_ij[i, :]))

    def h_i(i):
        return np.prod(1 + 0.5 * abs(z_ij[i, :]) - 0.5 * (z_ij[i, :] ** 2))

    # @lru_cache(maxsize=None)
    # def c_ij_iter(i, j):
    #     # print(i, j)
    #     if i != j:
    #         # Eq (19)
    #         c_ = 0.5 * (2 + abs(z_ij[i, :]) + abs(z_ij[j, :]) -
    #                     abs(z_ij[i, :] - z_ij[j, :]))
    #         c_ = np.prod(c_) / (n_samples ** 2)
    #     else:
    #         # Eq (20)
    #         c_ = g_i(i, z_ij) / (n_samples ** 2) - 2 * h_i(i, z_ij) / n_samples
    #     return c_

    # Eq (25), typo in the article: g is missing
    c_i1i1 = ((g_i(i1) * alpha) / (n_samples ** 2) -
              2 * alpha * beta * h_i(i1) / n_samples)
    # Eq (26), typo in the article: n ** 2
    c_i2i2 = ((g_i(i2) / ((n_samples ** 2) * alpha)) -
              (2 * h_i(i2) / (n_samples * alpha * beta)))

    # c_ij matrix of the design before the perturbation
    c_ij = []
    # Eq (19)
    for i in range(n_samples):
        c_ij.append(1. / n_samples ** 2 *
                    np.prod(0.5 * (2. + abs(z_ij[i, :]) + abs(z_ij) -
                                   abs(z_ij[i, :] - z_ij)), axis=1))
    c_ij = np.array(c_ij)

    # Eq (20)
    diag = (1. / n_samples ** 2 * np.prod(1 + abs(z_ij), axis=1) -
            2. / n_samples * np.prod(1. + 0.5 * abs(z_ij) - 0.5 * z_ij ** 2,
                                     axis=1))
    idx = np.diag_indices(n_samples)
    c_ij[idx] = diag

    # Eq (22), typo in the article in the denominator: i2 -> i1
    num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
           abs(z_ij[i2, k] - z_ij[:, k]))
    denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
             abs(z_ij[i1, k] - z_ij[:, k]))
    gamma = num / denum

    # Eq (23)
    c_i1j = gamma * c_ij[i1, :]
    # Eq (24)
    c_i2j = c_ij[i2, :] / gamma

    # Sum over all rows except i1 and i2
    sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
    mask = np.ones(n_samples, dtype=bool)
    mask[[i1, i2]] = False
    sum_ = np.sum(sum_[mask])

    disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] +
               2 * sum_)

    return disc_ep
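
# Illustrative sketch: how `perturb_discrepancy` can drive an exchange-based
# search for a lower-discrepancy design, in the spirit of Jin et al. [1].
# The helper below is hypothetical (not part of the benchmark) and is not
# called here; it only accepts coordinate swaps that decrease the discrepancy.
def optimize_lhs_by_exchange(sample, n_iters=1000, seed=None):
    rng = np.random.RandomState(seed)
    sample = np.array(sample, copy=True)
    n, d = sample.shape
    disc = sequences.discrepancy(sample)
    for _ in range(n_iters):
        # Propose a random elementary perturbation
        i1, i2 = rng.choice(n, size=2, replace=False)
        k = rng.randint(d)
        disc_try = perturb_discrepancy(sample, i1, i2, k, disc)
        if disc_try < disc:
            # Accept the exchange and keep the updated discrepancy
            sample[i1, k], sample[i2, k] = sample[i2, k], sample[i1, k]
            disc = disc_try
    return sample, disc
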
from numba import jit, njit


def perturb_discrepancy_numba(sample, i1, i2, k, disc, bounds=None):
    """Centered discrepancy after an elementary perturbation of a LHS.

    An elementary perturbation consists of an exchange of coordinates between
    two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
    this operation conserves the LHS properties.

    Parameters
    ----------
    sample : array_like (n_samples, k_vars)
        The sample (before permutation) to compute the discrepancy from.
    i1 : int
        The first line of the elementary permutation.
    i2 : int
        The second line of the elementary permutation.
    k : int
        The column of the elementary permutation.
    disc : float
        Centered discrepancy of the design before permutation.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
        Desired range of transformed data. The transformation applies the
        bounds to the sample and not to the theoretical space, the unit cube.
        Thus the min and max values of the sample will coincide with the
        bounds.

    Returns
    -------
    discrepancy : float
        Centered discrepancy.

    References
    ----------
    [1] Jin et al. "An efficient algorithm for constructing optimal design
        of computer experiments", Journal of Statistical Planning and
        Inference, 2005.
    """
    sample = np.asarray(sample)
    n_samples, n_dims = sample.shape

    # Sample scaling from bounds to the unit hypercube
    if bounds is not None:
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = (sample - min_) / (max_ - min_)

    z_ij = sample - 0.5

    alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
    beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))

    # @jit(nopython=True)
    def g_i(i, z_ij):
        g_ = 1.
        for dim in range(n_dims):
            g_ *= 1 + abs(z_ij[i, dim])
        return g_

    # @jit(nopython=True)
    def h_i(i, z_ij):
        h_ = 1.
        for dim in range(n_dims):
            h_ *= 1 + 0.5 * abs(z_ij[i, dim]) - 0.5 * (z_ij[i, dim] ** 2)
        return h_

    @jit  # (nopython=True)  # , parallel=True
    def mat_c_ij(c_ij, z_ij, n_samples, n_dims):
        for i in range(n_samples):
            for j in range(n_samples):
                if i != j:
                    # Eq (19)
                    c_ = 1.
                    for dim in range(n_dims):
                        c_ = c_ * 0.5 * (2 + abs(z_ij[i][dim]) +
                                         abs(z_ij[j][dim]) -
                                         abs(z_ij[i][dim] - z_ij[j][dim]))
                    c_ij[i][j] = c_ / (n_samples ** 2)
                else:
                    # Eq (20)
                    g_ = 1.
                    for dim in range(n_dims):
                        g_ = g_ * (1 + abs(z_ij[i][dim]))
                    h_ = 1.
                    for dim in range(n_dims):
                        h_ = h_ * (1 + 0.5 * abs(z_ij[i][dim]) -
                                   0.5 * (z_ij[i][dim] ** 2))
                    c_ij[i][j] = g_ / (n_samples ** 2) - 2 * h_ / n_samples
        return c_ij

    # Eq (22), typo in the article in the denominator: i2 -> i1
    num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) -
           abs(z_ij[i2, k] - z_ij[:, k]))
    denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) -
             abs(z_ij[i1, k] - z_ij[:, k]))
    gamma = num / denum

    # Eq (25), typo in the article: g is missing
    c_i1i1 = ((g_i(i1, z_ij) * alpha) / (n_samples ** 2) -
              2 * alpha * beta * h_i(i1, z_ij) / n_samples)
    # Eq (26), typo in the article: n ** 2
    c_i2i2 = ((g_i(i2, z_ij) / ((n_samples ** 2) * alpha)) -
              (2 * h_i(i2, z_ij) / (n_samples * alpha * beta)))

    c_ij = np.zeros((n_samples, n_samples)).tolist()
    c_ij = mat_c_ij(c_ij, z_ij.tolist(), n_samples, n_dims)
    c_ij = np.array(c_ij)

    # Eq (23)
    c_i1j = gamma * c_ij[i1, :]
    # Eq (24)
    c_i2j = c_ij[i2, :] / gamma

    # Sum over all rows except i1 and i2
    sum_ = c_i1j - c_ij[i1, :] + c_i2j - c_ij[i2, :]
    mask = np.ones(n_samples, dtype=bool)
    mask[[i1, i2]] = False
    sum_ = np.sum(sum_[mask])

    disc_ep = (disc + c_i1i1 - c_ij[i1, i1] + c_i2i2 - c_ij[i2, i2] +
               2 * sum_)

    return disc_ep

# import time
# i_time = time.time()
disc_new = perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)

# i_time = time.time()
disc_new2 = perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)
# print(time.time() - i_time)

print(f'Perturbation computation: {disc_new}')
print(f'Perturbation numba computation: {disc_new2}')
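
# Illustrative sanity check: both incremental updates should agree with the
# full recomputation on the perturbed design `doe` done above.
print('Consistency with full computation:',
      np.allclose([disc_new, disc_new2], sequences.discrepancy(doe)))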

import timeit

full_time = timeit.repeat('sequences.discrepancy(doe)',
                          number=20, repeat=1,
                          setup="from __main__ import sequences, doe, np")
full_time = np.mean(full_time)
print(f'Time full: {full_time} s')

perm_time = timeit.repeat('perturb_discrepancy(doe_init, row_1, row_2, col, disc_init)',
                          number=20, repeat=1,
                          setup="from __main__ import perturb_discrepancy, doe_init, row_1, row_2, col, disc_init, np")
perm_time = np.mean(perm_time)
print(f'Time permutation: {perm_time} s')

perm_numba_time = timeit.repeat('perturb_discrepancy_numba(doe_init, row_1, row_2, col, disc_init)',
                                number=20, repeat=1,
                                setup="from __main__ import perturb_discrepancy_numba, doe_init, row_1, row_2, col, disc_init, np")
perm_numba_time = np.mean(perm_numba_time)
print(f'Time numba permutation: {perm_numba_time} s')

# Ratios of the perturbation-based updates to the full computation time
print(perm_time / full_time)
print(perm_numba_time / full_time)