christopherlovell · May 11, 2018 11:07
diff --git a/resample_distribution.py b/resample_distribution.py
 import random

 import numpy as np
 from scipy.stats import binned_statistic

 def resample_distribution(x, y, k=300, bin_limits=None):
     """
     Draw indices into ``y`` so the selected values follow the distribution of ``x``.

     Both samples are histogrammed on the same bin edges.  Every element of
     ``y`` is given the weight ``count_x[bin] / count_y[bin]`` of the bin it
     falls in, so the expected number of draws from a bin is proportional to
     the number of ``x`` values in that bin.  Elements of ``y`` that lie
     outside ``bin_limits`` get weight zero and are never selected.

     Args:
         x (array): reference sample whose distribution should be matched.
         y (array): sample to draw from.
         k (int): number of indices to draw (with replacement).
         bin_limits (array): bin edges shared by both histograms.  Defaults
             to 30 evenly spaced edges on [0, 1.3]; pass explicit edges for
             data covering a different range.

     Returns:
         tuple: ``(indices, bin_limits)`` where ``indices`` is a list of ``k``
         positions into ``y`` and ``bin_limits`` are the edges that were used.

     Raises:
         ValueError: if ``y`` is non-empty but no element of ``y`` shares an
             occupied bin with ``x`` (every weight would be zero).
     """
     if bin_limits is None:
         bin_limits = np.linspace(0, 1.3, 30)

     count_x, _, _ = binned_statistic(x=x, values=np.ones(len(x)),
                                      statistic='count', bins=bin_limits)
     count_y, _, binnumber_y = binned_statistic(x=y, values=np.ones(len(y)),
                                                statistic='count', bins=bin_limits)

     # binned_statistic numbers bins from 1; 0 and n_bins + 1 flag values that
     # fall below / above the edges.  Indexing the counts with those would
     # wrap around to the last bin or run off the end, so mask them out.
     n_bins = len(count_x)
     in_range = (binnumber_y >= 1) & (binnumber_y <= n_bins)
     bin_index = binnumber_y[in_range] - 1

     # Any in-range y value sits in a bin with count_y >= 1, so the division
     # below can never hit zero.
     weights = np.zeros(len(y))
     weights[in_range] = count_x[bin_index] / count_y[bin_index]

     # An empty y is left to random.choices to reject, as before.
     if weights.size and not weights.any():
         raise ValueError(
             "no element of y falls in a bin that is populated by x; "
             "check bin_limits against the range of the data")

     indices = random.choices(np.arange(len(y)), weights=weights, k=k)

     return indices, bin_limits
	from scipy.stats import binned_statistic
	import random

	def resample_distribution(x, y, k=300, bin_limits=None):
	    """
	    Draw indices into ``y`` so the selected values follow the distribution of ``x``.

	    Both samples are histogrammed on the same bin edges.  Every element of
	    ``y`` is given the weight ``count_x[bin] / count_y[bin]`` of the bin it
	    falls in, so the expected number of draws from a bin is proportional to
	    the number of ``x`` values in that bin.  Elements of ``y`` that lie
	    outside ``bin_limits`` get weight zero and are never selected.

	    Args:
	        x (array): reference sample whose distribution should be matched.
	        y (array): sample to draw from.
	        k (int): number of indices to draw (with replacement).
	        bin_limits (array): bin edges shared by both histograms.  Defaults
	            to 30 evenly spaced edges on [0, 1.3]; pass explicit edges for
	            data covering a different range.

	    Returns:
	        tuple: ``(indices, bin_limits)`` where ``indices`` is a list of ``k``
	        positions into ``y`` and ``bin_limits`` are the edges that were used.

	    Raises:
	        ValueError: if ``y`` is non-empty but no element of ``y`` shares an
	            occupied bin with ``x`` (every weight would be zero).
	    """
	    if bin_limits is None:
	        bin_limits = np.linspace(0, 1.3, 30)

	    count_x, _, _ = binned_statistic(x=x, values=np.ones(len(x)),
	                                     statistic='count', bins=bin_limits)
	    count_y, _, binnumber_y = binned_statistic(x=y, values=np.ones(len(y)),
	                                               statistic='count', bins=bin_limits)

	    # binned_statistic numbers bins from 1; 0 and n_bins + 1 flag values that
	    # fall below / above the edges.  Indexing the counts with those would
	    # wrap around to the last bin or run off the end, so mask them out.
	    n_bins = len(count_x)
	    in_range = (binnumber_y >= 1) & (binnumber_y <= n_bins)
	    bin_index = binnumber_y[in_range] - 1

	    # Any in-range y value sits in a bin with count_y >= 1, so the division
	    # below can never hit zero.
	    weights = np.zeros(len(y))
	    weights[in_range] = count_x[bin_index] / count_y[bin_index]

	    # An empty y is left to random.choices to reject, as before.
	    if weights.size and not weights.any():
	        raise ValueError(
	            "no element of y falls in a bin that is populated by x; "
	            "check bin_limits against the range of the data")

	    indices = random.choices(np.arange(len(y)), weights=weights, k=k)

	    return indices, bin_limits