Skip to content

Instantly share code, notes, and snippets.

@lukedeo
Created March 3, 2016 08:51
Show Gist options
  • Save lukedeo/e07f26539ec4a38529e7 to your computer and use it in GitHub Desktop.
Save lukedeo/e07f26539ec4a38529e7 to your computer and use it in GitHub Desktop.
A class to do n-dimensional reweighting
import numpy as np
class NDWeights(object):
    '''
    An n-dimensional reweighting object.

    Histograms a dataset and a reference dataset on a common binning and
    stores the per-bin ratio (reference / data). The ratio can then be looked
    up for arbitrary samples to obtain per-sample weights.
    '''

    def __init__(self, bins):
        '''
        Constructor for weighting

        Args:
        -----
            bins: an int, a tuple of ints, an array, or a tuple of arrays
                depending on the shape of the incoming content. Passed
                straight to np.histogramdd; replaced by the computed bin
                edges once fit() has been called.

        Examples:
        ---------
            Here, we weight the distribution in X to the distribution in
            flat_dist

            >>> wt = NDWeights(10)
            >>> X = np.random.normal(0, 5, (1000, 3)) # a random dataset
            >>> flat_dist = np.random.uniform(-10, 10, (1000, 3))
            >>> wt.fit(X, flat_dist)
            >>> weights = wt.predict(X)
        '''
        super(NDWeights, self).__init__()
        self.bins = bins

    def fit(self, X, reference=None, normed=True):
        '''
        Determining binning and weighting for n-dimensional reweighting

        Args:
        -----
            X: your array of shape (nb_samples, nb_features) that you want to
                reweight to match some target distribution. A 1-D array is
                treated as a single feature.
            reference: an array of shape (nb_samples_2, nb_features), which
                represents samples from a distribution we want X to match.
                If None, a reference is drawn uniformly at random between the
                per-feature minimum and maximum of X.
            normed: Should the hists be normed? If they're not, the ratios
                are a count ratio to determine weights.

        Side effects:
        -------------
            Sets self.bins to the list of bin edges (one array per feature)
            and self.hypercube to the per-bin ratio H_ref / H. Bins where X
            has no entries hold inf (or NaN when the reference is empty there
            too).
        '''
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape((X.shape[0], 1))

        if reference is None:
            n = X.shape[0]
            reference = np.zeros((n, X.shape[1]))
            # Filled column by column so the random draws happen in the same
            # order for a given seed regardless of the number of features.
            for j in range(X.shape[1]):
                reference[:, j] = np.random.uniform(
                    X[:, j].min(), X[:, j].max(), n
                )
        else:
            reference = np.asarray(reference)
            if reference.ndim == 1:
                reference = reference.reshape((reference.shape[0], 1))

        # `density` is the current spelling of the removed `normed` keyword.
        H, self.bins = np.histogramdd(X, bins=self.bins, density=normed)
        # Reuse the edges derived from X so both histograms share a binning.
        H_ref, _ = np.histogramdd(reference, bins=self.bins, density=normed)

        # Empty bins in H are expected; predict() deals with the resulting
        # inf values, so silence the division warnings here.
        with np.errstate(divide='ignore', invalid='ignore'):
            self.hypercube = H_ref / H

    def predict(self, X):
        '''
        Get the weights for a new array

        Args:
        -----
            X: your array of shape (nb_samples, nb_features) that you want to
                reweight using your previously determined weights. A 1-D
                array is treated as a single feature.

        Returns:
        --------
            a numpy array of shape (nb_samples, ). Samples outside the fitted
            range use the weight of the nearest edge bin. Infinite weights
            (bins that were empty in the fitted data) are replaced by the
            largest finite weight among the returned samples; if there is no
            finite weight they are left as inf. NaN weights are returned
            unchanged.
        '''
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape((X.shape[0], 1))

        # side='right' puts a value equal to a lower edge into that bin
        # (matching histogramdd); clipping keeps the top edge and any
        # out-of-range value inside the valid index range instead of
        # wrapping around to index -1 or running off the end.
        ix = tuple(
            np.clip(
                np.searchsorted(edges, X[:, i], side='right') - 1,
                0,
                len(edges) - 2,
            )
            for i, edges in enumerate(self.bins)
        )

        # Indexing with a tuple of index arrays returns a fresh array, so the
        # stored hypercube is never modified below.
        weights = self.hypercube[ix]

        infinite = np.isinf(weights)
        if infinite.any():
            finite = weights[np.isfinite(weights)]
            if finite.size:
                weights[infinite] = finite.max()
        return weights
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment