Skip to content

Instantly share code, notes, and snippets.

@dwf
Created February 8, 2010 15:40
Show Gist options
  • Select an option

  • Save dwf/298249 to your computer and use it in GitHub Desktop.

Select an option

Save dwf/298249 to your computer and use it in GitHub Desktop.
Some code (in progress) for maximum likelihood mixtures of beta distributions.
import numpy as np
import scipy.special as spec
import scipy.optimize as opt
def negative_beta_likelihood(params, sumlogd, sumlog1md, numpts):
"""
Negative log likelihood for i.i.d. beta random variables, based
on the parameters and the sufficient statistics.
params -- length-2 array of (log) parameters
sumlogd -- sum of the natural logarithm of the data points
sumlogd1md -- sum of the natural logarithm of 1 - the data
numpts -- number of data points
"""
params = np.exp(params)
alpha, beta = params
logbeta = spec.gammaln(alpha) + spec.gammaln(beta) - \
spec.gammaln(alpha + beta)
fval = -alpha * sumlogd - beta * sumlog1md + sumlogd + \
sumlog1md + numpts * logbeta
return fval
def negative_beta_likelihood_gradient(params, sumlogd, sumlog1md, numpts):
"""
Gradient of the negative log likelihood for i.i.d. beta random variables,
based on the parameters and the sufficient statistics.
params -- length-2 array of (log) parameters
sumlogd -- sum of the natural logarithm of the data points
sumlogd1md -- sum of the natural logarithm of 1 - the data
numpts -- number of data points
"""
params = np.exp(params)
psiaplusb = spec.digamma(np.sum(params))
psia = spec.digamma(params[0])
psib = spec.digamma(params[1])
grad = -params * np.array([sumlogd - numpts * (psia - psiaplusb),
sumlog1md - numpts * (psib - psiaplusb)])
return grad
def fit_beta_from_sufficient_statistics(sumlogd, sumlog1md, numpts):
xopt = opt.fmin_cg(negative_beta_likelihood,
np.random.randn(2),
fprime=negative_beta_likelihood_gradient,
args=(sumlogd, sumlog1md, numpts),
disp=False)
return np.exp(xopt)
def fit_beta_maximum_likelihood(data):
numpts = data.shape[0]
data = np.random.beta(5, 3, size=numpts)
sumlogd = np.log(data).sum()
sumlog1md = np.log(1 - data).sum()
return fit_beta_from_sufficient_statistics(sumlogd, sumlog1md, numpts)
def m_step(resp, logdata, log1mdata, sumresp, params=None):
ncomponent, numpts = resp.shape
if params is None:
params = np.empty((ncomponent, 2))
sumlogd = (logdata * resp).sum(axis=1)
sumlog1md = (log1mdata * resp).sum(axis=1)
for comp in xrange(ncomponent):
params[comp] = fit_beta_from_sufficient_statistics(
sumlogd[comp],
sumlog1md[comp],
sumresp[comp])
mixprop = sumresp / sumresp.sum()
return params, mixprop
def fit_mixture(ncomponent, data):
numpts = data.shape[0]
resp = np.zeros((ncomponent, numpts), dtype=float)
assignments = np.random.random_integers(
low=0,
high=(ncomponent - 1),
size=numpts
)
resp[assignments, np.arange(numpts)] = 1.
logdata = np.log(data)
log1mdata = np.log(1 - data)
sumresp = resp.sum(axis=1)
params, mix = m_step(resp, logdata, log1mdata, sumresp)
print params, mix
if __name__ == "__main__":
numpts = 5000000
data = np.random.beta(5, 3, size=numpts)
params = fit_beta_maximum_likelihood(data)
print params
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment