Created
February 8, 2010 15:40
-
-
Save dwf/298249 to your computer and use it in GitHub Desktop.
Some code (in progress) for maximum likelihood mixtures of beta distributions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import scipy.special as spec | |
| import scipy.optimize as opt | |
| def negative_beta_likelihood(params, sumlogd, sumlog1md, numpts): | |
| """ | |
| Negative log likelihood for i.i.d. beta random variables, based | |
| on the parameters and the sufficient statistics. | |
| params -- length-2 array of (log) parameters | |
| sumlogd -- sum of the natural logarithm of the data points | |
| sumlogd1md -- sum of the natural logarithm of 1 - the data | |
| numpts -- number of data points | |
| """ | |
| params = np.exp(params) | |
| alpha, beta = params | |
| logbeta = spec.gammaln(alpha) + spec.gammaln(beta) - \ | |
| spec.gammaln(alpha + beta) | |
| fval = -alpha * sumlogd - beta * sumlog1md + sumlogd + \ | |
| sumlog1md + numpts * logbeta | |
| return fval | |
| def negative_beta_likelihood_gradient(params, sumlogd, sumlog1md, numpts): | |
| """ | |
| Gradient of the negative log likelihood for i.i.d. beta random variables, | |
| based on the parameters and the sufficient statistics. | |
| params -- length-2 array of (log) parameters | |
| sumlogd -- sum of the natural logarithm of the data points | |
| sumlogd1md -- sum of the natural logarithm of 1 - the data | |
| numpts -- number of data points | |
| """ | |
| params = np.exp(params) | |
| psiaplusb = spec.digamma(np.sum(params)) | |
| psia = spec.digamma(params[0]) | |
| psib = spec.digamma(params[1]) | |
| grad = -params * np.array([sumlogd - numpts * (psia - psiaplusb), | |
| sumlog1md - numpts * (psib - psiaplusb)]) | |
| return grad | |
| def fit_beta_from_sufficient_statistics(sumlogd, sumlog1md, numpts): | |
| xopt = opt.fmin_cg(negative_beta_likelihood, | |
| np.random.randn(2), | |
| fprime=negative_beta_likelihood_gradient, | |
| args=(sumlogd, sumlog1md, numpts), | |
| disp=False) | |
| return np.exp(xopt) | |
| def fit_beta_maximum_likelihood(data): | |
| numpts = data.shape[0] | |
| data = np.random.beta(5, 3, size=numpts) | |
| sumlogd = np.log(data).sum() | |
| sumlog1md = np.log(1 - data).sum() | |
| return fit_beta_from_sufficient_statistics(sumlogd, sumlog1md, numpts) | |
| def m_step(resp, logdata, log1mdata, sumresp, params=None): | |
| ncomponent, numpts = resp.shape | |
| if params is None: | |
| params = np.empty((ncomponent, 2)) | |
| sumlogd = (logdata * resp).sum(axis=1) | |
| sumlog1md = (log1mdata * resp).sum(axis=1) | |
| for comp in xrange(ncomponent): | |
| params[comp] = fit_beta_from_sufficient_statistics( | |
| sumlogd[comp], | |
| sumlog1md[comp], | |
| sumresp[comp]) | |
| mixprop = sumresp / sumresp.sum() | |
| return params, mixprop | |
| def fit_mixture(ncomponent, data): | |
| numpts = data.shape[0] | |
| resp = np.zeros((ncomponent, numpts), dtype=float) | |
| assignments = np.random.random_integers( | |
| low=0, | |
| high=(ncomponent - 1), | |
| size=numpts | |
| ) | |
| resp[assignments, np.arange(numpts)] = 1. | |
| logdata = np.log(data) | |
| log1mdata = np.log(1 - data) | |
| sumresp = resp.sum(axis=1) | |
| params, mix = m_step(resp, logdata, log1mdata, sumresp) | |
| print params, mix | |
| if __name__ == "__main__": | |
| numpts = 5000000 | |
| data = np.random.beta(5, 3, size=numpts) | |
| params = fit_beta_maximum_likelihood(data) | |
| print params |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment