Last active
May 11, 2020 11:16
-
-
Save ipashchenko/26fc6024dfaf54f2f5f3 to your computer and use it in GitHub Desktop.
Model selection via Bayes factor estimation with ``emcee`` (see http://stronginference.com/bayes-factors-pymc.html for ``pymc`` version of model probabilities calculation)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from scipy import stats | |
| from emcee import PTSampler, EnsembleSampler | |
| try: | |
| import matplotlib.pyplot as plt | |
| except: | |
| plt = None | |
| def lnlike_geom(p, y): | |
| """ | |
| The probability distribution of the number of failures before the first | |
| success, supported on the set { 0, 1, 2, 3, ... }. | |
| See http://en.wikipedia.org/wiki/Geometric_distribution | |
| """ | |
| return (y * np.log(1. - p) + np.log(p)).sum() | |
| # Beta(0.1, 5) prior on geometrical distribution ``p`` parameter | |
| # - VERY informative (but i am trying to replicate ``pymc`` result). | |
| def lnprior_geom(p): | |
| return stats.beta.logpdf(p, 0.1, 5.) | |
| def lnpost_geom(p, y): | |
| lnpr = lnprior_geom(p) | |
| if np.isinf(lnpr): | |
| return -np.inf | |
| else: | |
| return lnlike_geom(p, y) + lnpr | |
| def lnlike_pois(lambda_, y): | |
| return stats.poisson.logpmf(y, lambda_).sum() | |
| # Uniform(0, 1000) prior on Poisson distribution ``lambda`` parameter | |
| def lnprior_pois(lambda_): | |
| return stats.uniform.logpdf(lambda_, 0., 1000.) | |
| def lnpost_pois(lambda_, y): | |
| lnpr = lnprior_pois(lambda_) | |
| if np.isinf(lnpr): | |
| return -np.inf | |
| else: | |
| return lnlike_pois(lambda_, y) + lnpr | |
| if __name__ == '__main__': | |
| y = np.array([0, 1, 2, 3, 8]) | |
| ntemps = 20 | |
| nwalkers = 50 | |
| ndim = 1 | |
| print "Check prob. functions using EnsembleSampler" | |
| print "Checking geometrical model..." | |
| sampler = EnsembleSampler(nwalkers, ndim, lnpost_geom, args=(y,)) | |
| p0 = np.random.uniform(low=0, high=1.0, size=(nwalkers, ndim)) | |
| for p, lnprob, lnlike in sampler.sample(p0, iterations=200): | |
| pass | |
| sampler.reset() | |
| for p, lnprob, lnlike in sampler.sample(p, iterations=2000, thin=10): | |
| pass | |
| if plt: | |
| plt.hist(sampler.flatchain[::10, :], bins=30, normed=True) | |
| plt.xlabel(r'p for geometrical model') | |
| plt.ylabel(r'p(p | data)') | |
| plt.show() | |
| print "Checking Poisson model..." | |
| sampler = EnsembleSampler(nwalkers, ndim, lnpost_pois, args=(y,)) | |
| p0 = stats.gamma.rvs(3., 2., size=nwalkers * ndim).reshape((nwalkers, ndim)) | |
| for p, lnprob, lnlike in sampler.sample(p0, iterations=200): | |
| pass | |
| sampler.reset() | |
| for p, lnprob, lnlike in sampler.sample(p, iterations=2000, thin=10): | |
| pass | |
| if plt: | |
| plt.close() | |
| plt.hist(sampler.flatchain[::10, :], bins=30, normed=True) | |
| plt.xlabel(r'$\lambda$ for Poisson model') | |
| plt.ylabel(r'p($\lambda$ | data)') | |
| plt.show() | |
| print "Estimating evidence for geometrical model using PTSampler" | |
| sampler = PTSampler(ntemps, nwalkers, ndim, lnlike_geom, lnprior_geom, | |
| loglargs=(y,)) | |
| p0 = np.random.uniform(low=0, high=1.0, size=(ntemps, nwalkers, ndim)) | |
| for p, lnprob, lnlike in sampler.sample(p0, iterations=200): | |
| pass | |
| sampler.reset() | |
| for p, lnprob, lnlike in sampler.sample(p, lnprob0=lnprob, | |
| lnlike0=lnlike, | |
| iterations=2000, thin=10): | |
| pass | |
| # Check perfomance of Tl (this code is from @farr) | |
| print "Temperature swap acceptance rates are " | |
| for b, rate in zip(sampler.betas, sampler.tswap_acceptance_fraction): | |
| print 'T = ', 1.0/b, ' accept = ', rate | |
| if plt is not None: | |
| plt.close() | |
| # Print a plot of the TI integrand: | |
| mean_logls = np.mean(sampler.lnlikelihood.reshape((ntemps, -1)), axis=1) | |
| betas = sampler.betas | |
| plt.plot(betas, betas*mean_logls) # \int d\beta <logl> = \int d\ln\beta \beta <logl> | |
| plt.xscale('log') | |
| plt.xlabel(r'$\beta$') | |
| plt.ylabel(r'$\beta \left\langle \ln L \right\rangle_\beta$') | |
| plt.title('Thermodynamic Integration Integrand') | |
| plt.show() | |
| logZ_geom, uncertainty = sampler.thermodynamic_integration_log_evidence() | |
| print "Estimated log evidence for geometrical model = ", logZ_geom, "+/-", uncertainty | |
| print "Estimating evidence for Poisson model using PT" | |
| sampler = PTSampler(ntemps, nwalkers, ndim, lnlike_pois, lnprior_pois, | |
| loglargs=(y,)) | |
| p0 = stats.gamma.rvs(3., 2., size=nwalkers * ndim * ntemps).reshape((ntemps, | |
| nwalkers, | |
| ndim)) | |
| for p, lnprob, lnlike in sampler.sample(p0, iterations=200): | |
| pass | |
| sampler.reset() | |
| for p, lnprob, lnlike in sampler.sample(p, lnprob0=lnprob, | |
| lnlike0=lnlike, | |
| iterations=2000, thin=10): | |
| pass | |
| # Check perfomance of Tl (this code is from @farr) | |
| print "Temperature swap acceptance rates are " | |
| for b, rate in zip(sampler.betas, sampler.tswap_acceptance_fraction): | |
| print 'T = ', 1.0/b, ' accept = ', rate | |
| if plt is not None: | |
| # Print a plot of the TI integrand: | |
| plt.close() | |
| mean_logls = np.mean(sampler.lnlikelihood.reshape((ntemps, -1)), axis=1) | |
| betas = sampler.betas | |
| plt.plot(betas, betas*mean_logls) # \int d\beta <logl> = \int d\ln\beta \beta <logl> | |
| plt.xscale('log') | |
| plt.xlabel(r'$\beta$') | |
| plt.ylabel(r'$\beta \left\langle \ln L \right\rangle_\beta$') | |
| plt.title('Thermodynamic Integration Integrand') | |
| plt.show() | |
| logZ_pois, uncertainty = sampler.thermodynamic_integration_log_evidence() | |
| print "Estimated log evidence for Poisson model = ", logZ_pois, "+/-", uncertainty | |
| print "Bayes factor of geometrical to Poison model = ", np.exp(logZ_geom - | |
| logZ_pois) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great gist! PTSampler has been moved from emcee to ptemcee though :( Is there any chance you might be able to update the code with this in mind? I am not au fait with emcee but I will try and get stuck in asap. Thanks!