Skip to content

Instantly share code, notes, and snippets.

@ipashchenko
Last active August 29, 2015 14:21
Show Gist options
  • Select an option

  • Save ipashchenko/58ce12b24f1225525c0e to your computer and use it in GitHub Desktop.

Select an option

Save ipashchenko/58ce12b24f1225525c0e to your computer and use it in GitHub Desktop.
Discrete-parameter sampling with ``emcee`` using @lfloeer's hack.
import emcee
import numpy as np
from scipy import stats
# Matplotlib is optional: plotting at the end of the script is skipped
# when it is unavailable.
try:
    import matplotlib.pyplot as plt
except ImportError:  # was a bare ``except`` -- catch only the import failure
    plt = None

# Element-wise wrappers so ``round``/``float`` apply across numpy arrays.
vround = np.vectorize(round)
vfloat = np.vectorize(float)
def ln_prior(p):
    """
    Uniform prior on trials and beta prior on probability per trial.
    """
    trials, prob = p[0], p[1]
    # Outside the support of either prior -> log-probability of -inf.
    if not (5.5 <= trials <= 15.5) or not (0. < prob < 1.):
        return -np.inf
    # Beta(1, 1) is flat on (0, 1); its logpdf is 0 there.
    return stats.beta.logpdf(prob, 1., 1.)
def ln_like(p, data):
    """
    Log-likelihood for binomial distribution. The number of trials in the
    binomial distribution is calculated by rounding the first parameter.
    """
    # Round the continuous walker coordinate to an integer trial count.
    n_trials = round(p[0])
    log_pmfs = stats.binom.logpmf(data, n_trials, p[1])
    return np.sum(log_pmfs)
def ln_posterior(p, data):
    """Log-posterior: prior plus likelihood, short-circuiting on -inf prior."""
    prior = ln_prior(p)
    # Skip the likelihood entirely when the prior already rules ``p`` out.
    if not np.isfinite(prior):
        return prior
    return prior + ln_like(p, data)
def posterior_p(n_max, data, alpha, beta, delta=0.001):
    """
    Posterior for ``(p, n)`` marginalized over ``p``.

    See https://www.cna.org/sites/default/files/research/2787018500.pdf

    :param n_max:
        Upper limit for uniform prior on binomial parameter ``n``.
    :param data:
        Container of data (number of successes in each of unknown number ``n``
        trials).
    :param alpha:
        Beta distribution parameter for prior on ``p``.
    :param beta:
        Beta distribution parameter for prior on ``p``.
    :param delta: (optional)
        Relative-weight threshold that stops the recursion once a new term
        contributes less than this fraction of the accumulated weight.
    :return:
        Two numpy arrays - for ``n`` and for posterior probability of ``n``.
    """
    beta = float(beta)
    alpha = float(alpha)
    data = np.asarray(data)
    r = len(data)     # number of observations
    t = sum(data)     # total number of successes
    xmax = max(data)  # largest observed count; posterior support starts here
    # ``q[j]`` is the unnormalized posterior weight of ``n = xmax + j``,
    # built up term by term via the recursion below (q[0] = 1 by convention).
    q = [1.]
    js = [0]
    j = 0
    while True:
        # Stop when the newest term is a negligible fraction of the total
        # weight, or when the uniform-prior upper bound ``n_max`` is reached.
        if (q[j] / sum(q) < delta) or (j >= n_max - xmax):
            break
        j += 1
        # The next weight is the previous one times a ratio assembled from
        # three factors (see the referenced paper for the derivation).
        factor1 = q[j - 1]
        factor2 = (r * xmax - t + beta + (j - 1) * r + np.arange(r)) / \
            (r * xmax + alpha + beta + (j - 1) * r + np.arange(r))
        # ``cumprod()[-1]`` collapses the element-wise ratios into a product.
        factor2 = factor2.cumprod()[-1]
        factor3 = float((xmax + j)) ** r / (float(xmax) - data + j).cumprod()[-1]
        q_j = factor1 * factor2 * factor3
        q.append(q_j)
        js.append(j)
    # Shift indices to actual ``n`` values and normalize weights to sum to 1.
    return np.asarray(js) + xmax, np.asarray(q) / sum(q)
if __name__ == '__main__':
    np.random.seed(42)
    n_walkers = 100
    n_dim = 2

    # Generate data: 250 draws from Binomial(n=10, p=0.3).
    data = stats.binom(10, 0.3).rvs(250)

    # Sample with ``emcee``
    sampler = emcee.EnsembleSampler(n_walkers, n_dim, ln_posterior, args=[data])
    # Walkers start uniformly over n in [5, 15] and p in [0.2, 0.4].
    scales = np.array([[10., 0.2]])
    offsets = np.array([[5, 0.2]])
    pos0 = np.random.uniform(0, 1, (n_walkers, n_dim)) * scales + offsets
    sampler.run_mcmc(pos0, 5000)

    # Discard the first 500 steps (x 100 walkers = 50000 flat samples) as
    # burn-in and thin by 50 to remove autocorrelation.
    burn_in = 50000
    thin = 50
    counts, bins = np.histogram(vround(sampler.flatchain[burn_in::thin, 0]),
                                bins=10, range=[5.5, 15.5])
    p_n_emcee = vfloat(counts) / sum(counts)
    # Binomial standard errors for the histogram; a zero-count bin produces
    # a NaN here, which matplotlib simply skips.
    p_n_emcee_error = np.sqrt(p_n_emcee * (1. - p_n_emcee) / counts)

    # Calculate marginalized posterior for ``n``
    alpha = 1.
    beta = 1.
    # Upper limit for uniform prior on binomial parameter ``n``.
    n_max = 15
    n, p_n = posterior_p(n_max, data, alpha, beta)

    if plt:
        # Plot probabilities from calculation
        plt.figure()
        plt.subplot(211)
        plt.plot(n, p_n, '.r', label='calculation')
        plt.ylim([0, 0.25])
        # Plot probabilities from ``emcee`` sampling
        plt.errorbar(n, p_n_emcee, p_n_emcee_error, fmt='.',
                     label='emcee sampling')
        plt.xlabel(r'n for binomial model')
        plt.ylabel(r'p(n | data)')
        plt.legend(loc='best')
        plt.subplot(212)
        # Posterior for ``p``; use the SAME burn-in/thinning as above (the
        # original inconsistently discarded only 30000 samples here).
        # ``normed`` was removed from matplotlib; ``density`` is the
        # maintained spelling (matplotlib >= 2.1).
        plt.hist(sampler.flatchain[burn_in::thin, 1], bins=50, range=[0, 1],
                 density=True)
        plt.xlabel(r'p for binomial model')
        plt.ylabel(r'p(p | data)')
        plt.show()
    else:
        # ``print`` as a function is valid in both Python 2 and Python 3
        # (the original Python-2-only print statement breaks on Python 3).
        print("Install matplotlib.pyplot for plots.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment