Using SciPy, NumPy and PyPlot to draw error-bar plots. Notes on a comparison of frequentism vs. Bayesianism.
import numpy as np
from scipy import stats

np.random.seed(1)
F_true = 1000  # true value (Poisson mean) we will try to recover
N = 50         # number of measurements
F = stats.poisson(F_true).rvs(N)  # N Poisson-distributed measurements
e = np.sqrt(F)  # errors on Poisson counts estimated via square root
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.errorbar(F, np.arange(N), xerr=e, fmt='ok', ecolor='gray', alpha=0.5)
ax.vlines([F_true], 0, N, linewidth=5, alpha=0.2)
ax.set_xlabel("Range")
ax.set_ylabel("Measurement number")
plt.show()
# Frequentist approach
# Based on the likelihood function L(D | F_true):
#   L(D | F_true) = prod_{i=1..N} P(D_i | F_true)
# Maximizing it with Gaussian errors gives the inverse-variance weighted mean:
#   F_est = sum(w_i * F_i) / sum(w_i),  sigma_est = (sum(w_i))**-0.5,  w_i = 1 / e_i**2
w = 1. / e ** 2
print("""
F_true = {0}
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements)
""".format(F_true, (w * F).sum() / w.sum(), w.sum() ** -0.5, N))
# With the current 50 samples the estimated error is about 0.4%:
#
# F_true = 1000
# F_est = 998 +/- 4 (based on 50 measurements)
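
# Illustrative sketch: the frequentist error bar shrinks roughly as 1/sqrt(N).
# Repeating the same weighted estimator on a larger synthetic sample makes that
# visible (N_big is an arbitrary value chosen here purely for illustration).
N_big = 5000
F_big = stats.poisson(F_true).rvs(N_big)
e_big = np.sqrt(F_big)
w_big = 1. / e_big ** 2
print("N = {0}: F_est = {1:.1f} +/- {2:.2f}".format(
    N_big, (w_big * F_big).sum() / w_big.sum(), w_big.sum() ** -0.5))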
# Bayesian approach
# Bayes' theorem:
#   P(F_true | D) = P(D | F_true) P(F_true) / P(D)
#
#   P(F_true | D) => posterior
#   P(D | F_true) => likelihood (the same quantity the frequentist uses)
#   P(F_true)     => prior (what we knew before seeing the data)
#   P(D)          => probability of the data (normalization term)
#
# With a flat prior, P(F_true) = const, the posterior is proportional to the
# likelihood:
#   P(F_true | D) ∝ L(D | F_true)
#
# Demonstration using MCMC
import emcee

def log_prior(theta):
    return 1  # flat prior: any constant works, since additive offsets do not affect the sampling

def log_likelihood(theta, F, e):
    # Gaussian approximation to the Poisson likelihood of the measurements
    return -0.5 * np.sum(np.log(2 * np.pi * e ** 2) + (F - theta[0]) ** 2 / e ** 2)

def log_posterior(theta, F, e):
    # log posterior = log prior + log likelihood (up to a normalization constant)
    return log_prior(theta) + log_likelihood(theta, F, e)
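
# Quick illustrative sanity check: the log-posterior should be larger at the
# true value than at a point far from it (the +100 offset is arbitrary).
print(log_posterior([F_true], F, e) > log_posterior([F_true + 100], F, e))  # expected: True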
# Draw samples from the posterior
ndim = 1       # number of parameters in the model
nwalkers = 50  # number of MCMC walkers
nburn = 1000   # "burn-in" period to let chains stabilize
nsteps = 2000  # number of MCMC steps to take

# start the walkers at random locations between 0 and 2000
starting_guesses = 2000 * np.random.rand(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[F, e])
sampler.run_mcmc(starting_guesses, nsteps)

# sampler.chain has shape (nwalkers, nsteps, ndim)
sample = sampler.chain[:, nburn:, :].ravel()  # discard burn-in points and flatten
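
# Optional diagnostic sketch: emcee exposes the per-walker acceptance fraction;
# values far outside roughly 0.2-0.5 usually signal a poorly tuned sampler.
print("mean acceptance fraction: {0:.2f}".format(np.mean(sampler.acceptance_fraction)))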
# Check the result
plt.hist(sample, bins=50, histtype="stepfilled", alpha=0.3, density=True)  # `normed` was removed in newer matplotlib

# plot a best-fit Gaussian
F_fit = np.linspace(975, 1025)
pdf = stats.norm(np.mean(sample), np.std(sample)).pdf(F_fit)
plt.plot(F_fit, pdf, '-k')
plt.xlabel("F")
plt.ylabel("P(F)")
plt.show()
print(""" | |
F_true = {0} | |
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements) | |
""".format(F_true, np.mean(sample), np.std(sample), N)) |