Using SciPy, NumPy and PyPlot to draw error-bar plots. Notes on a comparison of frequentism vs. Bayesianism.
import numpy as np
from scipy import stats

np.random.seed(1)
F_true = 1000  # true value (Poisson mean) we will try to recover
N = 50         # number of measurements
F = stats.poisson(F_true).rvs(N)  # N Poisson-distributed measurements
e = np.sqrt(F)  # errors on Poisson counts estimated via square root
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.errorbar(F, np.arange(N), xerr=e, fmt='ok', ecolor='gray', alpha=0.5)
ax.vlines([F_true], 0, N, linewidth=5, alpha=0.2)
ax.set_xlabel("Range")
ax.set_ylabel("Measurement number")
plt.show()
# Frequentist approach
# Based on the likelihood function L(D | F_true):
#   L(D | F_true) = prod_{i=1..N} P(D_i | F_true)
# Maximizing it with Gaussian errors gives the inverse-variance weighted mean:
#   F_est = sum(w_i * F_i) / sum(w_i),  sigma_est = (sum(w_i))**-0.5,  w_i = 1 / e_i**2
w = 1. / e ** 2
print("""
F_true = {0}
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements)
""".format(F_true, (w * F).sum() / w.sum(), w.sum() ** -0.5, N))
# With the current 50 samples the estimated error is about 0.4%:
#
# F_true = 1000
# F_est = 998 +/- 4 (based on 50 measurements)
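
# Illustrative sketch: the frequentist error bar shrinks roughly as 1/sqrt(N).
# Repeating the same weighted estimator on a larger synthetic sample makes that
# visible (N_big is an arbitrary value chosen here purely for illustration).
N_big = 5000
F_big = stats.poisson(F_true).rvs(N_big)
e_big = np.sqrt(F_big)
w_big = 1. / e_big ** 2
print("N = {0}: F_est = {1:.1f} +/- {2:.2f}".format(
    N_big, (w_big * F_big).sum() / w_big.sum(), w_big.sum() ** -0.5))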
# Bayesian approach
# Bayes' theorem:
#   P(F_true | D) = P(D | F_true) P(F_true) / P(D)
#
#   P(F_true | D) => posterior
#   P(D | F_true) => likelihood (the same quantity the frequentist uses)
#   P(F_true)     => prior (what we knew before seeing the data)
#   P(D)          => probability of the data (normalization term)
#
# With a flat prior, P(F_true) = const, the posterior is proportional to the
# likelihood:
#   P(F_true | D) ∝ L(D | F_true)
#
# Demonstration using MCMC
import emcee

def log_prior(theta):
    return 1  # flat prior: any constant works, since additive offsets do not affect the sampling

def log_likelihood(theta, F, e):
    # Gaussian approximation to the Poisson likelihood of the measurements
    return -0.5 * np.sum(np.log(2 * np.pi * e ** 2) + (F - theta[0]) ** 2 / e ** 2)

def log_posterior(theta, F, e):
    # log posterior = log prior + log likelihood (up to a normalization constant)
    return log_prior(theta) + log_likelihood(theta, F, e)
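
# Quick illustrative sanity check: the log-posterior should be larger at the
# true value than at a point far from it (the +100 offset is arbitrary).
print(log_posterior([F_true], F, e) > log_posterior([F_true + 100], F, e))  # expected: True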
# Draw samples from the posterior
ndim = 1       # number of parameters in the model
nwalkers = 50  # number of MCMC walkers
nburn = 1000   # "burn-in" period to let chains stabilize
nsteps = 2000  # number of MCMC steps to take

# start the walkers at random locations between 0 and 2000
starting_guesses = 2000 * np.random.rand(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[F, e])
sampler.run_mcmc(starting_guesses, nsteps)

# sampler.chain has shape (nwalkers, nsteps, ndim)
sample = sampler.chain[:, nburn:, :].ravel()  # discard burn-in points and flatten
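
# Optional diagnostic sketch: emcee exposes the per-walker acceptance fraction;
# values far outside roughly 0.2-0.5 usually signal a poorly tuned sampler.
print("mean acceptance fraction: {0:.2f}".format(np.mean(sampler.acceptance_fraction)))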
# Check the result
plt.hist(sample, bins=50, histtype="stepfilled", alpha=0.3, density=True)  # `normed` was removed in newer matplotlib

# plot a best-fit Gaussian
F_fit = np.linspace(975, 1025)
pdf = stats.norm(np.mean(sample), np.std(sample)).pdf(F_fit)
plt.plot(F_fit, pdf, '-k')
plt.xlabel("F")
plt.ylabel("P(F)")
plt.show()
print(""" | |
F_true = {0} | |
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements) | |
""".format(F_true, np.mean(sample), np.std(sample), N)) |