@ipashchenko
Last active February 23, 2018 12:10
Full Bayes on RQ+WN with george and emcee
import numpy as np
import george
import emcee
import pickle
from functools import partial
import scipy.optimize as op
import matplotlib.pyplot as plt
from tqdm import tqdm
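
# The model sampled below: a GP with a constant (fitted) mean, a fitted
# white-noise term and a Rational Quadratic kernel which, in george's
# log_alpha/metric parameterization, is
#     k(dt) = A**2 * (1 + dt**2 / (2 * alpha * scale**2))**(-alpha).
# As the docstrings below note, the parameter vector exposed by george is
# (mean, log(wn_disp**2), log(A**2), log(alpha), log(scale**2)).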

def nll(p, y, gp):
    """
    :param p:
        Mean, log(Disp_WN**2), log(A**2), log(alpha), log(scale**2)
    """
    gp.set_parameter_vector(p)
    ll = gp.log_likelihood(y, quiet=True)
    return -ll if np.isfinite(ll) else 1e25

def grad_nll(p, y, gp):
    """
    :param p:
        Mean, log(Disp_WN**2), log(A**2), log(alpha), log(scale**2)
    """
    gp.set_parameter_vector(p)
    return -gp.grad_log_likelihood(y, quiet=True)
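
# A small optional sanity check (not used by the pipeline below, the name
# ``check_grad_nll`` is introduced here for illustration): compare the analytic
# gradient from ``grad_nll`` with central finite differences of ``nll``. It
# assumes ``gp.compute`` has already been called on the data.
def check_grad_nll(p, y, gp, eps=1e-6):
    num = np.array([(nll(p + eps*e, y, gp) - nll(p - eps*e, y, gp)) / (2*eps)
                    for e in np.eye(len(p))])
    return num, grad_nll(p, y, gp)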

def log_probability(params, y, gp):
    # ``gp.log_prior()`` only checks the parameter bounds (it is flat inside
    # them), so with unbounded parameters this is just the GP log-likelihood.
    gp.set_parameter_vector(params)
    lp = gp.log_prior()
    if not np.isfinite(lp):
        return -np.inf
    return gp.log_likelihood(y) + lp

def mcmc_gp(t, y, yerr, p0=None, t_new=None, n_new=None, n_samples=100,
            show_fig=False, file_fig=None, color="#1f77b4", fig=None,
            label=None, n_burn_in=100, n_prod=100, chain_file=None,
            n_walkers=24, threads=1, thin=5):
    """
    MCMC sampling of the GP hyperparameters for the given data.

    :param t:
        Array of times.
    :param y:
        Array of the observed values.
    :param yerr:
        Array of the observed errors.
    :param p0: (optional)
        Initial values of the hyperparameters (mean, white-noise dispersion,
        amplitude, log(alpha), scale). (default: ``None``)
    :param t_new: (optional)
        Array of times where to sample the fitted GP. This allows one to span
        the time intervals near flare maxima more efficiently. If ``None`` then
        the time points where to sample the fitted GP must be specified by the
        ``n_new`` argument. (default: ``None``)
    :param n_new: (optional)
        Number of new points where to sample the fitted GP. There will be
        ``n_new`` points evenly distributed in the ``(t[0], t[-1])`` interval.
        If ``None`` then the time points where to sample the fitted GP must be
        specified in the ``t_new`` argument. (default: ``None``)
    :param n_samples: (optional)
        Number of samples from the GP posterior to realize at the time points
        specified by ``n_new`` or ``t_new``. (default: ``100``)
    :param show_fig: (optional)
        Show the figure? (default: ``False``)
    :param file_fig: (optional)
        File to save the picture of the fitted GP to. If ``None`` then don't
        save the picture. (default: ``None``)
    :param color: (optional)
        Color for the plots. If ``None`` then use the default one.
        (default: ``None``)
    :param fig: (optional)
        Figure to plot on. If ``None`` then create a new one.
        (default: ``None``)
    :param label: (optional)
        Label for the plot. If ``None`` then don't label. (default: ``None``)
    :param n_burn_in: (optional)
        Burn-in size. (default: ``100``)
    :param n_prod: (optional)
        Production size. (default: ``100``)
    :param chain_file: (optional)
        File to dump the production chain to. If ``None`` then don't dump it.
        (default: ``None``)
    :param n_walkers: (optional)
        Number of walkers in ``emcee``. (default: ``24``)
    :param threads: (optional)
        Number of threads used by ``emcee``. (default: ``1``)
    :param thin: (optional)
        Thin the samples before randomly selecting ``n_samples`` of them.
        (default: ``5``)

    :return:
        Dictionary with the results.

    :notes:
        The points where to sample the fitted GP can be specified with both
        ``t_new`` and ``n_new``, but the former has priority. If neither is
        specified then samples at the points in ``t`` are returned and plotted.
    """
    if chain_file is not None:
        fn = open(chain_file, "w")
        fn.close()

    if p0 is None:
        p0 = [np.mean(y), 0.2, 1, -2, 200]

    kernel = p0[2]**2*george.kernels.RationalQuadraticKernel(
        log_alpha=p0[3], metric=p0[4]**2)
    gp = george.GP(kernel, mean=np.mean(y), fit_mean=True,
                   white_noise=np.log(p0[1]**2), fit_white_noise=True)
    nll_ = partial(nll, y=y, gp=gp)
    grad_nll_ = partial(grad_nll, y=y, gp=gp)
    gp.compute(t, yerr)
    print(gp.log_likelihood(y))

    # Maximize the likelihood first to get a reasonable starting point.
    p0 = gp.get_parameter_vector()
    results = op.minimize(nll_, p0, jac=grad_nll_, method="L-BFGS-B")
    gp.set_parameter_vector(results.x)
    print("HP : {}".format(results.x))
    print(gp.log_likelihood(y))

    # Initialize the walkers in a small ball around the ML solution.
    p0 = gp.get_parameter_vector()
    ndim = len(p0)
    sampler = emcee.EnsembleSampler(n_walkers, ndim, log_probability,
                                    args=(y, gp), threads=threads)
    p0 = p0 + 1e-2*np.random.randn(n_walkers, ndim)

    print("Running burn-in...")
    for p0, lp, _ in tqdm(sampler.sample(p0, iterations=n_burn_in,
                                         storechain=False)):
        pass
    sampler.reset()

    print("Running production...")
    for p0, lp, _ in tqdm(sampler.sample(p0, iterations=n_prod,
                                         storechain=True)):
        if chain_file is not None:
            f = open(chain_file, "a")
            for k in range(p0.shape[0]):
                f.write("{0:4d} {1:s}\n".format(
                    k, " ".join(map(str, p0[k]))))
            f.close()

    if fig is None:
        fig = plt.figure(figsize=(12, 5))
    ax = fig.gca()
    ax.errorbar(t, y, yerr=yerr, fmt=".", color=color, ms=3, elinewidth=0.5,
                label=label)

    if t_new is None:
        if n_new is None:
            t_new = t
        else:
            t_new = np.linspace(t[0], t[-1], n_new)

    # Draw ``n_samples`` realizations from the HP posterior and plot at most
    # 25 of them (together with their time derivatives).
    gp_samples = list()
    grad_samples = list()
    mu_samples = list()
    samples = sampler.flatchain[::thin]
    for i, s in enumerate(samples[np.random.randint(len(samples),
                                                    size=n_samples)]):
        gp.set_parameter_vector(s)
        sample = gp.sample_conditional(y, t_new, size=1)
        grad_sample = np.gradient(sample)
        mu = gp.predict(y, t_new, return_cov=False)
        gp_samples.append(sample)
        grad_samples.append(grad_sample)
        mu_samples.append(mu)
        if i < 25:
            ax.plot(t_new, sample, color=color, alpha=0.1)
            ax.plot(t_new, grad_sample, color="#ff7f0e", alpha=0.1)

    ax.axhline(0)
    ax.set_title("Samples from the HP posterior")
    ax.set_xlabel("Time, MJD")
    ax.set_ylabel("Flux, Jy")
    ax.yaxis.set_major_locator(plt.MaxNLocator(5))
    if label is not None:
        fig.legend(loc="best")
    if show_fig:
        fig.show()
    if file_fig is not None:
        fig.savefig(file_fig)

    return {"sampler": sampler, "fig": fig, "t_new": t_new, "t": t, "y": y,
            "yerr": yerr, "gp_samples": gp_samples, "mu_samples": mu_samples,
            "grad_samples": grad_samples}


if __name__ == "__main__":
    with open("0716+714_umrao_lc.pkl", "rb") as fo:
        data = pickle.load(fo)
    data15 = data[0]
    # Use only part of the light curve to keep the computation tractable.
    t = data15[:, 0][:917]
    y = data15[:, 1][:917]
    yerr = data15[:, 2][:917]
    result = mcmc_gp(t, y, yerr, n_new=2000, n_samples=100, show_fig=True,
                     n_burn_in=100, n_prod=100, threads=4)
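
    # Optional follow-up (a sketch using the helper defined above): overlay the
    # posterior-median curve, print the hyperparameter posterior medians and
    # dump the flat chain to a text file (the file name here is arbitrary).
    plot_gp_summary(result)
    flatchain = result["sampler"].flatchain
    print("HP posterior medians: {}".format(np.median(flatchain, axis=0)))
    np.savetxt("hp_flatchain.txt", flatchain)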