Last active
February 23, 2018 12:10
-
-
Save ipashchenko/a060865a96dbbbba2d7e99e08e8ba53d to your computer and use it in GitHub Desktop.
Full Bayes on RQ+WN with george and emcee
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import george | |
import emcee | |
import pickle | |
from functools import partial | |
import scipy.optimize as op | |
import matplotlib.pyplot as plt | |
from tqdm import tqdm | |
def nll(p, y, gp): | |
""" | |
:param p: | |
Mean, log(Disp_WN), log(A**2), log(alpha), log(scale**2) | |
""" | |
gp.set_parameter_vector(p) | |
ll = gp.log_likelihood(y, quiet=True) | |
return -ll if np.isfinite(ll) else 1e25 | |
def grad_nll(p, y, gp): | |
""" | |
:param p: | |
Mean, log(Disp_WN), log(A**2), log(alpha), log(scale**2) | |
""" | |
gp.set_parameter_vector(p) | |
return -gp.grad_log_likelihood(y, quiet=True) | |
def log_probability(params, gp): | |
gp.set_parameter_vector(params) | |
lp = gp.log_prior() | |
if not np.isfinite(lp): | |
return -np.inf | |
return gp.log_likelihood(y)+lp | |
def mcmc_gp(t, y, yerr, p0=None, t_new=None, n_new=None, n_samples=100, | |
show_fig=False, file_fig=None, color="#1f77b4", fig=None, | |
label=None, n_burn_in=100, n_prod=100, chain_file=None, | |
n_walkers=24, threads=1, thin=5): | |
""" | |
MCMC GP hyperparameters for given data. | |
:param t: | |
Array of times. | |
:param y: | |
Array of the observed values. | |
:param yerr: | |
Array of the observed errors. | |
:param p0: (optional) | |
Initial values for the hyperparameters ("Amp", "log_alpha", "tau", | |
"wn_disp"). | |
:param n_new: (optional) | |
Number of new points where to sample fitted GP. There will be ``n_new`` | |
points evently distributed in ``(t[0], t[-1])`` interval. If ``None`` | |
then time points where to sample fitted GP must be specified in | |
``t_new`` argument. (default: ``None``) | |
:param t_new: (optional) | |
Array of times where to sample fitted GP. This allows more efficiently | |
span time intervals near flare maximum. If ``None`` then time points | |
where to sample fitted GP must be specified by ``n_new`` argument. | |
(default: ``None``) | |
:param n_samples: (optional) | |
Number of samples from GP posterior to realize on time points specified | |
by ``n_new`` or ``t_new`` arguments. (default: ``100``) | |
:param show_fig: (optional) | |
Show figure? (default: ``False``) | |
:param file_fig: (optional) | |
File to save picture of fitted GP. If ``None`` then don't save picture. | |
(default: ``None``) | |
:param color: (optional) | |
Color for plots. If ``None`` then use standard. (default: ``None``) | |
:param fig: (optional) | |
Figure to plot. If ``None`` then create one. (default: ``None``) | |
:param label: (optional) | |
Label for the plot. If ``None`` then don't label. (default: ``None``) | |
:param n_burn_in: (optional) | |
Burnin size. (default: ``100``) | |
:param n_prod: (optional) | |
Production size. (default: ```100``) | |
:param n_walkers: (optional) | |
Number of walkers in ``emcee``. (default: ``24``) | |
:param thin: (optional) | |
Thin samples before selecting randomly ``n_samples``. (default: ``10``) | |
:return: | |
Dictionary with results. | |
:notes: | |
Points where to sample fitted GP could be specified with both ``t_new`` | |
and ``n_new`` but former has the priority. If none is specified than | |
samples for those points in ``t`` are returned and plotted. | |
""" | |
if chain_file is not None: | |
fn = open(chain_file, "w") | |
fn.close() | |
if p0 is None: | |
p0 = [np.mean(y), 0.2, 1, -2, 200] | |
kernel = p0[2]**2*george.kernels.RationalQuadraticKernel(log_alpha=p0[3], | |
metric=p0[4]**2) | |
gp = george.GP(kernel, mean=np.mean(y), fit_mean=True, | |
white_noise=np.log(p0[1]**2), fit_white_noise=True) | |
nll_ = partial(nll, y=y, gp=gp) | |
grad_nll_ = partial(grad_nll, y=y, gp=gp) | |
gp.compute(t, yerr) | |
print(gp.log_likelihood(y)) | |
p0 = gp.get_parameter_vector() | |
results = op.minimize(nll_, p0, jac=grad_nll_, method="L-BFGS-B") | |
gp.set_parameter_vector(results.x) | |
print("HP : {}".format(results.x)) | |
print(gp.log_likelihood(y)) | |
p0 = gp.get_parameter_vector() | |
ndim = len(p0) | |
sampler = emcee.EnsembleSampler(n_walkers, ndim, log_probability, | |
args=(gp,), threads=threads) | |
p0 = p0+1e-2*np.random.randn(n_walkers, ndim) | |
print("Running burn-in...") | |
for p0, lp, _ in tqdm(sampler.sample(p0, iterations=n_burn_in, | |
storechain=False)): | |
pass | |
sampler.reset() | |
print("Running production...") | |
for p0, lp, _ in tqdm(sampler.sample(p0, iterations=n_prod, | |
storechain=True)): | |
if chain_file is not None: | |
f = open(chain_file, "a") | |
for k in range(p0.shape[0]): | |
f.write("{0:4d} {1:s}\n".format(k, " ".join(p0[k]))) | |
f.close() | |
if fig is None: | |
fig = plt.figure(figsize=(12, 5)) | |
ax = fig.gca() | |
ax.errorbar(t, y, yerr=yerr, fmt=".", color=color, ms=3, elinewidth=0.5, | |
label=label) | |
if t_new is None: | |
if n_new is None: | |
t_new = t | |
else: | |
t_new = np.linspace(t[0], t[-1], n_new) | |
# Plot at least 24 HP posterior samples. | |
gp_samples = list() | |
grad_samples = list() | |
mu_samples = list() | |
samples = sampler.flatchain[::thin] | |
for i, s in enumerate(samples[np.random.randint(len(samples), | |
size=n_samples)]): | |
gp.set_parameter_vector(s) | |
sample = gp.sample_conditional(y, t_new, size=1) | |
grad_sample = np.gradient(sample) | |
mu = gp.predict(y, t_new, return_cov=False) | |
gp_samples.append(sample) | |
grad_samples.append(grad_sample) | |
mu_samples.append(mu) | |
if i < 25: | |
ax.plot(t_new, sample, color=color, alpha=0.1) | |
ax.plot(t_new, grad_sample, color="#ff7f0e", alpha=0.1) | |
ax.axhline(0) | |
ax.set_title("Sample from HP posterior") | |
ax.set_xlabel("Time, MJD") | |
ax.set_ylabel("Flux, Jy") | |
plt.gca().yaxis.set_major_locator(plt.MaxNLocator(5)) | |
if label is not None: | |
fig.legend(loc="best") | |
if show_fig: | |
fig.show() | |
if file_fig is not None: | |
fig.savefig(file_fig) | |
return {"sampler": sampler, "fig": fig, "t_new": t_new, "t": t, "y": y, | |
"yerr": yerr, "gp_samples": gp_samples, "mu_samples": mu_samples, | |
"grad_samples": grad_samples} | |
if __name__ == "__main__": | |
with open("0716+714_umrao_lc.pkl", "rb") as fo: | |
data = pickle.load(fo) | |
data15 = data[0] | |
# Use only part of the light curve to make it usable | |
t = data15[:, 0][:917] | |
y = data15[:, 1][:917] | |
yerr = data15[:, 2][:917] | |
result = mcmc_gp(t, y, yerr, n_new=2000, n_samples=100, show_fig=True, | |
n_burn_in=100, n_prod=100, threads=4) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment