Last active
February 20, 2018 16:48
-
-
Save ipashchenko/b8a489e540604cc75b697f545b7eabf2 to your computer and use it in GitHub Desktop.
robust GP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import numpy as np | |
import pymc3 as pm | |
import matplotlib.pyplot as plt | |
def fit_robustly(t, y, yerr, draws=500, chains=None, njobs=4, tune=500, | |
n_new=None, t_new=None, show_traceplot=False, | |
show_fitted_GP=True, file_fig=None, progressbar=True): | |
""" | |
Fit Latent Variable GP with Student Likelihood to data. | |
:param t: | |
Array of times. | |
:param y: | |
Array of values. | |
:param yerr: | |
Array of errors. | |
:param draws: (optional) | |
The number of samples to draw. Defaults to 500. The number of tuned | |
samples are discarded by default. (default: ``500``) | |
:param chains: (optional) | |
The number of chains to sample. Running independent chains | |
is important for some convergence statistics and can also | |
reveal multiple modes in the posterior. If ``None``, then set to | |
either ``njobs`` or 2, whichever is larger. (default: ``None``) | |
:param njobs: (optional) | |
The number of chains to run in parallel. If `None`, set to 4. Keep in | |
mind that some chains might themselves be multithreaded via openmp or | |
BLAS. In those cases it might be faster to set this to one. (default: | |
``None``) | |
:param tune: (optional) | |
Number of iterations to tune, if applicable (defaults to 500). Samplers | |
adjust the step sizes, scalings or similar during tuning. These samples | |
will be drawn in addition to samples and discarded. | |
:param n_new: (optional) | |
Number of new points where to sample fitted GP. There will be ``n_new`` | |
points evently distributed in ``(t[0], t[-1])`` interval. If ``None`` | |
then time points where to sample fitted GP must be specified in | |
``t_new`` argument. (default: ``None``) | |
:param t_new: (optional) | |
Array of times where to sample fitted GP. This allows more efficiently | |
span time intervals near flare maximum. If ``None`` then time points | |
where to sample fitted GP must be specified by ``n_new`` argument. | |
(default: ``None``) | |
:param show_traceplot: (optional) | |
Boolean - show results of MCMC? Function returns ``trace`` object that | |
can be pickled and viewed later. (default: ``False``) | |
:param show_fitted_GP: (optional) | |
Boolean - show fitted GP with original data? (default: ``True``) | |
:param file_fig: (optional) | |
File to save picture of fitted GP if ``show_fitted_GP=True``. If | |
``None`` then don't save picture. (default: ``None``) | |
:param progressbar: (optional) | |
Boolean. Whether or not to display a progress bar in the command line. | |
The bar shows the percentage of completion, the sampling speed in | |
samples per second (SPS), and the estimated remaining time until | |
completion ("expected time of arrival"; ETA). (default: ``True``) | |
:return: | |
``(trace, samples)`` or ``(trace, None)``, where ``trace`` is the result | |
of the MCMC that can be seen with ``pm.traceplot(trace)`` command. | |
``samples`` - 2D array with samples from fitted GP in user-specified | |
points. If both ``n_new`` and ``t_new`` are ``None`` then no samples are | |
returned and second returned value is ``None``. | |
:notes: | |
Points where to sample fitted GP could be specified with both ``t_new`` | |
and ``n_new`` but former has the priority. If none is specified than | |
no samples are returned. | |
""" | |
chains = max(njobs, 2) | |
with pm.Model() as model: | |
l = pm.HalfCauchy("l", beta=30) | |
eta = pm.HalfCauchy("eta", beta=5) | |
alpha = pm.HalfNormal("alpha", sd=1000) | |
# FIXME: Check that ``l`` goes before ``alpha``:) | |
cov = eta**2*pm.gp.cov.RatQuad(1, l, alpha) | |
gp = pm.gp.Latent(cov_func=cov) | |
f = gp.prior("f", X=t) | |
nu = pm.Gamma("nu", alpha=2, beta=0.1) | |
y_ = pm.StudentT("y", mu=f, sd=yerr, nu=nu, observed=y) | |
trace = pm.sample(draws, njobs=njobs, chains=chains, tune=tune, | |
progressbar=progressbar) | |
if show_traceplot: | |
pm.traceplot(trace) | |
if show_fitted_GP: | |
fig = plt.figure(figsize=(12, 5)) | |
ax = fig.gca() | |
# Original light curve | |
ax.errorbar(t, y, yerr=yerr, fmt=".k") | |
from pymc3.gp.util import plot_gp_dist | |
# Fitted light curve | |
plot_gp_dist(ax, trace["f"], t) | |
if file_fig is not None: | |
fig.savefig(file_fig) | |
if t_new is None: | |
if n_new is None: | |
return trace, None | |
else: | |
X_new = np.linspace(t[0], t[-1], n_new)[:, None] | |
else: | |
X_new = t_new[:, None] | |
# add the GP conditional to the model, given the new X values | |
with model: | |
f_pred = gp.conditional("f_pred", X_new) | |
# Sample from the GP conditional distribution | |
with model: | |
# Dictionary, pred_samples['f_pred'] - array (#samples, len(X_new),) | |
pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=100) | |
return trace, pred_samples["f_pred"] | |
if __name__ == "__main__": | |
with open("0716+714_umrao_lc.pkl", "rb") as fo: | |
data = pickle.load(fo) | |
data15 = data[0] | |
# Use only part of light curve | |
t = data15[:, 0][500:600][:, None] | |
y = data15[:, 1][500:600] | |
y -= np.median(y) | |
yerr = data15[:, 2][500:600] | |
trace, samples = fit_robustly(t, y, yerr, draws=500, n_new=400, | |
file_fig="plot.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment