Skip to content

Instantly share code, notes, and snippets.

@ipashchenko
Last active February 20, 2018 16:48
Show Gist options
  • Save ipashchenko/b8a489e540604cc75b697f545b7eabf2 to your computer and use it in GitHub Desktop.
Save ipashchenko/b8a489e540604cc75b697f545b7eabf2 to your computer and use it in GitHub Desktop.
robust GP
import pickle
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
def fit_robustly(t, y, yerr, draws=500, chains=None, njobs=4, tune=500,
n_new=None, t_new=None, show_traceplot=False,
show_fitted_GP=True, file_fig=None, progressbar=True):
"""
Fit Latent Variable GP with Student Likelihood to data.
:param t:
Array of times.
:param y:
Array of values.
:param yerr:
Array of errors.
:param draws: (optional)
The number of samples to draw. Defaults to 500. The number of tuned
samples are discarded by default. (default: ``500``)
:param chains: (optional)
The number of chains to sample. Running independent chains
is important for some convergence statistics and can also
reveal multiple modes in the posterior. If ``None``, then set to
either ``njobs`` or 2, whichever is larger. (default: ``None``)
:param njobs: (optional)
The number of chains to run in parallel. If `None`, set to 4. Keep in
mind that some chains might themselves be multithreaded via openmp or
BLAS. In those cases it might be faster to set this to one. (default:
``None``)
:param tune: (optional)
Number of iterations to tune, if applicable (defaults to 500). Samplers
adjust the step sizes, scalings or similar during tuning. These samples
will be drawn in addition to samples and discarded.
:param n_new: (optional)
Number of new points where to sample fitted GP. There will be ``n_new``
points evently distributed in ``(t[0], t[-1])`` interval. If ``None``
then time points where to sample fitted GP must be specified in
``t_new`` argument. (default: ``None``)
:param t_new: (optional)
Array of times where to sample fitted GP. This allows more efficiently
span time intervals near flare maximum. If ``None`` then time points
where to sample fitted GP must be specified by ``n_new`` argument.
(default: ``None``)
:param show_traceplot: (optional)
Boolean - show results of MCMC? Function returns ``trace`` object that
can be pickled and viewed later. (default: ``False``)
:param show_fitted_GP: (optional)
Boolean - show fitted GP with original data? (default: ``True``)
:param file_fig: (optional)
File to save picture of fitted GP if ``show_fitted_GP=True``. If
``None`` then don't save picture. (default: ``None``)
:param progressbar: (optional)
Boolean. Whether or not to display a progress bar in the command line.
The bar shows the percentage of completion, the sampling speed in
samples per second (SPS), and the estimated remaining time until
completion ("expected time of arrival"; ETA). (default: ``True``)
:return:
``(trace, samples)`` or ``(trace, None)``, where ``trace`` is the result
of the MCMC that can be seen with ``pm.traceplot(trace)`` command.
``samples`` - 2D array with samples from fitted GP in user-specified
points. If both ``n_new`` and ``t_new`` are ``None`` then no samples are
returned and second returned value is ``None``.
:notes:
Points where to sample fitted GP could be specified with both ``t_new``
and ``n_new`` but former has the priority. If none is specified than
no samples are returned.
"""
chains = max(njobs, 2)
with pm.Model() as model:
l = pm.HalfCauchy("l", beta=30)
eta = pm.HalfCauchy("eta", beta=5)
alpha = pm.HalfNormal("alpha", sd=1000)
# FIXME: Check that ``l`` goes before ``alpha``:)
cov = eta**2*pm.gp.cov.RatQuad(1, l, alpha)
gp = pm.gp.Latent(cov_func=cov)
f = gp.prior("f", X=t)
nu = pm.Gamma("nu", alpha=2, beta=0.1)
y_ = pm.StudentT("y", mu=f, sd=yerr, nu=nu, observed=y)
trace = pm.sample(draws, njobs=njobs, chains=chains, tune=tune,
progressbar=progressbar)
if show_traceplot:
pm.traceplot(trace)
if show_fitted_GP:
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
# Original light curve
ax.errorbar(t, y, yerr=yerr, fmt=".k")
from pymc3.gp.util import plot_gp_dist
# Fitted light curve
plot_gp_dist(ax, trace["f"], t)
if file_fig is not None:
fig.savefig(file_fig)
if t_new is None:
if n_new is None:
return trace, None
else:
X_new = np.linspace(t[0], t[-1], n_new)[:, None]
else:
X_new = t_new[:, None]
# add the GP conditional to the model, given the new X values
with model:
f_pred = gp.conditional("f_pred", X_new)
# Sample from the GP conditional distribution
with model:
# Dictionary, pred_samples['f_pred'] - array (#samples, len(X_new),)
pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=100)
return trace, pred_samples["f_pred"]
if __name__ == "__main__":
with open("0716+714_umrao_lc.pkl", "rb") as fo:
data = pickle.load(fo)
data15 = data[0]
# Use only part of light curve
t = data15[:, 0][500:600][:, None]
y = data15[:, 1][500:600]
y -= np.median(y)
yerr = data15[:, 2][500:600]
trace, samples = fit_robustly(t, y, yerr, draws=500, n_new=400,
file_fig="plot.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment