thorium_fits
A gist by @megbedell, created July 16, 2018.
# coding: utf-8
# ### fitting lines to data with 2-dimensional uncertainties
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
import emcee
import corner
import scipy.optimize as op
# load up data and define some functions
# In[2]:
data = np.genfromtxt('Table_solartwins_Th_fulldata.txt', names=True, dtype=None, encoding=None)
# In[3]:
def vee(par):
    # Hogg+2010 eqn 29: unit vector orthogonal to the line
    (m, b, lnjitter) = par
    return 1./np.sqrt(1. + m**2) * np.asarray([-m, 1.])

def ortho_displacement(par, ys, xs):
    # Hogg+2010 eqn 30: orthogonal displacement of each point from the line
    (m, b, lnjitter) = par
    disp = np.zeros_like(ys)
    for i, (y, x) in enumerate(zip(ys, xs)):
        z0 = np.asarray([0.0, b])
        zi = np.asarray([x, y])
        disp[i] = np.dot(vee(par), zi - z0)
    return disp
def ortho_variance(par, dys, dxs):
    # Hogg+2010 eqn 31: project each point's covariance matrix onto the line normal
    #(m, b, jitter) = par
    var = np.zeros_like(dys)
    for i, (dy, dx) in enumerate(zip(dys, dxs)):
        cov = np.eye(2)
        cov[0,0] = dx**2
        cov[1,1] = dy**2 #+ jitter**2
        var[i] = np.dot(np.dot(vee(par), cov), vee(par))
    return var
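# Aside (not in the original notebook): because the loops above only ever project
# diagonal covariances, both helpers reduce to closed forms. A vectorized
# equivalent, assuming the same (m, b, lnjitter) parameterization, would be:
def ortho_displacement_vec(par, ys, xs):
    # same result as ortho_displacement: (y - m*x - b)/sqrt(1 + m^2), no loop
    (m, b, lnjitter) = par
    return (np.asarray(ys) - m*np.asarray(xs) - b) / np.sqrt(1. + m**2)

def ortho_variance_vec(par, dys, dxs):
    # same result as ortho_variance: (m^2*dx^2 + dy^2)/(1 + m^2), no loop
    (m, b, lnjitter) = par
    return (m**2 * np.asarray(dxs)**2 + np.asarray(dys)**2) / (1. + m**2)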
def twodlike(par, y, dy, x, dx):
    # log(likelihood) considering errors in both x and y:
    # Gaussian likelihood built from Hogg+2010 eqns 29-31,
    # with a jitter term added to the variance in quadrature
    (m, b, lnjitter) = par
    delta = ortho_displacement(par, y, x)
    sigmasq = ortho_variance(par, dy, dx) + np.exp(2.*lnjitter)
    return -0.5 * np.sum(delta**2/sigmasq + np.log(sigmasq))
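# For reference, the quantities these functions implement (Hogg+2010 eqns 29-31),
# with the extra jitter variance $e^{2\ln j}$ added in quadrature:
# $$\hat{\mathbf{v}} = \frac{1}{\sqrt{1+m^2}}\begin{bmatrix}-m\\ 1\end{bmatrix}, \qquad
# \Delta_i = \hat{\mathbf{v}}^\top\left(\mathbf{Z}_i - \begin{bmatrix}0\\ b\end{bmatrix}\right), \qquad
# \Sigma_i^2 = \hat{\mathbf{v}}^\top \mathbf{S}_i \hat{\mathbf{v}},$$
# $$\ln\mathcal{L} = -\frac{1}{2}\sum_i\left[\frac{\Delta_i^2}{\Sigma_i^2 + e^{2\ln j}} + \ln\!\left(\Sigma_i^2 + e^{2\ln j}\right)\right],$$
# where $\mathbf{Z}_i = (x_i, y_i)^\top$ and $\mathbf{S}_i = \mathrm{diag}(\sigma_{x,i}^2, \sigma_{y,i}^2)$.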
def lnprior(par):
    (m, b, lnjitter) = par
    if (-10. < m < 10.) and (-1. < b < 1.) and (-20. < lnjitter < 1.):
        return -1.5 * np.log(1 + m*m) # slope prior from the Vanderplas blog post
        #return 0.0 # flat prior
    return -np.inf
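# (Why $(1+m^2)^{-3/2}$: as derived in the Vanderplas post linked below, it is the
# prior that is uniform in the line's angle $\theta = \arctan m$ and perpendicular
# intercept $b_\perp = b\cos\theta$, rather than uniform in $m$ and $b$, so it
# does not favor steep slopes.)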
def lnprob(par, y, dy, x, dx):
    lp = lnprior(par)
    if not np.isfinite(lp):
        return -np.inf
    return lp + twodlike(par, y, dy, x, dx)
def bestfit(abund, err, age, age_err):
    # maximum a posteriori fit via scipy.optimize
    # parameters: (ys, y_errs, xs, x_errs)
    nll = lambda *args: -lnprob(*args)
    par0 = np.asarray([0.0, 0.0, 0.0])
    result = op.minimize(nll, par0, args=(abund, err, age, age_err))
    (m, b, lnjitter) = result['x']
    return m, b, lnjitter
# (the references mentioned above are [Hogg+2010](http://adsabs.harvard.edu/cgi-bin/bib_query?arXiv:1008.4686) and [this Vanderplas blog post](https://jakevdp.github.io/blog/2014/06/14/frequentism-and-bayesianism-4-bayesian-in-python/#Prior-on-Slope-and-Intercept))
# remove the stars we won't use in fit
# In[4]:
stars_to_mask = ['HIP28066', 'HIP30476', 'HIP73241', 'HIP74432', 'HIP64150', 'SunVesta'] # do not use when fitting
mask = ~np.isin(data['StarID'], stars_to_mask)
data = data[mask]
# find the best-fit parameters for [Th/H]_ZAMS vs [Fe/H] (top panel of Figure 3)
# In[5]:
args = (data['ThH_z'], data['error'], data['FeH'], data['eFeH'])
bf = bestfit(*args)
m,b,lnj = bf # slope, intercept, ln(jitter)
# In[6]:
plt.errorbar(data['FeH'], data['ThH_z'], yerr=data['error'], xerr=data['eFeH'], fmt='o')
xs = np.arange(-0.15,0.15,0.01)
plt.plot(xs, m*xs+b)
# get errors with an MCMC
# In[7]:
# (this cell was run under the %%time magic in the notebook)
ndim, nwalkers = 3, 20
spread = [1e-3, 1e-2, 1e-4]
pos = [bf + spread*np.random.randn(ndim) for j in range(nwalkers)] # randomize starting values
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
sampler.run_mcmc(pos, 1000)
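# A quick sanity check one might add here (not in the original notebook): emcee
# reports per-walker acceptance fractions; values far from the ~0.2-0.5 range
# would suggest the proposal scale or chain length needs tuning.
print("mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))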
# In[8]:
samples = sampler.chain[:, 100:, :].reshape((-1, ndim)) # discard the first 100 steps as burn-in
samples[:,-1] = np.exp(samples[:,-1]) # convert ln(jitter) samples to jitter
fig = corner.corner(samples, labels=["$m$", "$b$", "$j$"])
#fig.savefig('ThH_z_FeH_emcee.png')
# In[9]:
m_16, m_50, m_84 = np.percentile(samples[:,0], [16,50,84])
m_errp = m_84 - m_50
m_errm = m_50 - m_16
print("slope = {0:.3e} +{1:.3e} -{2:.3e}".format(m_50, m_errp, m_errm))
# In[10]:
plt.hist(samples[:,0])
plt.axvline(0.0, color='red')
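# One way to quantify what the histogram shows (an aside, not in the original
# notebook): the fraction of posterior samples with a negative slope.
print("P(m < 0) = {0:.3f}".format(np.mean(samples[:,0] < 0.)))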
# now let's do this for [Th/Fe]_ZAMS vs [Fe/H]
# In[11]:
args = (data['ThFe_z'], data['error_1'], data['FeH'], data['eFeH'])
# In[12]:
bf = bestfit(*args)
m,b,lnj = bf # slope, intercept, ln(jitter)
# In[13]:
bf # inspect the best-fit (m, b, lnjitter); displayed inline in the notebook
# In[14]:
plt.errorbar(data['FeH'], data['ThFe_z'], yerr=data['error_1'], xerr=data['eFeH'], fmt='o')
xs = np.arange(-0.15,0.15,0.01)
plt.plot(xs, m*xs+b)
# In[15]:
# (again run under the %%time magic in the notebook)
ndim, nwalkers = 3, 20
spread = [1e-5, 1e-3, 1e-4]
pos = [bf + spread*np.random.randn(ndim) for j in range(nwalkers)] # randomize starting values
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
sampler.run_mcmc(pos, 1000)
# In[16]:
samples = sampler.chain[:, 100:, :].reshape((-1, ndim))
samples[:,-1] = np.exp(samples[:,-1])
fig = corner.corner(samples, labels=["$m$", "$b$", "$j$"])
# In[17]:
m_16, m_50, m_84 = np.percentile(samples[:,0], [16,50,84])
m_errp = m_84 - m_50
m_errm = m_50 - m_16
print("slope = {0:.3f} +{1:.3f} -{2:.3f}".format(m_50, m_errp, m_errm))
# Now that we've got a sense of how it works, we can do it faster for the fits in Figure 4 just by changing the args:
# In[20]:
args = (data['ThH_z'], data['error'], data['Age'], data['eAge']) #(ys, y_errors, xs, x_errors)
# In[19]:
bf = bestfit(*args)
m,b,lnj = bf # slope, intercept, ln(jitter)
ndim, nwalkers = 3, 20
spread = [1e-5, 1e-3, 1e-4]
pos = [bf + spread*np.random.randn(ndim) for j in range(nwalkers)] # randomize starting values
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
sampler.run_mcmc(pos, 1000)
samples = sampler.chain[:, 100:, :].reshape((-1, ndim))
samples[:,-1] = np.exp(samples[:,-1])
fig = corner.corner(samples, labels=["$m$", "$b$", "$j$"])
m_16, m_50, m_84 = np.percentile(samples[:,0], [16,50,84])
m_errp = m_84 - m_50
m_errm = m_50 - m_16
print("slope = {0:.3f} +{1:.3f} -{2:.3f}".format(m_50, m_errp, m_errm))