Created
November 6, 2010 18:46
-
-
Save roban/665605 to your computer and use it in GitHub Desktop.
The plot2Ddist function plots the joint distribution of two variables, with estimated density contours and marginal histograms. Designed to plot paramter distributions for MCMC samples.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pylab | |
import numpy | |
import pymc | |
import matplotlib.patches | |
from mpl_toolkits.axes_grid1 import make_axes_locatable | |
import scipy.stats | |
def frac_inside_poly(x,y,polyxy): | |
"""Calculate the fraction of points x,y inside polygon polyxy. | |
polyxy -- list of x,y coordinates of vertices. | |
""" | |
xy = numpy.vstack([x,y]).transpose() | |
return float(sum(matplotlib.nxutils.points_inside_poly(xy, polyxy)))/len(x) | |
def fracs_inside_contours(x, y, contours): | |
"""Calculate the fraction of points x,y inside each contour level. | |
contours -- a matplotlib.contour.QuadContourSet | |
""" | |
fracs = [] | |
for (icollection, collection) in enumerate(contours.collections): | |
path = collection.get_paths()[0] | |
pathxy = path.vertices | |
frac = frac_inside_poly(x,y,pathxy) | |
fracs.append(frac) | |
return fracs | |
def frac_label_contours(x, y, contours, format='%.3f'): | |
"""Label contours according to the fraction of points x,y inside. | |
""" | |
fracs = fracs_inside_contours(x,y,contours) | |
levels = contours.levels | |
labels = {} | |
for (level, frac) in zip(levels, fracs): | |
labels[level] = format % frac | |
contours.clabel(fmt=labels) | |
def contour_enclosing(x, y, fractions, xgrid, ygrid, zvals, | |
axes, nstart = 200, | |
*args, **kwargs): | |
"""Plot contours encompassing specified fractions of points x,y. | |
""" | |
# Generate a large set of contours initially. | |
contours = axes.contour(xgrid, ygrid, zvals, nstart, | |
extend='both') | |
# Set up fracs and levs for interpolation. | |
levs = contours.levels | |
fracs = numpy.array(fracs_inside_contours(x,y,contours)) | |
sortinds = numpy.argsort(fracs) | |
levs = levs[sortinds] | |
fracs = fracs[sortinds] | |
# Find the levels that give the specified fractions. | |
levels = scipy.interp(fractions, fracs, levs) | |
# Remove the old contours from the graph. | |
for coll in contours.collections: | |
coll.remove() | |
# Reset the contours | |
contours.__init__(axes, xgrid, ygrid, zvals, levels, *args, **kwargs) | |
return contours | |
def plot2Ddist(variables, axeslist=None, truevalues=None, | |
trimto=None, thin=1, histbinslist=[100, 100], | |
labels=None, scaleview=True, | |
plotscatter=True, plothists=True, plotcontours=True, | |
contourKDEthin=1, contourNGrid=100, | |
contourFractions=[0.6827, 0.9545, 0.9973], | |
labelcontours=True, returncontours=False, | |
scatterstyle={}, histstyle={}, contourstyle={}, **styleArgs): | |
"""Plot joint distribution of two variables, with marginal histograms. | |
The resulting graphic includes (at your discretion): | |
* a scatter plot of the 2D distribution of the two variables | |
* estimated density contours for the distribution | |
* marginal histograms for each variable | |
See plot2Ddist_example.py for an example: | |
> plot2Ddist([a, b], truevalues=[intercept, slope], **styleargs) | |
Notes | |
----- | |
The contour plotting can be quite slow for large samples because | |
of the gaussian kernel density estimation. Try passing a larger | |
value for contourKDEthin to speed it up. | |
Inputs | |
------ | |
variables -- list-like of length 2 | |
a list of two array-like or pymc.Variable objects. The lengths | |
of the arrays or variable traces should be equal. | |
axeslist -- list-like of length 3 | |
a list of three Matplotlib Axes for: the joint plot, marginal | |
x histogram, and marginal y histogram, respectively. | |
truevalues -- list-like of length 2 | |
a list of the true values for each variable | |
trimto -- int | |
plot only the last trimto elements of each variable | |
thin -- int | |
plot only every thin-th element of each variable | |
histbinlist -- list-like of length 2 | |
specify the bins (number or limits) for x and y marginal histograms. | |
labels -- list-like of two strings | |
the x and y axis labels | |
scaleview -- bool | |
whether to set the axes limits according to the plotted data | |
plotscatter, plothists, plotcontours -- bool | |
whether to plot the scatter, marginal histograms, and contours | |
scatterstyle, histstyle, contourstyle -- dict-like | |
additional keyword arguments for the plot, hist, or contour commands | |
contourKDEthin -- int | |
factor by which to thin the samples before calculating the | |
gaussian kernel density estimate for contouring | |
contourNGrid -- int | |
size of the grid to use (in each dimension) for the contour plotting | |
contourFractions -- list-like | |
countours are chosen to include the fractions of points specified here | |
labelcontours -- bool | |
whether to label the contours with the fraction of points enclosed | |
styleArgs -- | |
leftover arguments are passed to both the plot and hist commands | |
""" | |
### Set up figures and axes. ### | |
if axeslist is None: | |
fig1 = pylab.figure(figsize=(6,6)) | |
fig1.set_label('traces') | |
ax1 = pylab.gca() | |
divider = make_axes_locatable(ax1) | |
ax2 = divider.append_axes("top", 1.5, pad=0.0, sharex=ax1) | |
ax3 = divider.append_axes("right", 1.5, pad=0.0, sharey=ax1) | |
for tl in (ax2.get_xticklabels() + ax2.get_yticklabels() + | |
ax3.get_xticklabels() + ax3.get_yticklabels()): | |
tl.set_visible(False) | |
axeslist = (ax1, ax2, ax3) | |
else: | |
ax1, ax2, ax3 = axeslist | |
# Thin and trim variables. | |
if labels is None: | |
passedlabels = False | |
labels = [None, None] | |
else: | |
passedlabels = True | |
for (ivar, variable) in enumerate(variables): | |
# Get the trace if this is a pymc.Variable object. | |
if isinstance(variable, pymc.Variable): | |
variables[ivar] = variable.trace() | |
if hasattr(variable, '__name__') and not passedlabels: | |
labels[ivar] = variable.__name__ | |
if trimto is None: | |
trimto = len(variables[0]) | |
x = variables[0][-trimto::thin] | |
y = variables[1][-trimto::thin] | |
### Plot the variables. ### | |
# Plot 2D scatter of variables. | |
if plotscatter: | |
style = {'ls':'', 'marker':',', 'color':'r', 'alpha':'0.5'} | |
style.update(styleArgs) | |
style.update(scatterstyle) | |
ax1.plot(x, y, **style) | |
if plotcontours: | |
xkde = variables[0][-trimto::contourKDEthin] | |
ykde = variables[1][-trimto::contourKDEthin] | |
# Inspired by Abraham Flaxman's https://gist.github.com/626689 | |
style = {'linewidths':2.0, 'alpha':0.75, 'colors':'k', | |
#'cmap':matplotlib.cm.Greys, | |
'zorder':10} | |
style.update(styleArgs) | |
style.update(contourstyle) | |
if 'color' in style: | |
style['colors'] = style['color'] | |
gkde = scipy.stats.gaussian_kde([xkde,ykde]) | |
xgrid, ygrid = numpy.mgrid[min(x):max(x):contourNGrid * 1j, | |
min(y):max(y):contourNGrid * 1j] | |
zvals = numpy.array(gkde.evaluate([xgrid.flatten(), | |
ygrid.flatten()]) | |
).reshape(xgrid.shape) | |
contours = contour_enclosing(x, y, contourFractions, | |
xgrid, ygrid, zvals, | |
ax1, **style) | |
# Plot marginal histograms. | |
if plothists: | |
style = {'histtype':'step', 'normed':True, 'color':'k'} | |
style.update(styleArgs) | |
style.update(histstyle) | |
ax2.hist(x, histbinslist[0], **style) | |
ax3.hist(y, histbinslist[1], orientation='horizontal', **style) | |
# Plot lines for the true values. | |
if truevalues is not None: | |
ax1.axvline(x=truevalues[0], ls=':', c='k') | |
ax1.axhline(y=truevalues[1], ls=':', c='k') | |
ax2.axvline(x=truevalues[0], ls=':', c='k') | |
ax3.axhline(y=truevalues[1], ls=':', c='k') | |
if scaleview: | |
ax2.relim() | |
ax3.relim() | |
ax1.relim() | |
ax2.autoscale_view(tight=True) | |
ax3.autoscale_view(tight=True) | |
ax1.autoscale_view(tight=True) | |
ax2.set_ylim(bottom=0) | |
ax3.set_xlim(left=0) | |
if labels[0] is not None: | |
ax1.set_xlabel(labels[0]) | |
if labels[1] is not None: | |
ax1.set_ylabel(labels[1]) | |
if plotcontours and labelcontours: | |
frac_label_contours(x, y, contours) | |
if plotcontours and returncontours: | |
return axeslist, contours | |
else: | |
return axeslist |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""An example of plot2Ddist. | |
Based on an example by Anand Patil: | |
http://groups.google.com/group/pymc/browse_thread/thread/afe91c4d9440d6da | |
""" | |
import pymc as pm | |
import numpy as np | |
import matplotlib.pyplot as pl | |
# Set up parameters. Our model is y = a + b x + c x^2 | |
a = pm.Uninformative('a',0) | |
b = pm.Uninformative('b',0) | |
c = pm.Uninformative('c',0) | |
v = pm.OneOverX('v',1) | |
# Locations of observations. | |
nx = 10 | |
span = 5.0 | |
x = np.linspace(-span/2,span/2,nx) | |
# Generate mock set of data. | |
slope = 2.0 | |
intercept = 1.0 | |
curve = 3.0 | |
sigma = 3.0 | |
data = (x**2)*curve + x*slope - intercept + np.random.normal(size=len(x))*sigma | |
# Observed variable. | |
y = pm.Normal('y',a+b*x+c*x**2,1./v,value=data,observed=True) | |
ypred = pm.Normal('ypred',a+b*x+c*x**2,1./v) | |
# Set up MCMC chain. | |
M = pm.MCMC([a,b,c,v,y,ypred]) | |
M.use_step_method(pm.AdaptiveMetropolis, [a,b,c,v]) | |
# Sample. | |
M.isample(20000,4000,10) | |
# Plot the function and envelope. | |
pl.figure() | |
env = pm.Matplot.func_envelopes(ypred) | |
for e in env: | |
e.display(x,.5,new=False) | |
pl.plot(x,data,'r.',markersize=4) | |
# Plot the parameter distribution from the samples. | |
from plot2Ddist import plot2Ddist | |
scatterstyle={'color':'r', 'alpha':0.5} | |
styleargs = {'color':'k', 'scatterstyle':scatterstyle} | |
plot2Ddist([a, b], truevalues=[intercept, slope], **styleargs) | |
plot2Ddist([a, c], truevalues=[intercept, curve], **styleargs) | |
plot2Ddist([b, c], truevalues=[slope, curve], **styleargs) | |
#plot2Ddist([a, v], truevalues=[intercept, sigma**2], **styleargs) | |
#plot2Ddist([b, v], truevalues=[slope, sigma**2], **styleargs) | |
#plot2Ddist([c, v], truevalues=[curve, sigma**2], **styleargs) | |
pl.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The plot2Ddist function plots the joint distribution of two variables, with estimated density contours and marginal histograms.
Also includes code allowing the contours to be specified by the fraction of points contained inside them, which can be used to plot, for example, Bayesian credible regions (also called confidence regions) from samples in a Monte Carlo Markov Chain.
Run
python plot2Ddist_example.py
to see example plots of pymc chains (three using this code, one using the func_envelope function in pymc).
See: http://groups.google.com/group/pymc/browse_thread/thread/afe91c4d9440d6da