Last active
March 7, 2023 00:51
-
-
Save camriddell/c5de7372fd57ee36d7e8448d66cea027 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import norm, uniform, gamma | |
from matplotlib.pyplot import rc, rcdefaults, subplots, imread, show | |
from matplotlib.lines import Line2D | |
from matplotlib.ticker import MultipleLocator | |
from matplotlib.transforms import blended_transform_factory | |
from matplotlib.offsetbox import VPacker, TextArea, AnchoredOffsetbox, DrawingArea | |
from matplotlib.image import BboxImage | |
from numpy.random import default_rng | |
from numpy import arange, linspace, broadcast_to | |
rcdefaults() | |
rc('figure', facecolor='white') | |
rc('font', size=16) | |
## All populations should have central tendency ~70, | |
# keeps things comparable across distributions | |
populations = [ | |
norm(loc=70, scale=3), uniform(60, 20), gamma(1.99, loc=65, scale=2) | |
] | |
fig, axes = subplots( | |
3, len(populations), figsize=(20, 16), | |
sharex=True, sharey='row', | |
gridspec_kw={ | |
'height_ratios': [1, 5, 1], 'hspace': 0, 'wspace': .1, | |
'bottom': .08, 'right': .9, 'left': .3, 'top': .9 | |
}, | |
dpi=104 | |
) | |
for pop, ax in zip(populations, axes[0]): | |
xs = linspace(*pop.ppf([.001, .999]), 4_000) | |
ax.fill_between(xs, pop.pdf(xs), alpha=.5, label='Distribution') | |
ax.axvline( | |
pop.mean(), 0, .95, ls='dashed', color='k', lw=2, label=r'Mean $\mu$' | |
) | |
ax.set_title(pop.dist.name.title(), fontsize='large') | |
ax.xaxis.set_tick_params(length=0) | |
ax.yaxis.set_tick_params(labelleft=False, length=0) | |
ax.margins(y=0) | |
### Running & Displaying the Simulation | |
rng = default_rng(0) | |
n_samples, sample_size = 100, 25 | |
samples = [ | |
p.rvs(size=(n_samples, sample_size), random_state=rng) | |
for p in populations | |
] | |
ys = arange(n_samples) | |
scatter_ys = broadcast_to(ys[:, None], (n_samples, sample_size)) | |
rng = default_rng(0) | |
for s, samp_ax in zip(samples, axes[1, :]): | |
smean, sdev = s.mean(axis=1), s.std(axis=1) | |
samp_ax.scatter( | |
s, y=scatter_ys, s=4, c='tab:blue', alpha=.7, label='Observations' | |
) | |
samp_ax.fill_betweenx( | |
ys, x1=smean - sdev, x2=smean + sdev, color='tab:orange', | |
alpha=.3, label='Std. Dev. $S_x$' | |
) | |
samp_ax.plot(smean, ys, color='tab:orange', label=r'Mean $\bar{x}$') | |
samp_ax.xaxis.set_tick_params(length=0) | |
### Creating the Sampling Distributions | |
for s, mean_ax in zip(samples, axes[2, :]): | |
smean, sdev = s.mean(axis=1), s.std(axis=1) | |
mean_ax.axvline( | |
smean.mean(), ymin=.5, ymax=.9, c='k', ls='dashed', | |
lw=2, label='Est. Pop. Mean $\hat{\mu}$' | |
) | |
norm_density = norm(*norm.fit(smean)) | |
xs = linspace(*norm_density.ppf([.001, .999]), 4000) | |
mean_ax.fill_between( | |
xs, -norm_density.pdf(xs), label='Fitted Gaussian', | |
alpha=.5, color='tab:orange' | |
) | |
mean_ax.hist( | |
smean, bins='auto', label=r'Sample Means $\bar{x}$', | |
density=True, color='tab:orange', ec='white' | |
) | |
mean_ax.yaxis.set_visible(False) | |
mean_ax.spines['bottom'].set_position('zero') | |
### Cleaning Aesthetics | |
# Set custom ticks and limits to the Simulation & Sampling Dist. Axes | |
# The y-axes are shared across rows of the figure, | |
# so we only need to invert 1 y-axis out of the row of sample Axes | |
samp_ax = axes[1, 0] | |
samp_ax.yaxis.set_major_locator(MultipleLocator(10)) | |
samp_ax.yaxis.set_major_formatter('Simulation {x:.0f}') | |
samp_ax.invert_yaxis() | |
# samp_ax.set_ylabel('Simulation', size='large', rotation=-90, va='top') | |
# Manually set the xlimits, they're shared across all Axes | |
# the population means hover ~70, so we drop that xtick for visibility | |
mean_ax = axes[2, 0] | |
mean_ax.set(xlim=(60, 80), xticks=[60, 65, 75, 80]) | |
mean_ax.invert_yaxis() # flip the histgram and KDE, so the KDE is on top | |
mean_ax.margins(y=0.1) | |
### Adding Text Annotations & Legends | |
## Create left-aligned titles for each row | |
axes_titles = [ | |
('Population', ''), | |
('Samples', f'n = {sample_size}'), | |
('Sampling Distribution', 'of the sample mean') | |
] | |
for (title, subtitle), ax in zip(axes_titles, axes[:, 0], strict=True): | |
titlebox = [ | |
TextArea(title, textprops={'size': 'large', 'weight': 'semibold'}) | |
] | |
if subtitle: | |
titlebox.append(TextArea(subtitle, textprops={'style': 'italic'})) | |
title_packer = VPacker(pad=0, sep=5, children=titlebox) | |
legend = fig.legend( | |
*ax.get_legend_handles_labels(), markerscale=4, scatterpoints=4 | |
) | |
legend.remove() | |
# Legends are composed of two children: VPacker & FancyBboxPatch | |
# We can extract the VPacker and add it to our own for a very custom title | |
legend_body, _ = legend.get_children() | |
transform = blended_transform_factory(fig.transFigure, ax.transAxes) | |
fig.add_artist( | |
AnchoredOffsetbox( | |
loc='upper left', | |
child=VPacker( | |
align='left', pad=10, sep=10, | |
children=[title_packer, legend_body] | |
), | |
bbox_to_anchor=(0.05, 1), bbox_transform=transform, | |
borderpad=0, frameon=False | |
) | |
) | |
## Add lines to separate rows of plots | |
for ax in axes[1:, 0]: | |
transform = blended_transform_factory(fig.transFigure, ax.transAxes) | |
fig.add_artist( | |
Line2D([.05, .95], [1, 1], color='lightgray', transform=transform) | |
) | |
for ax in axes[:, 1:].flat: | |
ax.yaxis.set_tick_params(left=False) | |
# Remove the spines | |
for ax in fig.axes: | |
sides = ['bottom', 'left', 'top', 'right'] | |
if ax in axes[-1, :]: | |
sides.remove('bottom') | |
ax.spines[sides].set_visible(False) | |
## Figure title - VPacker with TextAreas enables great control | |
# over alignment & different fonts | |
figure_title = VPacker( | |
align='center', pad=0, sep=5, | |
children=[ | |
TextArea( | |
'Visualizing the Central Limit Theorem', | |
textprops={'size': 'x-large', 'weight': 'bold'} | |
), | |
TextArea( | |
'Sampling Distributions of the Sample Mean', | |
textprops={'size': 'large', 'style': 'italic'} | |
) | |
]) | |
fig.add_artist( | |
AnchoredOffsetbox( | |
loc='upper center', child=figure_title, | |
bbox_to_anchor=(0.5, 1.0), bbox_transform=fig.transFigure, | |
frameon=False | |
) | |
) | |
### Author Watermark | |
fig.text(.98, .02, s='Cameron Riddell @RiddleMeCam', ha='right', color='lightgray') | |
show() | |
# fig.savefig('clt.png') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment