Last active
March 9, 2022 17:43
-
-
Save cwindolf/20ee543bd5208bb40c73af9083a3eb9c to your computer and use it in GitHub Desktop.
Categorical scatterplot with Gaussian contours
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# adapted from https://matplotlib.org/3.1.1/gallery/statistics/confidence_ellipse.html
import numpy as np | |
import matplotlib.pyplot as plt | |
import colorcet | |
from matplotlib.patches import Ellipse | |
import matplotlib.transforms as transforms | |
def cluster_scatter(xs, ys, ids, ax=None, n_std=2.0):
    """Scatter 2D points colored by cluster id, with one Gaussian contour each.

    Every distinct value in ``ids`` is drawn as its own scatter layer, and an
    ellipse derived from that cluster's sample mean and covariance is overlaid
    in the same color.

    Parameters
    ----------
    xs, ys : array-like
        Point coordinates. Coerced with ``np.asarray`` and indexed
        positionally, so they must line up element-for-element with ``ids``.
    ids : array-like
        Cluster label per point. Each label ``k`` picks its color via
        ``colorcet.glasbey_hv[k % 256]``, so labels must be integer-valued.
    ax : matplotlib Axes, optional
        Axes to draw into. Defaults to ``plt.gca()``.
    n_std : float, default 2.0
        Scale factor applied to each cluster's standard deviations to set
        the ellipse radii.

    Notes
    -----
    Clusters with fewer than two points, or with a non-positive / non-finite
    variance along either axis, are still scattered but get no ellipse: their
    correlation coefficient is undefined and would otherwise yield a NaN patch.
    """
    if ax is None:
        ax = plt.gca()

    # Positional indexing below requires plain arrays (lists would fail on
    # `ids == k`; pandas Series would be indexed by label instead of position).
    xs = np.asarray(xs)
    ys = np.asarray(ys)
    ids = np.asarray(ids)

    # scatter and collect gaussian info
    means = {}
    covs = {}
    for k in np.unique(ids):
        where = np.flatnonzero(ids == k)
        xk = xs[where]
        yk = ys[where]
        color = colorcet.glasbey_hv[k % 256]
        ax.scatter(xk, yk, s=1, color=color, alpha=0.5)
        if where.size < 2:
            # np.cov of a single observation is NaN; no contour to draw.
            continue
        means[k] = xk.mean(), yk.mean()
        covs[k] = np.cov(xk, yk)

    # Ellipses are added in a second pass, after every scatter layer.
    for k, (mean_x, mean_y) in means.items():
        cov = covs[k]
        vx, vy = cov[0, 0], cov[1, 1]
        if not (vx > 0 and vy > 0):
            # Zero (or NaN) variance: correlation is undefined, skip contour.
            continue
        # Pearson correlation; clip because rounding can push it just past
        # +/-1, which would make one of the square roots below NaN.
        rho = np.clip(cov[0, 1] / np.sqrt(vx * vy), -1.0, 1.0)
        color = colorcet.glasbey_hv[k % 256]
        # Ellipse for the standardized (unit-variance) data, centered at 0.
        ell = Ellipse(
            (0, 0),
            width=2 * np.sqrt(1 + rho),
            height=2 * np.sqrt(1 - rho),
            facecolor=(0, 0, 0, 0),
            edgecolor=color,
            linewidth=1,
        )
        # Rotate onto the x/y axes, scale by n_std standard deviations along
        # each axis, then move to the cluster mean.
        transform = (
            transforms.Affine2D()
            .rotate_deg(45)
            .scale(n_std * np.sqrt(vx), n_std * np.sqrt(vy))
            .translate(mean_x, mean_y)
        )
        ell.set_transform(transform + ax.transData)
        ax.add_patch(ell)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment