Created
April 23, 2015 16:06
-
-
Save ubnt-intrepid/15d620613105a5bf216d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
from scipy import linalg | |
import matplotlib.pyplot as plt | |
import matplotlib as mpl | |
from sklearn import mixture | |
# Number of samples per component | |
n_samples = 500 | |
# Generate random sample, two components | |
np.random.seed(0) | |
C = np.array([[0., -0.1], [3.4, .4]]) | |
X = np.r_[np.dot(np.random.randn(2*n_samples, 2), C), | |
np.dot(np.random.randn(2*n_samples, 2), C) + np.array([0,8]), | |
np.dot(np.random.randn(2*n_samples, 2), C.T) + np.array([5,6]), | |
.7 * np.random.randn(n_samples, 2) + np.array([-3, 3])] | |
# Fit a mixture of Gaussians with EM using five components | |
aic_min = 1e12 | |
c_best = -1 | |
for c in range(1, 11): | |
gmm_c = mixture.GMM(n_components=c, covariance_type='full') | |
gmm_c.fit(X) | |
aic = gmm_c.bic(X) | |
if aic < aic_min: | |
aic_min = aic | |
gmm = gmm_c | |
c_best = c | |
print('GMM: num_cluster =', c_best) | |
# Fit a Dirichlet process mixture of Gaussians using five components | |
vbgmm = mixture.VBGMM(n_components=10, covariance_type='full') | |
vbgmm.fit(X) | |
# Fit a Dirichlet process mixture of Gaussians using five components | |
dpgmm = mixture.DPGMM(n_components=5, covariance_type='full') | |
dpgmm.fit(X) | |
color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm']) | |
plt.figure(figsize=(4, 12)) | |
for i, (clf, title) in enumerate([(gmm, 'GMM'), | |
(vbgmm, 'Variational Bayesian GMM'), | |
(dpgmm, 'Dirichlet Process GMM')]): | |
splot = plt.subplot(3, 1, 1 + i) | |
Y_ = clf.predict(X) | |
for i, (mean, covar, color) in enumerate(zip( | |
clf.means_, clf._get_covars(), color_iter)): | |
v, w = linalg.eigh(covar) | |
u = w[0] / linalg.norm(w[0]) | |
# as the DP will not use every component it has access to | |
# unless it needs it, we shouldn't plot the redundant | |
# components. | |
if not np.any(Y_ == i): | |
continue | |
plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color) | |
# Plot an ellipse to show the Gaussian component | |
angle = np.arctan(u[1] / u[0]) | |
angle = 180 * angle / np.pi # convert to degrees | |
ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color) | |
ell.set_clip_box(splot.bbox) | |
ell.set_alpha(0.5) | |
splot.add_artist(ell) | |
plt.xlim(-10, 10) | |
plt.ylim(-5, 15) | |
plt.xticks(()) | |
plt.yticks(()) | |
plt.title(title) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment