ubnt-intrepid · April 23, 2015 16:06
diff --git a/gistfile1.py b/gistfile1.py
 import itertools

 import numpy as np
 from scipy import linalg
 import matplotlib.pyplot as plt
 import matplotlib as mpl

 from sklearn import mixture

 # Number of samples per component
 n_samples = 500

 # Generate random sample, two components
 np.random.seed(0)
 C = np.array([[0., -0.1], [3.4, .4]])
 X = np.r_[np.dot(np.random.randn(2*n_samples, 2), C),
          np.dot(np.random.randn(2*n_samples, 2), C) + np.array([0,8]),
          np.dot(np.random.randn(2*n_samples, 2), C.T) + np.array([5,6]),
          .7 * np.random.randn(n_samples, 2) + np.array([-3, 3])]

 # Fit a mixture of Gaussians with EM using five components
 aic_min = 1e12
 c_best = -1
 for c in range(1, 11):
    gmm_c = mixture.GMM(n_components=c, covariance_type='full')
    gmm_c.fit(X)
    aic = gmm_c.bic(X)
    if aic < aic_min:
        aic_min = aic 
        gmm = gmm_c
        c_best = c
 print('GMM: num_cluster =', c_best)

 # Fit a Dirichlet process mixture of Gaussians using five components
 vbgmm = mixture.VBGMM(n_components=10, covariance_type='full')
 vbgmm.fit(X)

 # Fit a Dirichlet process mixture of Gaussians using five components
 dpgmm = mixture.DPGMM(n_components=5, covariance_type='full')
 dpgmm.fit(X)


 color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])

 plt.figure(figsize=(4, 12))
 for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                  (vbgmm, 'Variational Bayesian GMM'),
                                  (dpgmm, 'Dirichlet Process GMM')]):
    splot = plt.subplot(3, 1, 1 + i)
    Y_ = clf.predict(X)
    for i, (mean, covar, color) in enumerate(zip(
            clf.means_, clf._get_covars(), color_iter)):
        v, w = linalg.eigh(covar)
        u = w[0] / linalg.norm(w[0])
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.
        if not np.any(Y_ == i):
            continue
        plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)

        # Plot an ellipse to show the Gaussian component
        angle = np.arctan(u[1] / u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.5)
        splot.add_artist(ell)

    plt.xlim(-10, 10)
    plt.ylim(-5, 15)
    plt.xticks(())
    plt.yticks(())
    plt.title(title)

 plt.show()
	import itertools

	import numpy as np
	from scipy import linalg
	import matplotlib.pyplot as plt
	import matplotlib as mpl

	from sklearn import mixture

	# Number of samples per component
	n_samples = 500

	# Generate random sample, two components
	np.random.seed(0)
	C = np.array([[0., -0.1], [3.4, .4]])
	X = np.r_[np.dot(np.random.randn(2*n_samples, 2), C),
	np.dot(np.random.randn(2*n_samples, 2), C) + np.array([0,8]),
	np.dot(np.random.randn(2*n_samples, 2), C.T) + np.array([5,6]),
	.7 * np.random.randn(n_samples, 2) + np.array([-3, 3])]

	# Fit a mixture of Gaussians with EM using five components
	aic_min = 1e12
	c_best = -1
	for c in range(1, 11):
	gmm_c = mixture.GMM(n_components=c, covariance_type='full')
	gmm_c.fit(X)
	aic = gmm_c.bic(X)
	if aic < aic_min:
	aic_min = aic
	gmm = gmm_c
	c_best = c
	print('GMM: num_cluster =', c_best)

	# Fit a Dirichlet process mixture of Gaussians using five components
	vbgmm = mixture.VBGMM(n_components=10, covariance_type='full')
	vbgmm.fit(X)

	# Fit a Dirichlet process mixture of Gaussians using five components
	dpgmm = mixture.DPGMM(n_components=5, covariance_type='full')
	dpgmm.fit(X)


	color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])

	plt.figure(figsize=(4, 12))
	for i, (clf, title) in enumerate([(gmm, 'GMM'),
	(vbgmm, 'Variational Bayesian GMM'),
	(dpgmm, 'Dirichlet Process GMM')]):
	splot = plt.subplot(3, 1, 1 + i)
	Y_ = clf.predict(X)
	for i, (mean, covar, color) in enumerate(zip(
	clf.means_, clf._get_covars(), color_iter)):
	v, w = linalg.eigh(covar)
	u = w[0] / linalg.norm(w[0])
	# as the DP will not use every component it has access to
	# unless it needs it, we shouldn't plot the redundant
	# components.
	if not np.any(Y_ == i):
	continue
	plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)

	# Plot an ellipse to show the Gaussian component
	angle = np.arctan(u[1] / u[0])
	angle = 180 * angle / np.pi # convert to degrees
	ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
	ell.set_clip_box(splot.bbox)
	ell.set_alpha(0.5)
	splot.add_artist(ell)

	plt.xlim(-10, 10)
	plt.ylim(-5, 15)
	plt.xticks(())
	plt.yticks(())
	plt.title(title)

	plt.show()