Skip to content

Instantly share code, notes, and snippets.

@mitmul
Created December 11, 2013 03:25
Show Gist options
  • Select an option

  • Save mitmul/7904680 to your computer and use it in GitHub Desktop.

Select an option

Save mitmul/7904680 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
import numpy as np
import matplotlib.pyplot as plt
# Synthetic 2-D data set made of two Gaussian blobs with 100 samples each:
# the first is standard normal around the origin, the second is the same
# distribution shifted by (6, 2).
data1 = np.random.randn(100, 2)
data2 = np.array([6, 2]) + np.random.randn(100, 2)
# Stack both blobs row-wise into a single (200, 2) array.
data = np.concatenate((data1, data2), axis=0)
class KMeans:
    """Minimal k-means clusterer driven by explicit E and M steps.

    Attributes:
        k: number of clusters.
        data: the ``(num, dim)`` NumPy array of samples given to the
            constructor.
        num: number of samples (``data.shape[0]``).
        dim: dimensionality of each sample (``data.shape[1]``).
        mu: ``(k, dim)`` array of centroids, initialised with
            ``np.random.rand`` (uniform in [0, 1)).
        r: dict mapping a cluster index to the list of samples currently
            assigned to it. Clusters without samples have no entry.
    """

    def __init__(self, data, k):
        """Store the data set and draw ``k`` random initial centroids.

        Args:
            data: 2-D NumPy array with one sample per row.
            k: number of clusters to fit.
        """
        self.k = k
        self.data = data
        self.num = data.shape[0]
        self.dim = data.shape[1]
        self.mu = np.random.rand(k, self.dim)
        self.r = {}

    def step_E(self):
        """Assign every sample to its nearest centroid (Euclidean distance).

        Rebuilds ``self.r`` from scratch. When a sample is equally close to
        several centroids, the lowest cluster index wins.
        """
        self.r = {}
        # Broadcast to a (num, k, dim) difference tensor, then reduce it to a
        # (num, k) matrix of sample-to-centroid distances.
        diffs = self.data[:, np.newaxis, :] - self.mu[np.newaxis, :, :]
        nearest = np.linalg.norm(diffs, axis=2).argmin(axis=1)
        for x_i, label in zip(self.data, nearest):
            # setdefault replaces the dict.has_key() check, which no longer
            # exists on Python 3; plain int keys keep the dict easy to index.
            self.r.setdefault(int(label), []).append(x_i)

    def step_M(self):
        """Move each centroid to the mean of the samples assigned to it.

        A cluster that received no samples in the last E step keeps its
        previous centroid rather than raising ``KeyError``.
        """
        for k in range(self.k):
            members = self.r.get(k)
            if not members:
                # Nothing assigned: leave this centroid where it is.
                continue
            self.mu[k, :] = np.mean(members, axis=0)
# Fit two clusters to the synthetic data, recording each centroid's position
# before the first iteration and again after every iteration.
kmeans = KMeans(data, 2)
mu1 = [kmeans.mu[0].copy()]
mu2 = [kmeans.mu[1].copy()]
for _ in range(50):
    kmeans.step_E()
    kmeans.step_M()
    # Snapshot by value: step_M writes into kmeans.mu in place.
    mu1.append(kmeans.mu[0].copy())
    mu2.append(kmeans.mu[1].copy())
mu1, mu2 = np.asarray(mu1), np.asarray(mu2)

# Raw samples, coloured by the blob they were drawn from.
for samples, colour in ((data1, 'r'), (data2, 'g')):
    plt.scatter(samples[:, 0], samples[:, 1], c=colour, marker='+')
# Path followed by each centroid over the iterations.
for path, fmt in ((mu1, '-^r'), (mu2, '-^g')):
    plt.plot(path[:, 0], path[:, 1], fmt)
# Final centroid positions.
for path, fmt in ((mu1, 'or'), (mu2, 'og')):
    plt.plot(path[-1, 0], path[-1, 1], fmt)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment