Skip to content

Instantly share code, notes, and snippets.

@saliksyed
Created August 16, 2017 08:54
Show Gist options
  • Save saliksyed/c5070cc48a85c5bc80cef813907dd2a4 to your computer and use it in GitHub Desktop.
Save saliksyed/c5070cc48a85c5bc80cef813907dd2a4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 16 11:14:06 2017
@author: saliksyed
"""
import random
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
# Number of synthetic sample points to generate.
n_samples = 1500
# Fixed seed handed to make_blobs so the generated data set is repeatable.
random_state = 170
# Per scikit-learn's make_blobs: X is the array of sample coordinates and y
# the blob label of each sample.
X, y = make_blobs(n_samples=n_samples, random_state=random_state)
# Alias for the coordinates; this is what gets passed to kmeans further down.
points = X
def compute_centroid(points):
    """Return the centroid (component-wise mean) of ``points``.

    Args:
        points: Non-empty sequence or array of equal-length coordinate
            vectors.

    Returns:
        For vector-valued points, a float ``numpy.ndarray`` holding the mean
        of each coordinate.

    Raises:
        ValueError: If ``points`` is empty, since no centroid is defined.
    """
    # ``len`` rather than truthiness: a NumPy array cannot be used in a
    # boolean context when it holds more than one element.
    if len(points) == 0:
        raise ValueError("cannot compute the centroid of an empty set of points")
    # One reduction over the first axis replaces the per-point Python loop and
    # the temporary array it allocated on every step.
    return np.mean(np.asarray(points, dtype=float), axis=0)
def dist(pt1, pt2):
    """Return the Euclidean distance between ``pt1`` and ``pt2``."""
    start = np.array(pt1)
    end = np.array(pt2)
    offset = start - end
    return np.linalg.norm(offset)
def kmeans(points, k=3):
    """Cluster ``points`` into ``k`` groups with iterative k-means.

    The initial centers are ``k`` distinct points drawn at random from
    ``points``. Each pass assigns every point to its nearest current center,
    recomputes each center as the centroid of its members, and stops once the
    summed distance moved by all centers in one pass falls below 0.1. The
    summed movement of every non-final pass is printed.

    Args:
        points: Sequence or 2-D array of coordinate vectors.
        k: Number of clusters to look for.

    Returns:
        dict mapping a cluster index in ``range(k)`` to the list of points
        assigned to that cluster on the final pass. A cluster that attracted
        no points on the final pass has no entry.

    Raises:
        ValueError: If ``k`` exceeds the number of points (raised by
            ``random.sample``).
    """
    # Sample indices instead of the container itself so that inputs which are
    # not registered sequences (e.g. NumPy arrays) are accepted as well.
    chosen = random.sample(range(len(points)), k)
    curr_centers = [points[i] for i in chosen]
    while True:
        # Rebuild the memberships from scratch on every pass; reusing one dict
        # across passes would pile up stale and duplicated points and skew
        # the centroids.
        assignments = {}
        for point in points:
            distances = [dist(center, point) for center in curr_centers]
            nearest = distances.index(min(distances))
            assignments.setdefault(nearest, []).append(point)

        # A cluster without members keeps its previous center rather than
        # collapsing to a placeholder value.
        updated_centers = []
        for cluster in range(k):
            if cluster in assignments:
                updated_centers.append(compute_centroid(assignments[cluster]))
            else:
                updated_centers.append(curr_centers[cluster])

        epsilon = sum(
            (dist(updated_centers[c], curr_centers[c]) for c in range(k)),
            0.0,
        )
        if epsilon < 0.1:
            break
        curr_centers = updated_centers
        # Single-argument parenthesised print gives the same output under
        # Python 2 and Python 3.
        print(epsilon)
    return assignments
# Cluster the generated points and draw every resulting cluster as its own
# scatter series on a single figure.
pts = kmeans(points, 3)
for cluster in pts:
    curr = pts[cluster]
    # NOTE: rebinding ``y`` here replaces the label array returned by
    # make_blobs at module level.
    x = [point[0] for point in curr]
    y = [point[1] for point in curr]
    plt.scatter(x, y)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment