Created
August 16, 2017 08:54
-
-
Save saliksyed/c5070cc48a85c5bc80cef813907dd2a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 16 11:14:06 2017
@author: saliksyed
"""
import random | |
import numpy as np | |
from sklearn.datasets import make_blobs | |
import matplotlib.pyplot as plt | |
# Synthetic input: n_samples points produced by make_blobs with a fixed
# random_state.
n_samples, random_state = 1500, 170
X, y = make_blobs(random_state=random_state, n_samples=n_samples)
points = X  # alias handed to kmeans() further down
def compute_centroid(points):
    """Return the centroid (coordinate-wise arithmetic mean) of ``points``.

    Args:
        points: Non-empty sequence or array of equally sized numeric points.

    Returns:
        The mean over all points, computed along the first axis; integer
        input yields a floating-point result.

    Raises:
        IndexError: If ``points`` is empty (the same exception type the
            previous loop-based version raised when indexing ``points[0]``).
    """
    if len(points) == 0:
        raise IndexError("compute_centroid() requires at least one point")
    # One vectorised reduction instead of allocating an array per point.
    return np.mean(np.asarray(points), axis=0)
def dist(pt1, pt2):
    """Return the Euclidean distance between ``pt1`` and ``pt2``.

    Both arguments are converted to NumPy arrays, so lists, tuples and
    arrays are all accepted.
    """
    first = np.array(pt1)
    second = np.array(pt2)
    offset = first - second
    return np.linalg.norm(offset)
def kmeans(points, k=3, tol=0.1):
    """Partition ``points`` into at most ``k`` clusters with Lloyd's k-means.

    Initial centres are ``k`` distinct input points picked with the
    ``random`` module (seed it for reproducible runs). Each iteration
    assigns every point to its nearest centre, then moves each centre to the
    mean of its members. Iteration stops once the summed movement of all
    centres drops below ``tol``. The summed movement is printed after every
    iteration that did not converge.

    Args:
        points: Sequence or array of numeric points of equal dimension.
        k: Number of clusters to look for.
        tol: Convergence threshold on the total centre movement.

    Returns:
        Dict mapping a cluster index (``0 .. k-1``) to the list of input
        points assigned to it. Each point appears exactly once; clusters
        that ended up empty have no entry.

    Raises:
        ValueError: If ``k`` is larger than the number of points
            (raised by ``random.sample``).
    """
    data = np.asarray(points, dtype=float)
    if data.ndim == 1:
        # Treat a flat sequence of scalars as one-dimensional points.
        data = data[:, np.newaxis]

    # Sample row indices rather than rows: random.sample() requires a real
    # sequence, which a NumPy array is not on newer Python versions.
    curr_centers = data[random.sample(range(len(data)), k)]

    while True:
        # labels[i] is the index of the centre nearest to point i; ties go
        # to the lowest index. Recomputed from scratch every iteration so
        # memberships from earlier iterations never leak into this one.
        offsets = data[:, np.newaxis, :] - curr_centers[np.newaxis, :, :]
        labels = np.linalg.norm(offsets, axis=2).argmin(axis=1)

        # A cluster that lost all of its members keeps its previous centre.
        updated_centers = curr_centers.copy()
        for cluster in range(k):
            members = data[labels == cluster]
            if len(members):
                updated_centers[cluster] = members.mean(axis=0)

        epsilon = np.linalg.norm(updated_centers - curr_centers, axis=1).sum()
        if epsilon < tol:
            break
        curr_centers = updated_centers
        print(epsilon)

    # Hand back the caller's own point objects, grouped by final label.
    assignments = {}
    for point, label in zip(points, labels):
        assignments.setdefault(int(label), []).append(point)
    return assignments
# Cluster the sample points and draw every cluster as its own scatter series.
pts = kmeans(points, 3)
for cluster_points in pts.values():
    # Dedicated names so the label array `y` returned by make_blobs is not
    # overwritten by the per-cluster coordinate lists.
    xs = [point[0] for point in cluster_points]
    ys = [point[1] for point in cluster_points]
    plt.scatter(xs, ys)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment