Skip to content

Instantly share code, notes, and snippets.

@llSourcell
Created July 3, 2017 11:46
Show Gist options
  • Select an option

  • Save llSourcell/f672957265ec4a2d61d89549d22122aa to your computer and use it in GitHub Desktop.

Select an option

Save llSourcell/f672957265ec4a2d61d89549d22122aa to your computer and use it in GitHub Desktop.
# Main k-means loop: alternate between assigning every instance to its nearest
# prototype (centroid) and recomputing each prototype as the mean of the
# instances assigned to it, until the measured prototype movement `norm` is no
# longer greater than `epsilon`.
# NOTE(review): this fragment lost its leading indentation and the enclosing
# function header is not visible here; the nesting described in the comments
# below is inferred from the statements themselves - confirm against the
# original file. All of `norm`, `epsilon`, `iteration`, `prototypes`,
# `prototypes_old`, `dataset`, `k`, `num_features`, `belongs_to`,
# `history_centroids` and `dist_method` are defined outside this fragment.
while norm > epsilon:
# pass counter; it is not read anywhere in the visible fragment
iteration += 1
# Movement is measured at the top of the pass, before the prototypes are
# updated, so it reflects the change made by the previous pass.
# NOTE(review): this means the loop condition lags one pass behind - one more
# full pass appears to run after the prototypes have already stopped moving.
norm = dist_method(prototypes, prototypes_old)
# Binds the same object (no copy); `prototypes` is rebound to a freshly
# allocated array further down rather than modified in place.
prototypes_old = prototypes
# assignment step: for each instance in the dataset
for index_instance, instance in enumerate(dataset):
# distance vector with one slot per prototype, shape (k, 1)
dist_vec = np.zeros((k, 1))
# for each prototype
for index_prototype, prototype in enumerate(prototypes):
# store the distance between this prototype and the instance
dist_vec[index_prototype] = dist_method(prototype,
instance)
# record the index of the nearest prototype as this instance's cluster
# (np.argmin returns the first index when several distances tie)
belongs_to[index_instance, 0] = np.argmin(dist_vec)
# update step: fresh (k, num_features) array that will hold the new prototypes
tmp_prototypes = np.zeros((k, num_features))
# for each cluster (one per prototype)
for index in range(len(prototypes)):
# indices of all instances currently assigned to this cluster; this scans the
# whole of belongs_to once per cluster
instances_close = [i for i in range(len(belongs_to)) if belongs_to[i] == index]
# the new prototype is the per-feature mean of those instances
# NOTE(review): if no instance was assigned to this cluster, instances_close
# is empty; assuming dataset is a NumPy array, the mean of the empty selection
# is presumably NaN (with a RuntimeWarning), which would then propagate into
# later distance computations - confirm and consider keeping the old prototype.
prototype = np.mean(dataset[instances_close], axis=0)
# store the new prototype in its row of the temporary array
tmp_prototypes[index, :] = prototype
# the freshly computed prototypes become the current ones
prototypes = tmp_prototypes
# keep this pass's prototypes in the history (the original comment says this is
# used for plotting); the appended object is the same array as `prototypes`
history_centroids.append(tmp_prototypes)
# return the final prototypes, the per-pass prototype history, and the cluster
# index assigned to each instance
return prototypes, history_centroids, belongs_to
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment