Last active
October 22, 2017 17:52
-
-
Save MonteShaffer/b4b86cf39789281b7e5f52fc0ed85d94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I wrote this in 2009 as part of my M.S. Stats - Statistical Computing Course | |
# Not all the bells and whistles, but should be understandable | |
# © Monte J. Shaffer | |
# [email protected] | |
# MIT license | |
# K-means clustering (Lloyd's algorithm) with squared-Euclidean distance.
# Originally written in 2009 for an M.S. Statistical Computing course;
# not all the bells and whistles, but should be understandable.
#
# Args:
#   data:     numeric matrix (or something coercible via as.matrix),
#             n observations by p variables.
#   clusters: either a single number k (initial centers are then drawn at
#             random from the rows of `data`) or a k x p matrix of
#             starting centers.
#   maxIter:  maximum number of centroid-update iterations (default 25).
#
# Returns: a list of class "myKmeans" with components
#   cluster: integer vector of length n, cluster index (1..k) of each row
#   centers: k x p matrix of final cluster centroids
myKmeans <- function(data, clusters, maxIter = 25) {
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)  # p-dimensional space

  ## Step 0: initialize k points as centers.
  if (length(clusters) == 1) {
    k <- clusters
    # drop = FALSE keeps a matrix even when k == 1
    init <- data[sample(seq_len(n), k), , drop = FALSE]
  } else {
    # Cluster centers were passed in directly.
    clusters <- as.matrix(clusters)
    k <- nrow(clusters)  # BUG FIX: was nrow(cluster) — undefined object
    init <- clusters
  }

  # Squared Euclidean distance between two p-vectors.
  # sqrt is unnecessary: distances are only compared to find the minimum.
  myDist <- function(x, y) sum((x - y)^2)

  # One Lloyd step: assign points to nearest centers, then either return
  # the assignments (myReturn != "centroid") or the updated centroids.
  kOneStep <- function(data, init, n, k, myReturn = "centroid") {
    ## Step 1: classify the n points to the k centers in `init`.
    distMat <- matrix(0, nrow = n, ncol = k)
    for (i in seq_len(n)) {
      for (j in seq_len(k)) {
        distMat[i, j] <- myDist(data[i, ], init[j, ])
      }
    }

    ## Classify each point by its nearest center.
    # BUG FIX: the original allocated length-k storage (matrix(0, 1, k))
    # for n assignments, which errors with "subscript out of bounds"
    # whenever n > k. which.min also breaks ties toward the lowest index,
    # matching the original break-on-first-min loop.
    newClass <- integer(n)
    for (i in seq_len(n)) {
      newClass[i] <- which.min(distMat[i, ])
    }

    if (myReturn != "centroid") {
      newClass
    } else {
      ## Step 2: recompute each center as the mean of its assigned points.
      centroid <- matrix(0, nrow = k, ncol = p)
      for (j in seq_len(k)) {
        # drop = FALSE keeps a one-member cluster as a 1 x p matrix
        # (the original subset(...)[,-1] collapsed it to a bare vector).
        members <- data[newClass == j, , drop = FALSE]
        if (nrow(members) == 0) {
          # Empty cluster: keep the old center rather than producing NaN.
          centroid[j, ] <- init[j, ]
        } else {
          centroid[j, ] <- colMeans(members)
        }
      }
      centroid
    }
  }

  for (iter in seq_len(maxIter)) {
    init <- kOneStep(data, init, n, k)
  }
  # One more pass to get the final classifications.
  newClass <- kOneStep(data, init, n, k, "class")

  out <- list(cluster = newClass, centers = init)
  class(out) <- "myKmeans"
  out
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment