Last active
October 22, 2017 17:52
-
-
Save MonteShaffer/b4b86cf39789281b7e5f52fc0ed85d94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I wrote this in 2009 as part of my M.S. Stats - Statistical Computing Course | |
# Not all the bells and whistles, but should be understandable | |
# © Monte J. Shaffer | |
# [email protected] | |
# MIT license | |
# K-means clustering (Lloyd's algorithm) with squared-Euclidean distance.
# Originally written in 2009 for an M.S. Statistical Computing course;
# not all the bells and whistles, but should be understandable.
#
# Args:
#   data:     numeric matrix (or something coercible via as.matrix),
#             n observations by p variables.
#   clusters: either a single number k (initial centers are then drawn at
#             random from the rows of `data`) or a k x p matrix of
#             starting centers.
#   maxIter:  maximum number of centroid-update iterations (default 25).
#
# Returns: a list of class "myKmeans" with components
#   cluster: integer vector of length n, cluster index (1..k) of each row
#   centers: k x p matrix of final cluster centroids
myKmeans <- function(data, clusters, maxIter = 25) {
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)  # p-dimensional space

  ## Step 0: initialize k points as centers.
  if (length(clusters) == 1) {
    k <- clusters
    # drop = FALSE keeps a matrix even when k == 1
    init <- data[sample(seq_len(n), k), , drop = FALSE]
  } else {
    # Cluster centers were passed in directly.
    clusters <- as.matrix(clusters)
    k <- nrow(clusters)  # BUG FIX: was nrow(cluster) — undefined object
    init <- clusters
  }

  # Squared Euclidean distance between two p-vectors.
  # sqrt is unnecessary: distances are only compared to find the minimum.
  myDist <- function(x, y) sum((x - y)^2)

  # One Lloyd step: assign points to nearest centers, then either return
  # the assignments (myReturn != "centroid") or the updated centroids.
  kOneStep <- function(data, init, n, k, myReturn = "centroid") {
    ## Step 1: classify the n points to the k centers in `init`.
    distMat <- matrix(0, nrow = n, ncol = k)
    for (i in seq_len(n)) {
      for (j in seq_len(k)) {
        distMat[i, j] <- myDist(data[i, ], init[j, ])
      }
    }

    ## Classify each point by its nearest center.
    # BUG FIX: the original allocated length-k storage (matrix(0, 1, k))
    # for n assignments, which errors with "subscript out of bounds"
    # whenever n > k. which.min also breaks ties toward the lowest index,
    # matching the original break-on-first-min loop.
    newClass <- integer(n)
    for (i in seq_len(n)) {
      newClass[i] <- which.min(distMat[i, ])
    }

    if (myReturn != "centroid") {
      newClass
    } else {
      ## Step 2: recompute each center as the mean of its assigned points.
      centroid <- matrix(0, nrow = k, ncol = p)
      for (j in seq_len(k)) {
        # drop = FALSE keeps a one-member cluster as a 1 x p matrix
        # (the original subset(...)[,-1] collapsed it to a bare vector).
        members <- data[newClass == j, , drop = FALSE]
        if (nrow(members) == 0) {
          # Empty cluster: keep the old center rather than producing NaN.
          centroid[j, ] <- init[j, ]
        } else {
          centroid[j, ] <- colMeans(members)
        }
      }
      centroid
    }
  }

  for (iter in seq_len(maxIter)) {
    init <- kOneStep(data, init, n, k)
  }
  # One more pass to get the final classifications.
  newClass <- kOneStep(data, init, n, k, "class")

  out <- list(cluster = newClass, centers = init)
  class(out) <- "myKmeans"
  out
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment