Last active
June 2, 2017 09:02
-
-
Save tomrockdsouza/a29010081b42fb581b0151a6c2b81ff5 to your computer and use it in GitHub Desktop.
Java Implementation of Kmeans Algorithm on 2D Dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** The class encapsulates an implementation of the K-means algorithm for a 2D dataset.
 *
 * First argument is the number of elements
 * Second argument is the start of the randomizer range
 * Third argument is the end of the randomizer range
 * Fourth argument is the number of clusters
 *
 * Usage with the command line:
 * >java Kmean2D 7 10 20 3
 * This means 7 tuples of data will be created,
 * with each tuple having randomized values between 10 and 20,
 * and 3 clusters will be formed.
 *
 * As per the K-means algorithm, when every data point belongs to the same cluster in the current iteration
 * as in the previous iteration, the algorithm stops and the clusters are finalized.
 *
 * @author Tomrock D'souza, St. Francis Institute Of Technology, University of Mumbai, 2017
 * @copyright GNU General Public License v3
 * No reproduction in whole or part without maintaining this copyright notice
 * and imposing this condition on any subsequent users.
 */
import java.util.* ; | |
import java.lang.Math; | |
/**
 * Command-line demonstration of the K-means algorithm on a randomly generated
 * 2D data set of integer points.
 *
 * <p>Arguments: number of points, lowest coordinate value, highest coordinate
 * value (inclusive) and number of clusters, e.g. {@code java Kmean2D 7 10 20 3}.
 * Iteration stops as soon as every point keeps the cluster it had in the
 * previous pass.
 */
class Kmean2D {

    /**
     * Returns the Euclidean distance between the points (a, b) and (c, d).
     *
     * @param a x coordinate of the first point
     * @param b y coordinate of the first point
     * @param c x coordinate of the second point
     * @param d y coordinate of the second point
     * @return the straight-line distance between the two points
     * @throws NullPointerException if any argument is null
     */
    public static double Dist(Double a, Double b, Double c, Double d) {
        return distance(a, b, c, d);
    }

    /** Primitive form of {@link #Dist}, used internally to avoid boxing in the hot loop. */
    private static double distance(double x1, double y1, double x2, double y2) {
        return Math.sqrt(Math.pow(x1 - x2, 2) + Math.pow(y1 - y2, 2));
    }

    /**
     * Generates the random data set, runs K-means until the assignment is
     * stable, and prints every iteration to standard output.
     *
     * @param args {@code numElements rangeStart rangeEnd numClusters}; extra
     *             arguments are ignored
     * @throws IllegalArgumentException if fewer than four arguments are given,
     *         an argument is not an integer, the element count is negative,
     *         the range is empty or too large, or the cluster count is below 1
     */
    public static void main(String args[]) {
        if (args == null || args.length < 4) {
            throw new IllegalArgumentException(
                "Usage: java Kmean2D <numElements> <rangeStart> <rangeEnd> <numClusters>");
        }
        // NumberFormatException is itself an IllegalArgumentException.
        int numSize = Integer.parseInt(args[0]);
        int numStart = Integer.parseInt(args[1]);
        int numEnd = Integer.parseInt(args[2]);
        int cSize = Integer.parseInt(args[3]);

        if (numSize < 0) {
            throw new IllegalArgumentException("Number of elements must not be negative: " + numSize);
        }
        if (numEnd < numStart) {
            throw new IllegalArgumentException(
                "Range end (" + numEnd + ") must not be below range start (" + numStart + ")");
        }
        if (cSize < 1) {
            throw new IllegalArgumentException("Number of clusters must be at least 1: " + cSize);
        }
        // Computed in long so that an extreme range cannot wrap to a negative size.
        long rangeSize = (long) numEnd - numStart + 1;
        if (rangeSize > Integer.MAX_VALUE - 8) {
            throw new IllegalArgumentException("Range is too large: " + rangeSize + " values");
        }

        // Pool of every value in [numStart, numEnd]; indexed by offset so the
        // loop cannot overflow when numEnd is Integer.MAX_VALUE.
        List<Integer> pool = new ArrayList<Integer>((int) rangeSize);
        for (int offset = 0; offset < rangeSize; offset++) {
            pool.add(numStart + offset);
        }

        int[] arrayX = drawCoordinates(pool, numSize);
        int[] arrayY = drawCoordinates(pool, numSize);
        int[] clustvalue = seedAssignment(numSize, cSize);

        System.out.println("arrayX\tarrayY\tAssigned-cluster");
        for (int i = 0; i < numSize; i++) {
            System.out.println(arrayX[i] + "\t" + arrayY[i] + "\t" + clustvalue[i]);
        }

        double[] clustX = new double[cSize];
        double[] clustY = new double[cSize];
        while (true) {
            computeCentroids(arrayX, arrayY, clustvalue, clustX, clustY);
            int[] clusttemp = assignNearest(arrayX, arrayY, clustX, clustY, clustvalue);

            for (int i = 0; i < cSize; i++) {
                System.out.println("Current Cluster Value " + i + " =(" + clustX[i] + "," + clustY[i] + ")");
            }
            System.out.println("\n\narrayX\t\tarrayY\t\tPre-clus\tCurr-Clus");
            for (int i = 0; i < numSize; i++) {
                System.out.println(arrayX[i] + "\t\t" + arrayY[i] + "\t\t" + clustvalue[i] + "\t\t" + clusttemp[i]);
            }

            if (Arrays.equals(clusttemp, clustvalue)) {
                // Converged: the centroids above were computed from this very
                // assignment, so they are already the final ones.
                System.out.println("\nFinal Cluster Values:");
                for (int i = 0; i < cSize; i++) {
                    System.out.println("Cluster Value " + i + " =(" + clustX[i] + "," + clustY[i] + ")");
                }
                break;
            }
            clustvalue = clusttemp;
        }
    }

    /**
     * Draws {@code count} values from {@code pool} by shuffling it and taking
     * values from the front. When {@code count} exceeds the pool size the pool
     * is reshuffled and drawn from again, so values repeat only when the range
     * is too small to supply distinct ones.
     */
    private static int[] drawCoordinates(List<Integer> pool, int count) {
        int[] drawn = new int[count];
        int filled = 0;
        while (filled < count) {
            Collections.shuffle(pool);
            for (int p = 0; p < pool.size() && filled < count; p++) {
                drawn[filled++] = pool.get(p);
            }
        }
        return drawn;
    }

    /**
     * Builds the starting assignment: each of the first {@code cSize - 1}
     * clusters receives {@code numSize / cSize} consecutive points and the
     * last cluster receives whatever remains.
     */
    private static int[] seedAssignment(int numSize, int cSize) {
        int[] assignment = new int[numSize];
        int perCluster = numSize / cSize;
        int next = 0;
        for (int cluster = 0; cluster < cSize - 1; cluster++) {
            for (int n = 0; n < perCluster; n++) {
                assignment[next++] = cluster;
            }
        }
        while (next < numSize) {
            assignment[next++] = cSize - 1;
        }
        return assignment;
    }

    /**
     * Stores the mean of each cluster's points into {@code centroidX} /
     * {@code centroidY}. A cluster with no points gets a NaN centroid; NaN
     * never compares as nearer, so such a cluster stays empty.
     */
    private static void computeCentroids(int[] xs, int[] ys, int[] assignment,
                                         double[] centroidX, double[] centroidY) {
        int clusters = centroidX.length;
        long[] sumX = new long[clusters]; // long: int sums could overflow on wide ranges
        long[] sumY = new long[clusters];
        int[] members = new int[clusters];
        for (int i = 0; i < xs.length; i++) {
            int cluster = assignment[i];
            sumX[cluster] += xs[i];
            sumY[cluster] += ys[i];
            members[cluster]++;
        }
        for (int cluster = 0; cluster < clusters; cluster++) {
            centroidX[cluster] = (double) sumX[cluster] / members[cluster];
            centroidY[cluster] = (double) sumY[cluster] / members[cluster];
        }
    }

    /**
     * Returns, for every point, the index of the closest centroid. Ties go to
     * the lowest cluster index. If no centroid is comparable (all NaN) the
     * point keeps its entry from {@code previous}.
     */
    private static int[] assignNearest(int[] xs, int[] ys, double[] centroidX,
                                       double[] centroidY, int[] previous) {
        int[] nearest = new int[xs.length];
        for (int i = 0; i < xs.length; i++) {
            int best = previous[i];
            // Infinity instead of a fixed cap so arbitrarily distant points are still assigned.
            double bestDistance = Double.POSITIVE_INFINITY;
            for (int cluster = 0; cluster < centroidX.length; cluster++) {
                double d = distance(centroidX[cluster], centroidY[cluster], xs[i], ys[i]);
                if (d < bestDistance) {
                    best = cluster;
                    bestDistance = d;
                }
            }
            nearest[i] = best;
        }
        return nearest;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment