Skip to content

Instantly share code, notes, and snippets.

@tomrockdsouza
Last active June 2, 2017 09:02
Show Gist options
  • Save tomrockdsouza/a29010081b42fb581b0151a6c2b81ff5 to your computer and use it in GitHub Desktop.
Save tomrockdsouza/a29010081b42fb581b0151a6c2b81ff5 to your computer and use it in GitHub Desktop.
Java Implementation of Kmeans Algorithm on 2D Dataset
/** The class encapsulates an implementation of the Kmeans algorithm for 2D Dataset
*
* First argument is for Number of Elements
* Second argument is for Start of Randomizer
* Third argument is for End of Randomizer
* Fourth argument is for Number of Clusters
*
* Usage with the command line :
* >java Kmean2D 7 10 20 3
* This means 7 tuples of data will be created,
* with each tuple having randomized values between 10 and 20 (inclusive),
* and 3 clusters will be formed.
*
* As per Kmeans Algorithm when the datasets from previous iteration belong to the same clusters in current iteration
* The Algorithm stops and clusters are finalized
*
* @author Tomrock D'souza, St. Francis Institute Of Technology, University of Mumbai, 2017
* @copyright GNU General Public License v3
* No reproduction in whole or part without maintaining this copyright notice
* and imposing this condition on any subsequent users.
*/
import java.util.* ;
import java.lang.Math;
/**
 * Command-line K-means clustering over a randomly generated 2D integer data set.
 *
 * <p>Arguments: {@code <numElements> <rangeStart> <rangeEnd> <numClusters>}.
 * X and Y coordinates are each drawn without repetition from the inclusive
 * range {@code rangeStart..rangeEnd}, so {@code numElements} may not exceed the
 * number of integers in that range.
 */
class Kmean2D {

    /**
     * Euclidean distance between the points (a, b) and (c, d).
     *
     * @param a x coordinate of the first point
     * @param b y coordinate of the first point
     * @param c x coordinate of the second point
     * @param d y coordinate of the second point
     * @return the straight-line distance between the two points
     * @throws NullPointerException if any argument is {@code null}
     */
    public static double Dist(Double a, Double b, Double c, Double d) {
        return euclidean(a, b, c, d);
    }

    /**
     * Generates the data set, runs K-means until the cluster assignment stops
     * changing, and prints every iteration plus the final centroids to stdout.
     *
     * @param args {@code numElements rangeStart rangeEnd numClusters}; extra
     *             arguments are ignored
     * @throws IllegalArgumentException if fewer than four arguments are given,
     *         an argument is not an integer ({@link NumberFormatException}),
     *         or the values are inconsistent (non-positive counts, empty range,
     *         more elements than distinct values in the range)
     */
    public static void main(String args[]) {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                "Usage: java Kmean2D <numElements> <rangeStart> <rangeEnd> <numClusters>");
        }
        int numSize = Integer.parseInt(args[0]);
        int numStart = Integer.parseInt(args[1]);
        int numEnd = Integer.parseInt(args[2]);
        int cSize = Integer.parseInt(args[3]);

        // long arithmetic: numEnd - numStart + 1 can overflow an int.
        long rangeSize = (long) numEnd - numStart + 1;
        if (numSize < 1) {
            throw new IllegalArgumentException("Number of elements must be at least 1, got " + numSize);
        }
        if (cSize < 1) {
            throw new IllegalArgumentException("Number of clusters must be at least 1, got " + cSize);
        }
        if (rangeSize < 1 || rangeSize > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                "Invalid randomizer range " + numStart + ".." + numEnd);
        }
        if (numSize > rangeSize) {
            // Values are drawn without repetition, so the range must be large enough.
            throw new IllegalArgumentException(
                "Cannot draw " + numSize + " distinct values from a range of " + rangeSize + " values");
        }

        // Every value of the inclusive range, reshuffled once per coordinate axis.
        Integer[] pool = new Integer[(int) rangeSize];
        for (int p = 0; p < pool.length; p++) {
            pool[p] = numStart + p;
        }
        int[] arrayX = drawCoordinates(pool, numSize);
        int[] arrayY = drawCoordinates(pool, numSize);

        int[] clustvalue = initialAssignment(numSize, cSize);
        int[] clusttemp = new int[numSize];
        double[] clustX = new double[cSize];
        double[] clustY = new double[cSize];

        System.out.println("arrayX\tarrayY\tAssigned-cluster");
        for (int i = 0; i < numSize; i++) {
            System.out.println(arrayX[i] + "\t" + arrayY[i] + "\t" + clustvalue[i]);
        }

        while (true) {
            computeCentroids(arrayX, arrayY, clustvalue, clustX, clustY);

            for (int i = 0; i < numSize; i++) {
                clusttemp[i] = nearestCluster(arrayX[i], arrayY[i], clustX, clustY);
            }

            for (int i = 0; i < cSize; i++) {
                System.out.println("Current Cluster Value " + i + " =(" + clustX[i] + "," + clustY[i] + ")");
            }
            System.out.println("\n\narrayX\t\tarrayY\t\tPre-clus\tCurr-Clus");
            for (int i = 0; i < numSize; i++) {
                System.out.println(arrayX[i] + "\t\t" + arrayY[i] + "\t\t" + clustvalue[i] + "\t\t" + clusttemp[i]);
            }

            if (Arrays.equals(clusttemp, clustvalue)) {
                // Converged: the centroids computed at the top of this iteration
                // already belong to the final assignment, so no recomputation is needed.
                System.out.println("\nFinal Cluster Values:");
                for (int i = 0; i < cSize; i++) {
                    System.out.println("Cluster Value " + i + " =(" + clustX[i] + "," + clustY[i] + ")");
                }
                break;
            }
            clustvalue = clusttemp.clone();
        }
    }

    /** Primitive Euclidean distance between (ax, ay) and (bx, by). */
    private static double euclidean(double ax, double ay, double bx, double by) {
        return Math.sqrt(Math.pow((ax - bx), 2) + Math.pow((ay - by), 2));
    }

    /** Shuffles {@code pool} in place and returns its first {@code count} values. */
    private static int[] drawCoordinates(Integer[] pool, int count) {
        // Arrays.asList is a view, so shuffling the list shuffles the array itself.
        Collections.shuffle(Arrays.asList(pool));
        int[] drawn = new int[count];
        for (int i = 0; i < count; i++) {
            drawn[i] = pool[i];
        }
        return drawn;
    }

    /**
     * Builds the starting assignment: consecutive runs of numSize / cSize points
     * for every cluster but the last, which takes all remaining points.
     */
    private static int[] initialAssignment(int numSize, int cSize) {
        int[] assignment = new int[numSize];
        int perCluster = numSize / cSize;
        int next = 0;
        for (int cluster = 0; cluster < cSize - 1; cluster++) {
            for (int m = 0; m < perCluster; m++) {
                assignment[next++] = cluster;
            }
        }
        for (; next < numSize; next++) {
            assignment[next] = cSize - 1;
        }
        return assignment;
    }

    /**
     * Writes the mean of each cluster's points into centroidX / centroidY.
     * A cluster with no points gets a NaN centroid (0.0 / 0).
     */
    private static void computeCentroids(int[] xs, int[] ys, int[] assignment,
                                         double[] centroidX, double[] centroidY) {
        for (int cluster = 0; cluster < centroidX.length; cluster++) {
            long sumX = 0; // long: an int sum could overflow for large coordinates
            long sumY = 0;
            int members = 0;
            for (int p = 0; p < xs.length; p++) {
                if (assignment[p] == cluster) {
                    sumX += xs[p];
                    sumY += ys[p];
                    members++;
                }
            }
            centroidX[cluster] = (double) sumX / members;
            centroidY[cluster] = (double) sumY / members;
        }
    }

    /**
     * Returns the index of the centroid closest to (x, y); the lowest index wins ties.
     * NaN centroids never compare as closer, so they are skipped. Returns -1 only if
     * every centroid is NaN, which cannot happen while at least one point is assigned.
     */
    private static int nearestCluster(int x, int y, double[] centroidX, double[] centroidY) {
        int nearest = -1;
        // Start from infinity rather than a magic cap so far-away points are still assigned.
        double best = Double.POSITIVE_INFINITY;
        for (int cluster = 0; cluster < centroidX.length; cluster++) {
            double distance = euclidean(centroidX[cluster], centroidY[cluster], x, y);
            if (distance < best) {
                best = distance;
                nearest = cluster;
            }
        }
        return nearest;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment