Skip to content

Instantly share code, notes, and snippets.

@idoan
Last active October 17, 2015 04:13
Show Gist options
  • Save idoan/b0a1e1a33055f0d13051 to your computer and use it in GitHub Desktop.
Save idoan/b0a1e1a33055f0d13051 to your computer and use it in GitHub Desktop.
Main file of the Kernel K-Means implementation for the Data Mining - Clustering class on Coursera, with my changes added.
import sys
from LoadData import *
from k_means import *
from evaluation import *
from kernel_k_means import *
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def _plot_clusters(points, labels, use_3d, caption):
    """Scatter-plot the points coloured by cluster label and show the figure.

    Args:
        points: indexable collection of original (un-kernelised) points; the
            first two (or three, when ``use_3d``) coordinates are plotted.
        labels: container indexed by point position that yields the integer
            cluster index assigned to that point.
        use_3d: draw on a 3-D axis using the third coordinate as well.
        caption: text placed near the top-left corner of the plot.
    """
    palette = ["b", "r", "y", "g", "m"]

    ax = None
    if use_3d:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

    # NOTE: the former per-point plt.hold(True) call is gone on purpose:
    # pyplot.hold was deprecated in matplotlib 2.0 and removed in 3.0, and
    # successive scatter calls already accumulate on the same axes.
    for k, point in enumerate(points):
        # Cycle through the palette so more than len(palette) clusters do not
        # raise IndexError (colours repeat in that case).
        color = palette[labels[k] % len(palette)]
        if use_3d:
            ax.scatter(point[0], point[1], point[2], c=color)
        else:
            plt.scatter(point[0], point[1], c=color)

    # Place the caption near the top-left corner of the current axes.
    x_min, x_max = plt.gca().get_xlim()
    y_min, y_max = plt.gca().get_ylim()
    text_x = x_min + 0.03 * abs(x_min - x_max)
    text_y = y_min + 0.85 * abs(y_min - y_max)
    if use_3d:
        ax.text(text_x, text_y, 0.9, caption, color="g")
    else:
        plt.text(text_x, text_y, caption, color="g")

    # NOTE: to also save the figure, call plt.savefig(<name>) before
    # plt.show(), e.g. dataFilename + "_Kernel_K-Means_" + str(K) + "_.png".
    plt.show()


def main(argv):
    """Run Kernel K-Means on a data file, plot the clusters, print the scores.

    Args:
        argv: command-line vector in the form
            ``[prog, <data-file>, <ground-truth-file>, <K-value>]``.

    Exits with status 1 (via ``sys.exit``) when the argument count is wrong,
    when K is not an integer, or when K is outside ``1..number of points``.
    """
    if len(argv) != 4:
        print("[usage] <data-file> <ground-truth-file> <K-value>")
        sys.exit(1)

    dataFilename = argv[1]
    groundtruthFilename = argv[2]
    try:
        K = int(argv[3])
    except ValueError:
        sys.exit("<K-value> must be an integer, got %r" % (argv[3],))

    old_data = loadPoints(dataFilename)
    groundtruth = loadClusters(groundtruthFilename)
    sigma = 4.0

    # number of data points
    nData = len(old_data)
    # The first K points seed the centers, so K must not exceed nData.
    # Checked before the kernel call so bad input fails fast.
    if not 1 <= K <= nData:
        sys.exit("<K-value> must be between 1 and the number of data points "
                 "(%d), got %d" % (nData, K))
    # dimension of data
    nDimData = len(old_data[0])

    # Kernelised representation used for clustering; the plot below still
    # uses the original coordinates in old_data.
    data = kernel(old_data, sigma)

    # size of groundtruth labels, R = 4 for [0,1,2,3]
    R = len(set(groundtruth))
    print("size of data = %d \ndimension of data = %d" % (nData, nDimData))

    # normalize groundtruth's classes to 0-indexed ones. [1,2,3] -> [0,1,2]
    min_val = min(groundtruth)
    if min_val > 0:
        groundtruth = [label - min_val for label in groundtruth]

    # Seed the centers with the first K (kernelised) points.
    centers = [data[i] for i in range(K)]
    results = kmeans(data, centers)

    res_Purity = purity(results, groundtruth)
    res_NMI = NMI(results, groundtruth)

    # The original switched to a 3-D plot whenever R > 2, which crashes on
    # 2-D data because the third coordinate is read. Additionally require a
    # third coordinate; every previously working case behaves the same.
    use_3d = R > 2 and nDimData > 2

    caption = ("Method = %s\nFile = %s \nsize of Data = %dx%d \nK = %d \n"
               "size of Ground Truth R = %d"
               % ("Kernel K-Means", dataFilename, nData, nDimData, K, R))
    _plot_clusters(old_data, results, use_3d, caption)

    # Single-argument print() is valid on both Python 2 and 3; the spacing is
    # kept identical to the original output.
    print("Purity = %s" % (res_Purity,))
    print("NMI =  %s" % (res_NMI,))


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment