Last active
October 17, 2015 04:13
-
-
Save idoan/b0a1e1a33055f0d13051 to your computer and use it in GitHub Desktop.
Main file of a Kernel K-Means implementation for the Data Mining - Clustering class on Coursera, with my changes added.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: original import order is kept on purpose -- with wildcard imports a
# later import can shadow names from an earlier one, so regrouping could
# change which object a name resolves to.
import sys
from LoadData import *
from k_means import *
from evaluation import *
from kernel_k_means import *
import matplotlib
import matplotlib.pyplot as plt
# Not referenced by name; on older matplotlib releases this import is what
# makes projection='3d' available -- keep it.
from mpl_toolkits.mplot3d import Axes3D
# Colours used to draw clusters, indexed by cluster id. The palette is cycled
# (id modulo length) so more than five clusters no longer raises IndexError.
_CLUSTER_COLORS = ["b", "r", "y", "g", "m"]


def _zero_index_labels(labels):
    """Return labels shifted so that the smallest label becomes 0.

    Example: [1, 2, 3] -> [0, 1, 2]. The input is returned unchanged when its
    minimum is already 0. Negative minimums are shifted up as well (the
    original script only handled a positive minimum).
    """
    lowest = min(labels)
    if lowest == 0:
        return labels
    return [label - lowest for label in labels]


def _plot_clusters(points, assignments, n_dims, caption):
    """Scatter-plot the points coloured by cluster id and show the figure.

    points      -- indexable rows of coordinates (the un-kernelised input).
    assignments -- one integer cluster id per point.
    n_dims      -- number of coordinates per point; 3 or more selects a 3-D
                   plot of the first three coordinates, exactly 2 a 2-D plot.
    caption     -- text drawn near the top-left corner of the axes.
    """
    if n_dims < 2:
        # A scatter plot needs at least an x and a y coordinate.
        print("[warning] data has fewer than 2 dimensions, skipping plot")
        return

    # The projection depends on the dimensionality of the data, not on the
    # number of ground-truth classes: indexing a third coordinate on 2-D
    # points would raise IndexError.
    use_3d = n_dims >= 3

    fig = plt.figure()
    if use_3d:
        ax = fig.add_subplot(111, projection='3d')
    else:
        ax = fig.add_subplot(111)

    palette_size = len(_CLUSTER_COLORS)
    colors = [_CLUSTER_COLORS[label % palette_size] for label in assignments]
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]

    # One scatter call for the whole data set instead of one artist per point.
    # plt.hold(True) is gone: it was removed in matplotlib 3.0 and holding
    # has always been the default behaviour.
    if use_3d:
        zs = [point[2] for point in points]
        ax.scatter(xs, ys, zs, c=colors)
    else:
        ax.scatter(xs, ys, c=colors)

    # Place the caption near the top-left corner of the current axis limits.
    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()
    text_x = x_lo + 0.03 * abs(x_lo - x_hi)
    text_y = y_lo + 0.85 * abs(y_lo - y_hi)
    if use_3d:
        # 0.9 is the fixed z position used by the original script.
        ax.text(text_x, text_y, 0.9, caption, color="g")
    else:
        ax.text(text_x, text_y, caption, color="g")

    plt.show()
    #picname=dataFilename+"_Kernel_K-Means_" + str(K) + "_" + ".png"
    #matplotlib.pyplot.savefig(picname)


def main(argv=None, sigma=4.0):
    """Run kernel k-means on a data file, plot the result and print scores.

    argv  -- argument vector shaped like sys.argv:
             [prog, <data-file>, <ground-truth-file>, <K-value>].
             Defaults to sys.argv.
    sigma -- second argument passed to kernel(); 4.0 is the value the
             original script hard-coded.

    Returns a process exit status: 0 on success, 1 on a usage or input error.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 4:
        print("[usage] <data-file> <ground-truth-file> <K-value>")
        return 1

    dataFilename = argv[1]
    groundtruthFilename = argv[2]
    try:
        K = int(argv[3])
    except ValueError:
        K = None
    if K is None or K < 1:
        print("[error] <K-value> must be a positive integer, got %r" % (argv[3],))
        return 1

    old_data = loadPoints(dataFilename)
    groundtruth = loadClusters(groundtruthFilename)

    # number of data points
    nData = len(old_data)
    if nData == 0 or len(groundtruth) == 0:
        print("[error] data file and ground-truth file must not be empty")
        return 1
    # dimension of data
    nDimData = len(old_data[0])

    # NOTE(review): kernel() comes from kernel_k_means; presumably it maps the
    # raw points into the representation kmeans() clusters on -- confirm there.
    data = kernel(old_data, sigma)
    if K > len(data):
        # The first K rows of `data` seed the centers, so K cannot exceed it.
        print("[error] K = %d exceeds the number of data points (%d)" % (K, len(data)))
        return 1

    # size of groundtruth labels, R = 4 for [0,1,2,3]
    R = len(set(groundtruth))
    print("size of data = %d \ndimension of data = %d" % (nData, nDimData))

    # normalize groundtruth's classes to 0-indexed ones. [1,2,3] -> [0,1,2]
    groundtruth = _zero_index_labels(groundtruth)

    # Seed the centers with the first K rows of the transformed data.
    centers = [data[i] for i in range(K)]
    results = kmeans(data, centers)

    res_Purity = purity(results, groundtruth)
    res_NMI = NMI(results, groundtruth)

    caption = "Method = %s\nFile = %s \nsize of Data = %dx%d \nK = %d \nsize of Ground Truth R = %d" % (
        "Kernel K-Means", dataFilename, nData, nDimData, K, R)
    _plot_clusters(old_data, results, nDimData, caption)

    # Same output as the original `print "Purity =", x` / `print "NMI = ", x`
    # statements, written so it runs on both Python 2 and Python 3.
    print("Purity = %s" % (res_Purity,))
    print("NMI =  %s" % (res_NMI,))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment