Skip to content

Instantly share code, notes, and snippets.

@gpedote
Created June 23, 2017 16:05
Show Gist options
  • Save gpedote/68a6678f50fd9f07ac3889ff0bd57102 to your computer and use it in GitHub Desktop.
Save gpedote/68a6678f50fd9f07ac3889ff0bd57102 to your computer and use it in GitHub Desktop.
Dunn index for sklearn-generated clusters
def dunn(c, distances):
"""
Dunn index for cluster validation (the bigger, the better)
.. math:: D = \\min_{i = 1 \\ldots n_c; j = i + 1\ldots n_c} \\left\\lbrace \\frac{d \\left( c_i,c_j \\right)}{\\max_{k = 1 \\ldots n_c} \\left(diam \\left(c_k \\right) \\right)} \\right\\rbrace
where :math:`d(c_i,c_j)` represents the distance between
clusters :math:`c_i` and :math:`c_j`, given by the distances between its
two closest data points, and :math:`diam(c_k)` is the diameter of cluster
:math:`c_k`, given by the distance between its two farthest data points.
The bigger the value of the resulting Dunn index, the better the clustering
result is considered, since higher values indicate that clusters are
compact (small :math:`diam(c_k)`) and far apart.
.. [Kovacs2005] Kovács, F., Legány, C., & Babos, A. (2005). Cluster validity measurement techniques. 6th International Symposium of Hungarian Researchers on Computational Intelligence.
"""
unique_cluster_distances = np.unique(min_cluster_distances(c, distances))
max_diameter = max(diameter(c, distances))
if np.size(unique_cluster_distances) > 1:
return unique_cluster_distances[1] / max_diameter
else:
return unique_cluster_distances[0] / max_diameter
def min_cluster_distances(c, distances):
"""Calculates the distances between the two nearest points of each cluster"""
min_distances = np.zeros((max(c) + 1, max(c) + 1))
for i in np.arange(0, len(c)):
if c[i] == -1: continue
for ii in np.arange(i + 1, len(c)):
if c[ii] == -1: continue
if c[i] != c[ii] and distances[i, ii] > min_distances[c[i], c[ii]]:
min_distances[c[i], c[ii]] = min_distances[c[ii], c[i]] = distances[i, ii]
return min_distances
def diameter(c, distances):
"""Calculates cluster diameters (the distance between the two farthest data points in a cluster)"""
diameters = np.zeros(max(c) + 1)
for i in np.arange(0, len(c)):
if c[i] == -1: continue
for ii in np.arange(i + 1, len(c)):
if c[ii] == -1: continue
if c[i] != -1 or c[ii] != -1 and c[i] == c[ii] and distances[i, ii] > diameters[c[i]]:
diameters[c[i]] = distances[i, ii]
return diameters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment