Created
February 19, 2010 05:26
-
-
Save abakan-zz/308460 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def single_linkage(dist, cutoff): | |
"""Return clusters determined by single-linkage agglomerative clustering. | |
For speedup, distance matrix may contain square-distances and | |
cutoff distance may be cutoff-square. | |
Parameters | |
---------- | |
dist : np.ndarray | |
Distance matrix. Strictly lower triangular or a symmetric matrix | |
with 0s along the diagonal are acceptable forms. | |
cutoff : float | |
Distance within which two items will be merged to make a cluster. | |
Returns | |
------- | |
clusters : np.ndarray | |
An array of cluster assignments. Items in the same cluster will have | |
save cluster number. | |
""" | |
if not isinstance(dist, np.ndarray): | |
raise TypeError('dist must be of type numpy.ndarray') | |
elif dist.ndim != 2: | |
raise ValueError('dist must be a 2-dimensional array') | |
elif dist.shape[0] != dist.shape[1]: | |
raise ValueError('dist must be a square matrix') | |
size = dist.shape[0] | |
clust = [set([i]) for i in range(size)] | |
for i in range(size): | |
which = (dist[i:, i] <= cutoff).nonzero()[0] + i | |
new = [] | |
for j in which: | |
new += clust[j] | |
for j in new: | |
clust[j] = new | |
clusters = - np.ones(size, 'i') | |
iclust = 0 | |
for i in range(size): | |
if clusters[i] == -1: | |
clusters[clust[i]] = iclust | |
iclust += 1 | |
return clusters |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment