Created
May 3, 2011 17:57
-
-
Save davidandrzej/953840 to your computer and use it in GitHub Desktop.
Example usage of blockviz.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example blockviz usage with synthetic generated data | |
Requires scaledimage.py and blockviz.py | |
David Andrzejewski | |
""" | |
import numpy as NP | |
import numpy.random as NPR | |
import matplotlib.pyplot as P | |
import scipy.spatial.distance as DIST | |
import matplotlib.ticker as MT | |
import blockviz as BV | |
import scaledimage as SI | |
# | |
# Generate synthetic test dataset | |
# | |
(nclust, nfeat, ndata) = (3, 5, 20) | |
# Generate cluster means | |
clustmeans = NPR.standard_normal((nfeat, nclust)) | |
# Sample cluster assignments for each data point | |
dataclust = NPR.randint(0, nclust, (ndata,)) | |
# Generate data points | |
data = NP.zeros((nfeat, ndata)) | |
covar = NP.identity(nfeat) # diagonal covariance | |
for clust in range(nclust): | |
clustidx = NP.where(dataclust == clust)[0] | |
datapoints = NPR.multivariate_normal(clustmeans[:,clust], | |
covar, | |
(len(clustidx),)).T | |
data[:,clustidx] = datapoints | |
# Calculate affinity as logistic transform of negative cosine distance | |
cosdist = DIST.squareform(DIST.pdist(data.T, metric='cosine')) | |
vecLog = NP.vectorize(BV.logistic) | |
affinity = vecLog(-1 * cosdist) | |
# Do non-block visualization of similarity | |
P.subplot(1,2,1) | |
SI.scaledimage(affinity, pixwidth=3, ax=P.gca()) | |
P.title('Raw affinity matrix') | |
# Do block visualization | |
P.subplot(1,2,2) | |
BV.blockviz(affinity, | |
nclust + 1, # why + 1 ? see below | |
ax=P.gca()) | |
# Draw nice ticklabels | |
labels = [str(i) for i in range(ndata)] | |
P.gca().yaxis.set_major_locator(MT.IndexLocator(3,1)) | |
P.gca().yaxis.set_ticklabels(list(reversed(labels))) | |
P.gca().xaxis.set_major_locator(MT.IndexLocator(3,1)) | |
P.gca().xaxis.set_ticklabels(labels, rotation=90) | |
# | |
# On this synth data it seems that SpectralClustering | |
# cannot resist making exactly one singleton cluster...? | |
# | |
# So to get 'right' cluster picture, we tell it | |
# we want the true number of clusters + 1 | |
# | |
P.title('After spectral clustering') | |
P.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment