# This is a recipe for using scikit-learn to build a cosine similarity matrix and then to build dendrograms from it.
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy
import scipy.spatial.distance
from scipy.spatial.distance import pdist
from sklearn.metrics.pairwise import cosine_similarity
# Make a "feature matrix" of 15 items that will be the binary representation of each index.
# That is, 0001, 0010, ... , 0111, 1111. We will then get the cosine distance between each
# integer using this binary feature.
# Build a "feature matrix" of 15 items: the 4-bit binary representation of
# each integer 1..15 (0001, 0010, ..., 1111). Each integer's bits are its
# features, and we cluster the integers by cosine distance between them.
M = [list(map(int, '{:04b}'.format(i))) for i in range(1, 16)]
L = ['{:04b}'.format(i) for i in range(1, 16)]

# Cosine similarity between every pair of rows of the feature matrix.
c = cosine_similarity(M, M)
c = np.nan_to_num(c)    # guard against NaNs (e.g. from an all-zero row)
c = 1.0 - c             # invert the similarity: 0 is close, 1 is far
np.fill_diagonal(c, 0)  # self-distance must be exactly zero
c = np.clip(c, 0, 1)    # clamp floating-point noise back into [0, 1]

# linkage() treats a 2-D array as RAW OBSERVATIONS, not as a distance
# matrix, so the square matrix `c` must be condensed with squareform()
# before being passed in.  (The original passed a square matrix, which
# made linkage silently re-compute euclidean distances between the rows
# of the distance matrix.)  checks=False skips the strict symmetry test,
# which tiny floating-point asymmetries in `c` could otherwise trip.
condensed = scipy.spatial.distance.squareform(c, checks=False)

# Draw one dendrogram per linkage method.  These four methods all accept
# a precomputed condensed distance matrix ('ward', 'centroid', 'median'
# would require raw observation vectors instead).
for method in ['single', 'complete', 'average', 'weighted']:
    Z = scipy.cluster.hierarchy.linkage(condensed, method=method)
    fig = plt.figure()
    ax = fig.add_axes([.1, .1, .8, .8])
    scipy.cluster.hierarchy.dendrogram(Z, labels=L, leaf_font_size=7, ax=ax)
    plt.savefig('{}.pdf'.format(method))
    plt.close(fig)  # free the figure so the loop doesn't accumulate memory