Skip to content

Instantly share code, notes, and snippets.

@agramfort
Created July 15, 2017 17:20
Show Gist options
  • Save agramfort/c3236b27dcf30608c9456210a02a4e60 to your computer and use it in GitHub Desktop.
Save agramfort/c3236b27dcf30608c9456210a02a4e60 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""Ordering Points To Identify the Clustering Structure (OPTICS)
These routines execute the OPTICS algorithm, and implement various
cluster extraction methods of the ordered list.
Authors: Shane Grigsby <[email protected]>
Amy X. Zhang <[email protected]>
License: BSD 3 clause
"""
import warnings
import numpy as np
from scipy import iterable
from ..utils import check_array
from ..utils.validation import check_is_fitted
from ..neighbors import BallTree
from ..base import BaseEstimator, ClusterMixin
from ..metrics.pairwise import pairwise_distances
from ._optics_inner import quick_scan
class _SetOfObjects(BallTree):
"""Build balltree data structure with processing index.
It's built from given data in preparation for OPTICS Algorithm
Parameters
----------
X: array, shape (n_samples, n_features)
Input data.
metric : str
The metric used.
**kwargs:
kwargs passed to BallTree init.
"""
def __init__(self, X, metric, **kwargs):
super(_SetOfObjects, self).__init__(X,
metric=metric, **kwargs)
self._n = len(self.data)
self.metric = metric
# Start all points as 'unprocessed' ##
self._processed = np.zeros((self._n, 1), dtype=bool)
self.reachability_ = np.ones(self._n) * np.inf
self.core_dists_ = np.ones(self._n) * np.nan
# Start all points as noise ##
self._cluster_id = -np.ones(self._n, dtype=int)
self._is_core = np.zeros(self._n, dtype=bool)
self.ordering_ = []
# OPTICS helper functions; these should not be public #
def _prep_optics(self, min_samples):
self.core_dists_[:] = self.query(self.get_arrays()[0],
k=min_samples)[0][:, -1]
def _build_optics(setofobjects, epsilon):
# Main OPTICS loop. Not parallelizable. The order that entries are
# written to the 'ordering_' list is important!
for point in range(setofobjects._n):
if not setofobjects._processed[point]:
_expand_cluster_order(setofobjects, point, epsilon)
def _expand_cluster_order(setofobjects, point, epsilon):
# As above, not parallelizable. Parallelizing would allow items in
# the 'unprocessed' list to switch to 'processed'
if setofobjects.core_dists_[point] <= epsilon:
while not setofobjects._processed[point]:
setofobjects._processed[point] = True
setofobjects.ordering_.append(point)
point = _set_reach_dist(setofobjects, point, epsilon)
else:
setofobjects.ordering_.append(point) # For very noisy points
setofobjects._processed[point] = True
def _set_reach_dist(setofobjects, point_index, epsilon):
X = np.array(setofobjects.data[point_index]).reshape(1, -1)
indices = setofobjects.query_radius(X, r=epsilon,
return_distance=False,
count_only=False,
sort_results=False)[0]
# Checks to see if there more than one member in the neighborhood
if iterable(indices):
# Masking processed values; n_pr is 'not processed'
n_pr = np.compress((np.take(setofobjects._processed,
indices, axis=0) < 1).ravel(),
indices, axis=0)
# n_pr = indices[(setofobjects._processed[indices] < 1).ravel()]
if len(n_pr) > 0:
dists = pairwise_distances(X,
np.take(setofobjects.get_arrays()[0],
n_pr,
axis=0),
setofobjects.metric, n_jobs=1).ravel()
rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
new_reach = np.minimum(np.take(setofobjects.reachability_,
n_pr, axis=0),
rdists)
setofobjects.reachability_[n_pr] = new_reach
# Checks to see if everything is already processed;
# if so, return control to main loop
if n_pr.size > 0:
# Define return order based on reachability distance
return(n_pr[quick_scan(np.take(setofobjects.reachability_,
n_pr, axis=0), dists)])
else:
return point_index
class OPTICS(BaseEstimator, ClusterMixin):
"""Estimate clustering structure from vector array
OPTICS: Ordering Points To Identify the Clustering Structure
Equivalent to DBSCAN, finds core sample of high density and expands
clusters from them. Unlike DBSCAN, keeps cluster hierarchy for varying
epsilon values. Optimized for usage on large point datasets.
Parameters
----------
eps : float, optional
The maximum distance between two samples for them to be considered
as in the same neighborhood. This is also the largest object size
expected within the dataset. Lower eps values can be used after
OPTICS is run the first time, with fast returns of labels. Default
value of "np.inf" will identify clusters across all scales; reducing
eps will result in shorter run times.
min_samples : int, optional
The number of samples in a neighborhood for a point to be considered
as a core point.
metric : string or callable, optional
The distance metric to use for neighborhood lookups. Default is
"minkowski". Other options include “euclidean”, “manhattan”,
“chebyshev”, “haversine”, “seuclidean”, “hamming”, “canberra”,
and “braycurtis”. The “wminkowski” and “mahalanobis” metrics are
also valid with an additional argument.
Attributes
----------
`core_sample_indices_` : array, shape (n_core_samples,)
Indices of core samples.
`labels_` : array, shape (n_samples,)
Cluster labels for each point in the dataset given to fit().
Noisy samples are given the label -1.
`reachability_` : array, shape (n_samples,)
Reachability distances per sample.
`ordering_` : array, shape (n_samples,)
The cluster ordered list of sample indices
`core_dists_` : array, shape (n_samples,)
Distance at which each sample becomes a core point.
Points which will never be core have a distance of inf.
References
----------
Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
"OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
Record 28, no. 2 (1999): 49-60.
"""
def __init__(self, eps=np.inf, min_samples=5, metric='euclidean'):
self.eps = eps
self.min_samples = min_samples
self.metric = metric
def fit(self, X, y=None):
"""Perform OPTICS clustering
Extracts an ordered list of points and reachability distances, and
performs initial clustering using 'eps' distance specified at OPTICS
object instantiation.
Parameters
----------
X : array, shape (n_samples, n_features)
The data.
Returns
-------
self : instance of OPTICS
The instance.
"""
X = check_array(X)
n_samples = len(X)
# Check for valid n_samples relative to min_samples
if self.min_samples > n_samples:
raise ValueError("Number of training samples (%d) must be greater "
"than min_samples (%d) used for clustering." %
(n_samples, self.min_samples))
self.tree_ = _SetOfObjects(X, self.metric)
_prep_optics(self.tree_, self.min_samples)
_build_optics(self.tree_, self.eps * 5.0)
self.extract_auto()
self.core_sample_indices_ = np.arange(self._is_core)
self.n_clusters_ = max(self._cluster_id)
return self
@property
def reachability_(self):
return self.tree_.reachability_[:]
@property
def core_dists_(self):
return self.tree_.core_dists_[:]
@property
def _cluster_id(self):
return self.tree_._cluster_id[:]
@property
def _is_core(self):
return self.tree_._is_core[:]
@property
def _index(self):
return self.tree_._index[:]
@property
def ordering_(self):
return self.tree_.ordering_[:]
@property
def labels_(self):
return self._cluster_id[:]
def extract(self, epsilon_prime, clustering='auto', **kwargs):
"""Performs Automatic extraction for an arbitrary epsilon.
It can also do DBSCAN equivalent and it can be run multiple times.
See extract_auto() for full description of parameters
Parameters
----------
epsilon_prime: float or int, optional
Used for 'dbscan' clustering. Must be less than or equal to what
was used for prep and build steps
clustering: {'auto', 'dbscan'}, optional
Type of cluster extraction to perform; defaults to 'auto'.
Returns
-------
core_sample_indices_: array, shape (n_core_samples,)
The indices of the core samples.
labels_ : array, shape (n_samples,)
The estimated labels.
"""
check_is_fitted(self, 'reachability_')
if epsilon_prime > self.eps * 5.0:
raise ValueError('Specify an epsilon smaller than %s. Got %s.'
% (self.eps * 5, epsilon_prime))
if clustering == 'dbscan':
self.eps_prime = epsilon_prime
_extract_DBSCAN(self, epsilon_prime)
elif clustering == 'auto':
self.extract_auto(**kwargs)
self.core_sample_indices_ = \
np.arange(len(self.reachability_))[self._is_core == 1]
self.n_clusters = max(self._cluster_id)
if epsilon_prime > (self.eps * 1.05):
warnings.warn(
"Warning, eps (%s) is close to epsilon_prime (%s): "
"Output may be unstable." % (self.eps, epsilon_prime),
RuntimeWarning, stacklevel=2)
# XXX explain 5% thing
return self.core_sample_indices_, self.labels_
def extract_auto(self,
maxima_ratio=.75,
rejection_ratio=.7,
similarity_threshold=0.4,
significant_min=.003,
min_cluster_size_ratio=.005,
min_maxima_ratio=0.001):
"""Performs automatic cluster extraction for variable density data.
Can be run multiple times with adjusted parameters. Only returns
core and noise labels.
Parameters
----------
maxima_ratio: float, optional
The maximum ratio we allow of average height of clusters on the
right and left to the local maxima in question. The higher the
ratio, the more generous the algorithm is to preserving local
minimums, and the more cuts the resulting tree will have.
rejection_ratio: float, optional
Adjusts the fitness of the clustering. When the maxima_ratio is
exceeded, determine which of the clusters to the left and right to
reject based on rejection_ratio
similarity_threshold: float, optional
Used to check if nodes can be moved up one level, that is, if the
new cluster created is too "similar" to its parent, given the
similarity threshold. Similarity can be determined by 1) the size
of the new cluster relative to the size of the parent node or
2) the average of the reachability values of the new cluster
relative to the average of the reachability values of the parent
node. A lower value for the similarity threshold means less levels
in the tree.
significant_min: float, optional
Sets a lower threshold on how small a significant maxima can be
min_cluster_size_ratio: float, optional
Minimum percentage of dataset expected for cluster membership.
min_maxima_ratio: float, optional
Used to determine neighborhood size for minimum cluster membership.
"""
check_is_fitted(self, 'reachability_')
# Extraction wrapper
RPlot = self.reachability_[self.ordering_].tolist()
RPoints = self.ordering_
root_node = _automatic_cluster(RPlot, RPoints, maxima_ratio,
rejection_ratio, similarity_threshold,
significant_min, min_cluster_size_ratio,
min_maxima_ratio)
leaves = _get_leaves(root_node, [])
# Start cluster id's at 1
clustid = 1
# Start all points as non-core noise
self._cluster_id[:] = -1
self._is_core[:] = 0
for leaf in leaves:
index = self.ordering_[leaf.start:leaf.end]
self._cluster_id[index] = clustid
self._is_core[index] = 1
clustid += 1
def _automatic_cluster(RPlot, RPoints, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min,
min_cluster_size_ratio, min_maxima_ratio):
min_neighborhood_size = 2
min_cluster_size = int(min_cluster_size_ratio * len(RPoints))
neighborhood_size = int(min_maxima_ratio * len(RPoints))
# Should this check for < min_samples? Should this be public?
if min_cluster_size < 5:
min_cluster_size = 5
# Again, should this check < min_samples, should the parameter be public?
if neighborhood_size < min_neighborhood_size:
neighborhood_size = min_neighborhood_size
local_maxima_points = _find_local_maxima(RPlot, RPoints, neighborhood_size)
root_node = _TreeNode(RPoints, 0, len(RPoints), None)
_cluster_tree(root_node, None, local_maxima_points,
RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return root_node
class _TreeNode(object):
# automatic cluster helper classes and functions
def __init__(self, points, start, end, parent_node):
self.points = points
self.start = start
self.end = end
self.parent_node = parent_node
self.children = []
self.split_point = -1
def assign_split_point(self, split_point):
self.split_point = split_point
def add_child(self, child):
self.children.append(child)
def _is_local_maxima(index, RPlot, RPoints, neighborhood_size):
# 0 = point at index is not local maxima
# 1 = point at index is local maxima
for i in range(1, neighborhood_size + 1):
# process objects to the right of index
if index + i < len(RPlot):
if (RPlot[index] < RPlot[index + i]):
return 0
# process objects to the left of index
if index - i >= 0:
if (RPlot[index] < RPlot[index - i]):
return 0
return 1
def _find_local_maxima(RPlot, RPoints, neighborhood_size):
local_maxima_points = {}
# 1st and last points on Reachability Plot are not taken
# as local maxima points
for i in range(1, len(RPoints) - 1):
# if the point is a local maxima on the reachability plot with
# regard to neighborhood_size, insert it into priority queue and
# maxima list
if (RPlot[i] > RPlot[i - 1] and RPlot[i] >= RPlot[i + 1] and
_is_local_maxima(i, RPlot, RPoints, neighborhood_size) == 1):
local_maxima_points[i] = RPlot[i]
return sorted(local_maxima_points,
key=local_maxima_points.__getitem__, reverse=True)
def _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min):
# node is a node or the root of the tree in the first call
# parent_node is parent node of N or None if node is root of the tree
# local_maxima_points is list of local maxima points sorted in
# descending order of reachability
if len(local_maxima_points) == 0:
return # parent_node is a leaf
# take largest local maximum as possible separation between clusters
s = local_maxima_points[0]
node.assign_split_point(s)
local_maxima_points = local_maxima_points[1:]
# create two new nodes and add to list of nodes
node_1 = _TreeNode(RPoints[node.start:s], node.start, s, node)
node_2 = _TreeNode(RPoints[s + 1:node.end], s + 1, node.end, node)
local_max_1 = []
local_max_2 = []
for i in local_maxima_points:
if i < s:
local_max_1.append(i)
if i > s:
local_max_2.append(i)
node_list = []
node_list.append((node_1, local_max_1))
node_list.append((node_2, local_max_2))
if RPlot[s] < significant_min:
node.assign_split_point(-1)
# if split_point is not significant, ignore this split and continue
_cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return
# only check a certain ratio of points in the child
# nodes formed to the left and right of the maxima
# ...should check_ratio be a user settable parameter?
check_ratio = .8
check_value_1 = int(np.round(check_ratio * len(node_1.points)))
check_value_2 = int(np.round(check_ratio * len(node_2.points)))
if check_value_2 == 0:
check_value_2 = 1
avg_reach_value_1 = float(np.average(RPlot[(node_1.end -
check_value_1):node_1.end]))
avg_reach_value_2 = float(np.average(RPlot[node_2.start:(node_2.start +
check_value_2)]))
if (float(avg_reach_value_1 / float(RPlot[s])) >
maxima_ratio or float(avg_reach_value_2 /
float(RPlot[s])) > maxima_ratio):
if float(avg_reach_value_1 / float(RPlot[s])) < rejection_ratio:
# reject node 2
node_list.remove((node_2, local_max_2))
if float(avg_reach_value_2 / float(RPlot[s])) < rejection_ratio:
# reject node 1
node_list.remove((node_1, local_max_1))
if (float(avg_reach_value_1 / float(RPlot[s])) >=
rejection_ratio and float(avg_reach_value_2 /
float(RPlot[s])) >= rejection_ratio):
node.assign_split_point(-1)
# since split_point is not significant,
# ignore this split and continue (reject both child nodes)
_cluster_tree(node, parent_node, local_maxima_points,
RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return
# remove clusters that are too small
if (len(node_1.points) < min_cluster_size and
node_list.count((node_1, local_max_1)) > 0):
# cluster 1 is too small
node_list.remove((node_1, local_max_1))
if (len(node_2.points) < min_cluster_size and
node_list.count((node_2, local_max_1)) > 0):
# cluster 2 is too small
node_list.remove((node_2, local_max_2))
if len(node_list) == 0:
# parent_node will be a leaf
node.assign_split_point(-1)
return
# Check if nodes can be moved up one level - the new cluster created
# is too "similar" to its parent, given the similarity threshold.
bypass_node = 0
if parent_node is not None:
if (float(float(node.end - node.start) /
float(parent_node.end - parent_node.start)) >
similarity_threshold):
parent_node.children.remove(node)
bypass_node = 1
for nl in node_list:
if bypass_node == 1:
parent_node.add_child(nl[0])
_cluster_tree(nl[0], parent_node, nl[1], RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
else:
node.add_child(nl[0])
_cluster_tree(nl[0], node, nl[1], RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
def _get_leaves(node, arr):
if node is not None:
if node.split_point == -1:
arr.append(node)
for n in node.children:
_get_leaves(n, arr)
return arr
def _extract_DBSCAN(setofobjects, epsilon_prime):
# Extract DBSCAN Equivalent cluster structure
# Important: Epsilon prime should be less than epsilon used in OPTICS
cluster_id = 0
for entry in setofobjects.ordering_:
if setofobjects.reachability_[entry] > epsilon_prime:
if setofobjects.core_dists_[entry] <= epsilon_prime:
cluster_id += 1
setofobjects._cluster_id[entry] = cluster_id
else:
# This is only needed for compatibility for repeated scans.
setofobjects._cluster_id[entry] = -1 # Noise points
setofobjects._is_core[entry] = 0
else:
setofobjects._cluster_id[entry] = cluster_id
if setofobjects.core_dists_[entry] <= epsilon_prime:
setofobjects._is_core[entry] = 1 # True
else:
setofobjects._is_core[entry] = 0 # False
# -*- coding: utf-8 -*-
"""Ordering Points To Identify the Clustering Structure (OPTICS)
These routines execute the OPTICS algorithm, and implement various
cluster extraction methods of the ordered list.
Authors: Shane Grigsby <[email protected]>
Amy X. Zhang <[email protected]>
License: BSD 3 clause
"""
import warnings
import numpy as np
from scipy import iterable
from ..utils import check_array
from ..utils.validation import check_is_fitted
from ..neighbors import BallTree
from ..base import BaseEstimator, ClusterMixin
from ..metrics.pairwise import pairwise_distances
from ._optics_inner import quick_scan
class _SetOfObjects(BallTree):
"""Build balltree data structure with processing index.
It's built from given data in preparation for OPTICS Algorithm
Parameters
----------
X: array, shape (n_samples, n_features)
Input data.
metric : str
The metric used.
**kwargs:
kwargs passed to BallTree init.
"""
def __init__(self, X, metric, **kwargs):
super(_SetOfObjects, self).__init__(X,
metric=metric, **kwargs)
self._n = len(self.data)
self.metric = metric
# Start all points as 'unprocessed' ##
self._processed = np.zeros((self._n, 1), dtype=bool)
self.reachability_ = np.ones(self._n) * np.inf
self.core_dists_ = np.ones(self._n) * np.nan
# Start all points as noise ##
self._cluster_id = -np.ones(self._n, dtype=int)
self._is_core = np.zeros(self._n, dtype=bool)
self.ordering_ = []
# OPTICS helper functions; these should not be public #
def _prep_optics(self, min_samples):
self.core_dists_[:] = self.query(self.get_arrays()[0],
k=min_samples)[0][:, -1]
def _build_optics(setofobjects, epsilon):
# Main OPTICS loop. Not parallelizable. The order that entries are
# written to the 'ordering_' list is important!
for point in range(setofobjects._n):
if not setofobjects._processed[point]:
_expand_cluster_order(setofobjects, point, epsilon)
def _expand_cluster_order(setofobjects, point, epsilon):
# As above, not parallelizable. Parallelizing would allow items in
# the 'unprocessed' list to switch to 'processed'
if setofobjects.core_dists_[point] <= epsilon:
while not setofobjects._processed[point]:
setofobjects._processed[point] = True
setofobjects.ordering_.append(point)
point = _set_reach_dist(setofobjects, point, epsilon)
else:
setofobjects.ordering_.append(point) # For very noisy points
setofobjects._processed[point] = True
def _set_reach_dist(setofobjects, point_index, epsilon):
X = np.array(setofobjects.data[point_index]).reshape(1, -1)
indices = setofobjects.query_radius(X, r=epsilon,
return_distance=False,
count_only=False,
sort_results=False)[0]
# Checks to see if there more than one member in the neighborhood
if iterable(indices):
# Masking processed values; n_pr is 'not processed'
n_pr = np.compress((np.take(setofobjects._processed,
indices, axis=0) < 1).ravel(),
indices, axis=0)
# n_pr = indices[(setofobjects._processed[indices] < 1).ravel()]
if len(n_pr) > 0:
dists = pairwise_distances(X,
np.take(setofobjects.get_arrays()[0],
n_pr,
axis=0),
setofobjects.metric, n_jobs=1).ravel()
rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
new_reach = np.minimum(np.take(setofobjects.reachability_,
n_pr, axis=0),
rdists)
setofobjects.reachability_[n_pr] = new_reach
# Checks to see if everything is already processed;
# if so, return control to main loop
if n_pr.size > 0:
# Define return order based on reachability distance
return(n_pr[quick_scan(np.take(setofobjects.reachability_,
n_pr, axis=0), dists)])
else:
return point_index
class OPTICS(BaseEstimator, ClusterMixin):
"""Estimate clustering structure from vector array
OPTICS: Ordering Points To Identify the Clustering Structure
Equivalent to DBSCAN, finds core sample of high density and expands
clusters from them. Unlike DBSCAN, keeps cluster hierarchy for varying
epsilon values. Optimized for usage on large point datasets.
Parameters
----------
eps : float, optional
The maximum distance between two samples for them to be considered
as in the same neighborhood. This is also the largest object size
expected within the dataset. Lower eps values can be used after
OPTICS is run the first time, with fast returns of labels. Default
value of "np.inf" will identify clusters across all scales; reducing
eps will result in shorter run times.
min_samples : int, optional
The number of samples in a neighborhood for a point to be considered
as a core point.
metric : string or callable, optional
The distance metric to use for neighborhood lookups. Default is
"minkowski". Other options include “euclidean”, “manhattan”,
“chebyshev”, “haversine”, “seuclidean”, “hamming”, “canberra”,
and “braycurtis”. The “wminkowski” and “mahalanobis” metrics are
also valid with an additional argument.
Attributes
----------
`core_sample_indices_` : array, shape (n_core_samples,)
Indices of core samples.
`labels_` : array, shape (n_samples,)
Cluster labels for each point in the dataset given to fit().
Noisy samples are given the label -1.
`reachability_` : array, shape (n_samples,)
Reachability distances per sample.
`ordering_` : array, shape (n_samples,)
The cluster ordered list of sample indices
`core_dists_` : array, shape (n_samples,)
Distance at which each sample becomes a core point.
Points which will never be core have a distance of inf.
References
----------
Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
"OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
Record 28, no. 2 (1999): 49-60.
"""
def __init__(self, eps=np.inf, min_samples=5, metric='euclidean'):
self.eps = eps
self.min_samples = min_samples
self.metric = metric
def fit(self, X, y=None):
"""Perform OPTICS clustering
Extracts an ordered list of points and reachability distances, and
performs initial clustering using 'eps' distance specified at OPTICS
object instantiation.
Parameters
----------
X : array, shape (n_samples, n_features)
The data.
Returns
-------
self : instance of OPTICS
The instance.
"""
X = check_array(X)
n_samples = len(X)
# Check for valid n_samples relative to min_samples
if self.min_samples > n_samples:
raise ValueError("Number of training samples (%d) must be greater "
"than min_samples (%d) used for clustering." %
(n_samples, self.min_samples))
self.tree_ = _SetOfObjects(X, self.metric)
_prep_optics(self.tree_, self.min_samples)
_build_optics(self.tree_, self.eps * 5.0)
self.extract_auto()
self.core_sample_indices_ = np.arange(self._is_core)
self.n_clusters_ = max(self._cluster_id)
return self
@property
def reachability_(self):
return self.tree_.reachability_[:]
@property
def core_dists_(self):
return self.tree_.core_dists_[:]
@property
def _cluster_id(self):
return self.tree_._cluster_id[:]
@property
def _is_core(self):
return self.tree_._is_core[:]
@property
def _index(self):
return self.tree_._index[:]
@property
def ordering_(self):
return self.tree_.ordering_[:]
@property
def labels_(self):
return self._cluster_id[:]
def extract(self, epsilon_prime, clustering='auto', **kwargs):
"""Performs Automatic extraction for an arbitrary epsilon.
It can also do DBSCAN equivalent and it can be run multiple times.
See extract_auto() for full description of parameters
Parameters
----------
epsilon_prime: float or int, optional
Used for 'dbscan' clustering. Must be less than or equal to what
was used for prep and build steps
clustering: {'auto', 'dbscan'}, optional
Type of cluster extraction to perform; defaults to 'auto'.
Returns
-------
core_sample_indices_: array, shape (n_core_samples,)
The indices of the core samples.
labels_ : array, shape (n_samples,)
The estimated labels.
"""
check_is_fitted(self, 'reachability_')
if epsilon_prime > self.eps * 5.0:
raise ValueError('Specify an epsilon smaller than %s. Got %s.'
% (self.eps * 5, epsilon_prime))
if clustering == 'dbscan':
self.eps_prime = epsilon_prime
_extract_DBSCAN(self, epsilon_prime)
elif clustering == 'auto':
self.extract_auto(**kwargs)
self.core_sample_indices_ = \
np.arange(len(self.reachability_))[self._is_core == 1]
self.n_clusters = max(self._cluster_id)
if epsilon_prime > (self.eps * 1.05):
warnings.warn(
"Warning, eps (%s) is close to epsilon_prime (%s): "
"Output may be unstable." % (self.eps, epsilon_prime),
RuntimeWarning, stacklevel=2)
# XXX explain 5% thing
return self.core_sample_indices_, self.labels_
def extract_auto(self,
maxima_ratio=.75,
rejection_ratio=.7,
similarity_threshold=0.4,
significant_min=.003,
min_cluster_size_ratio=.005,
min_maxima_ratio=0.001):
"""Performs automatic cluster extraction for variable density data.
Can be run multiple times with adjusted parameters. Only returns
core and noise labels.
Parameters
----------
maxima_ratio: float, optional
The maximum ratio we allow of average height of clusters on the
right and left to the local maxima in question. The higher the
ratio, the more generous the algorithm is to preserving local
minimums, and the more cuts the resulting tree will have.
rejection_ratio: float, optional
Adjusts the fitness of the clustering. When the maxima_ratio is
exceeded, determine which of the clusters to the left and right to
reject based on rejection_ratio
similarity_threshold: float, optional
Used to check if nodes can be moved up one level, that is, if the
new cluster created is too "similar" to its parent, given the
similarity threshold. Similarity can be determined by 1) the size
of the new cluster relative to the size of the parent node or
2) the average of the reachability values of the new cluster
relative to the average of the reachability values of the parent
node. A lower value for the similarity threshold means less levels
in the tree.
significant_min: float, optional
Sets a lower threshold on how small a significant maxima can be
min_cluster_size_ratio: float, optional
Minimum percentage of dataset expected for cluster membership.
min_maxima_ratio: float, optional
Used to determine neighborhood size for minimum cluster membership.
"""
check_is_fitted(self, 'reachability_')
# Extraction wrapper
RPlot = self.reachability_[self.ordering_].tolist()
RPoints = self.ordering_
root_node = _automatic_cluster(RPlot, RPoints, maxima_ratio,
rejection_ratio, similarity_threshold,
significant_min, min_cluster_size_ratio,
min_maxima_ratio)
leaves = _get_leaves(root_node, [])
# Start cluster id's at 1
clustid = 1
# Start all points as non-core noise
self._cluster_id[:] = -1
self._is_core[:] = 0
for leaf in leaves:
index = self.ordering_[leaf.start:leaf.end]
self._cluster_id[index] = clustid
self._is_core[index] = 1
clustid += 1
def _automatic_cluster(RPlot, RPoints, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min,
min_cluster_size_ratio, min_maxima_ratio):
min_neighborhood_size = 2
min_cluster_size = int(min_cluster_size_ratio * len(RPoints))
neighborhood_size = int(min_maxima_ratio * len(RPoints))
# Should this check for < min_samples? Should this be public?
if min_cluster_size < 5:
min_cluster_size = 5
# Again, should this check < min_samples, should the parameter be public?
if neighborhood_size < min_neighborhood_size:
neighborhood_size = min_neighborhood_size
local_maxima_points = _find_local_maxima(RPlot, RPoints, neighborhood_size)
root_node = _TreeNode(RPoints, 0, len(RPoints), None)
_cluster_tree(root_node, None, local_maxima_points,
RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return root_node
class _TreeNode(object):
# automatic cluster helper classes and functions
def __init__(self, points, start, end, parent_node):
self.points = points
self.start = start
self.end = end
self.parent_node = parent_node
self.children = []
self.split_point = -1
def assign_split_point(self, split_point):
self.split_point = split_point
def add_child(self, child):
self.children.append(child)
def _is_local_maxima(index, RPlot, RPoints, neighborhood_size):
# 0 = point at index is not local maxima
# 1 = point at index is local maxima
for i in range(1, neighborhood_size + 1):
# process objects to the right of index
if index + i < len(RPlot):
if (RPlot[index] < RPlot[index + i]):
return 0
# process objects to the left of index
if index - i >= 0:
if (RPlot[index] < RPlot[index - i]):
return 0
return 1
def _find_local_maxima(RPlot, RPoints, neighborhood_size):
local_maxima_points = {}
# 1st and last points on Reachability Plot are not taken
# as local maxima points
for i in range(1, len(RPoints) - 1):
# if the point is a local maxima on the reachability plot with
# regard to neighborhood_size, insert it into priority queue and
# maxima list
if (RPlot[i] > RPlot[i - 1] and RPlot[i] >= RPlot[i + 1] and
_is_local_maxima(i, RPlot, RPoints, neighborhood_size) == 1):
local_maxima_points[i] = RPlot[i]
return sorted(local_maxima_points,
key=local_maxima_points.__getitem__, reverse=True)
def _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min):
# node is a node or the root of the tree in the first call
# parent_node is parent node of N or None if node is root of the tree
# local_maxima_points is list of local maxima points sorted in
# descending order of reachability
if len(local_maxima_points) == 0:
return # parent_node is a leaf
# take largest local maximum as possible separation between clusters
s = local_maxima_points[0]
node.assign_split_point(s)
local_maxima_points = local_maxima_points[1:]
# create two new nodes and add to list of nodes
node_1 = _TreeNode(RPoints[node.start:s], node.start, s, node)
node_2 = _TreeNode(RPoints[s + 1:node.end], s + 1, node.end, node)
local_max_1 = []
local_max_2 = []
for i in local_maxima_points:
if i < s:
local_max_1.append(i)
if i > s:
local_max_2.append(i)
node_list = []
node_list.append((node_1, local_max_1))
node_list.append((node_2, local_max_2))
if RPlot[s] < significant_min:
node.assign_split_point(-1)
# if split_point is not significant, ignore this split and continue
_cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return
# only check a certain ratio of points in the child
# nodes formed to the left and right of the maxima
# ...should check_ratio be a user settable parameter?
check_ratio = .8
check_value_1 = int(np.round(check_ratio * len(node_1.points)))
check_value_2 = int(np.round(check_ratio * len(node_2.points)))
if check_value_2 == 0:
check_value_2 = 1
avg_reach_value_1 = float(np.average(RPlot[(node_1.end -
check_value_1):node_1.end]))
avg_reach_value_2 = float(np.average(RPlot[node_2.start:(node_2.start +
check_value_2)]))
if (float(avg_reach_value_1 / float(RPlot[s])) >
maxima_ratio or float(avg_reach_value_2 /
float(RPlot[s])) > maxima_ratio):
if float(avg_reach_value_1 / float(RPlot[s])) < rejection_ratio:
# reject node 2
node_list.remove((node_2, local_max_2))
if float(avg_reach_value_2 / float(RPlot[s])) < rejection_ratio:
# reject node 1
node_list.remove((node_1, local_max_1))
if (float(avg_reach_value_1 / float(RPlot[s])) >=
rejection_ratio and float(avg_reach_value_2 /
float(RPlot[s])) >= rejection_ratio):
node.assign_split_point(-1)
# since split_point is not significant,
# ignore this split and continue (reject both child nodes)
_cluster_tree(node, parent_node, local_maxima_points,
RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
return
# remove clusters that are too small
if (len(node_1.points) < min_cluster_size and
node_list.count((node_1, local_max_1)) > 0):
# cluster 1 is too small
node_list.remove((node_1, local_max_1))
if (len(node_2.points) < min_cluster_size and
node_list.count((node_2, local_max_1)) > 0):
# cluster 2 is too small
node_list.remove((node_2, local_max_2))
if len(node_list) == 0:
# parent_node will be a leaf
node.assign_split_point(-1)
return
# Check if nodes can be moved up one level - the new cluster created
# is too "similar" to its parent, given the similarity threshold.
bypass_node = 0
if parent_node is not None:
if (float(float(node.end - node.start) /
float(parent_node.end - parent_node.start)) >
similarity_threshold):
parent_node.children.remove(node)
bypass_node = 1
for nl in node_list:
if bypass_node == 1:
parent_node.add_child(nl[0])
_cluster_tree(nl[0], parent_node, nl[1], RPlot, RPoints,
min_cluster_size, maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
else:
node.add_child(nl[0])
_cluster_tree(nl[0], node, nl[1], RPlot, RPoints, min_cluster_size,
maxima_ratio, rejection_ratio,
similarity_threshold, significant_min)
def _get_leaves(node, arr):
if node is not None:
if node.split_point == -1:
arr.append(node)
for n in node.children:
_get_leaves(n, arr)
return arr
def _extract_DBSCAN(setofobjects, epsilon_prime):
# Extract DBSCAN Equivalent cluster structure
# Important: Epsilon prime should be less than epsilon used in OPTICS
cluster_id = 0
for entry in setofobjects.ordering_:
if setofobjects.reachability_[entry] > epsilon_prime:
if setofobjects.core_dists_[entry] <= epsilon_prime:
cluster_id += 1
setofobjects._cluster_id[entry] = cluster_id
else:
# This is only needed for compatibility for repeated scans.
setofobjects._cluster_id[entry] = -1 # Noise points
setofobjects._is_core[entry] = 0
else:
setofobjects._cluster_id[entry] = cluster_id
if setofobjects.core_dists_[entry] <= epsilon_prime:
setofobjects._is_core[entry] = 1 # True
else:
setofobjects._is_core[entry] = 0 # False
"""
===================================
Demo of OPTICS clustering algorithm
===================================
Finds core samples of high density and expands clusters from them.
The key specificity of this example is that the data is generated
so that the clusters have different densities.
"""
# Authors: Shane Grigsby <[email protected]>
# Amy X. Zhang <[email protected]>
# License: BSD 3 clause
from sklearn.cluster.optics_ import OPTICS
import numpy as np
import matplotlib.pyplot as plt
# Generate sample data
np.random.seed(0)
n_points_per_cluster = 250
X = np.empty((0, 2))
X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]
# Plot scatterplot of points
plt.figure(figsize=(7, 7))
plt.subplot(221)
plt.plot(X[:, 0], X[:, 1], 'b.', ms=2)
plt.title("Raw Data")
# Compute OPTICS
clust = OPTICS(eps=30.3, min_samples=9, metric='minkowski')
# Run the fit
clust.fit(X)
# Plot result
core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
core_samples_mask[clust.core_sample_indices_] = True
# Black removed and is used for noise instead.
unique_labels = set(clust.labels_)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
plt.subplot(222)
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (clust.labels_ == k)
xy = X[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=14, alpha=0.5)
xy = X[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=2, alpha=0.5)
plt.title("Automatic Clustering \n Estimated number of clusters: %d"
% clust.n_clusters)
# (Re)-extract clustering structure, using a single eps to show comparison
# with DBSCAN. This can be run for any clustering distance, and can be run
# multiple times without rerunning OPTICS. OPTICS does need to be re-run to c
# hange the min-pts parameter.
clust.extract(.15, 'dbscan')
core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
core_samples_mask[clust.core_sample_indices_] = True
# Black removed and is used for noise instead.
unique_labels = set(clust.labels_)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
plt.subplot(223)
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (clust.labels_ == k)
xy = X[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
markeredgecolor='k', markersize=14, alpha=0.5)
xy = X[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
markeredgecolor='k', markersize=2, alpha=0.5)
plt.title('DBSCAN with epsilon of 0.15 \n Estimated number of clusters: %d'
% clust.n_clusters)
# Try with different eps to highlight the problem
clust.extract(.4, 'dbscan')
core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
core_samples_mask[clust.core_sample_indices_] = True
# Black removed and is used for noise instead.
unique_labels = set(clust.labels_)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
plt.subplot(224)
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (clust.labels_ == k)
xy = X[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
markeredgecolor='k', markersize=14, alpha=0.5)
xy = X[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
markeredgecolor='k', markersize=2, alpha=0.5)
plt.title('DBSCAN with epsilon of 0.40 \n Estimated number of clusters: %d'
% clust.n_clusters)
plt.tight_layout()
plt.show()
# Authors: Shane Grigsby <[email protected]>
# Amy X. Zhang <[email protected]>
# License: BSD 3 clause
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster.optics_ import OPTICS
from sklearn.utils.testing import assert_equal, assert_warns
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises
from sklearn.cluster.tests.common import generate_clustered_data
def test_optics():
# Tests the optics clustering method and all functions inside it
# 'auto' mode
n_clusters = 3
X = generate_clustered_data(n_clusters=n_clusters)
print(np.shape(X))
# Parameters chosen specifically for this task.
# Compute OPTICS
clust = OPTICS(eps=6.0, min_samples=4, metric='euclidean')
clust.fit(X)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
assert_equal(n_clusters_1, n_clusters)
def test_optics2():
# Tests the optics clustering method and all functions inside it
# 'dbscan' mode
# Compute OPTICS
X = [[1, 1]]
clust = OPTICS(eps=0.3, min_samples=10)
# Run the fit
assert_raises(ValueError, clust.fit, X)
def test_empty_extract():
# Test extract where fit() has not yet been run.
clust = OPTICS(eps=0.3, min_samples=10)
assert_raises(ValueError, clust.extract, 0.01, clustering='auto')
def test_bad_extract():
# Test an extraction of eps too close to original eps
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
# Compute OPTICS
clust = OPTICS(eps=0.003, min_samples=10)
clust2 = clust.fit(X)
assert_raises(ValueError, clust2.extract, 0.3)
def test_close_extract():
# Test extract where extraction eps is close to scaled epsPrime
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
# Compute OPTICS
clust = OPTICS(eps=0.2, min_samples=10)
clust3 = clust.fit(X)
# check warning when centers are passed
assert_warns(RuntimeWarning, clust3.extract, 0.3, clustering='dbscan')
assert_equal(max(clust3.labels_), 3)
def test_auto_extract_hier():
# Generate sample data
np.random.seed(0)
n_points_per_cluster = 250
X = np.empty((0, 2))
X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]
# Compute OPTICS
clust = OPTICS(eps=30.3, min_samples=9)
# Run the fit
clust.fit(X)
# Extract the result
# eps not used for 'auto' extract
clust.extract(0.0, 'auto')
assert_equal(len(set(clust.labels_)), 6)
def test_reach_dists():
# Tests against known extraction array
np.random.seed(0)
n_points_per_cluster = 250
X = np.empty((0, 2))
X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
# eps not used for 'auto' extract
X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]
# Compute OPTICS
clust = OPTICS(eps=30.3, min_samples=10, metric='minkowski')
# Run the fit
clust.fit(X)
# Expected values
# Skip to line 370
v = [np.inf, 0.606005, 0.472013, 0.162951, 0.161000, 0.385547, 0.179715,
0.213507, 0.348468, 0.308146, 0.560519, 0.266072, 0.764384, 0.253164,
0.435716, 0.153696, 0.363924, 0.194267, 0.392313, 0.230589, 0.260023,
0.535348, 0.168173, 0.296736, 0.310583, 0.277204, 0.250654, 0.153696,
0.215533, 0.175710, 0.168173, 0.283134, 0.256372, 0.313931, 0.234164,
0.179715, 0.352957, 0.277052, 0.180986, 0.203819, 0.296022, 0.356691,
0.515438, 0.219208, 0.265821, 0.346630, 0.275305, 0.229332, 0.433715,
0.153696, 0.584960, 0.265821, 0.471049, 0.259154, 0.461707, 0.400021,
0.422748, 0.300699, 0.162951, 0.290504, 0.315199, 0.327130, 0.168864,
0.462826, 0.188862, 0.259784, 0.216788, 0.259784, 0.195673, 0.315199,
0.313931, 0.189128, 0.461707, 0.265821, 0.233594, 0.433715, 0.222260,
0.251734, 0.352957, 0.218134, 0.453792, 0.179715, 0.296736, 0.260023,
0.311162, 0.214549, 0.266072, 0.318744, 0.180986, 0.194267, 0.262882,
0.420186, 0.352957, 0.288388, 0.360962, 0.328054, 0.293849, 0.198271,
0.248772, 0.461707, 0.216788, 0.396450, 0.352957, 0.289448, 0.241311,
0.213742, 0.220516, 0.218134, 0.153696, 0.516090, 0.218134, 0.221507,
0.328647, 0.255933, 0.195766, 0.233594, 0.205270, 0.296736, 0.726008,
0.251991, 0.168173, 0.214027, 0.262882, 0.342089, 0.260023, 0.266072,
0.253164, 0.230345, 0.262882, 0.296022, 0.227047, 0.205974, 0.328647,
0.184315, 0.196304, 0.831185, 0.514116, 0.168173, 0.189784, 0.664306,
0.327130, 0.379139, 0.208932, 0.266140, 0.362751, 0.168173, 0.764384,
0.327130, 0.187107, 0.194267, 0.414196, 0.251734, 0.220516, 0.363924,
0.166886, 0.327130, 0.233594, 0.203819, 0.230589, 0.203819, 0.222972,
0.311526, 0.218134, 0.422748, 0.314870, 0.315199, 0.315199, 0.594179,
0.328647, 0.415638, 0.244046, 0.250654, 0.214027, 0.203819, 0.213507,
0.260023, 0.311442, 0.168173, 0.389432, 0.229343, 0.162951, 0.311162,
0.153696, 0.214027, 0.250654, 0.315199, 0.172484, 0.153696, 0.352957,
0.314870, 0.328647, 0.546505, 0.378118, 0.260023, 0.387830, 0.199714,
0.262882, 0.250654, 0.345254, 0.396450, 0.250654, 0.179715, 0.328647,
0.179715, 0.263104, 0.265821, 0.231714, 0.514116, 0.213507, 0.474255,
0.212568, 0.376760, 0.196304, 0.844945, 0.194267, 0.264914, 0.210320,
0.316374, 0.184315, 0.179715, 0.250654, 0.153696, 0.162951, 0.315199,
0.179965, 0.297876, 0.213507, 0.475420, 0.439372, 0.241311, 0.260927,
0.194267, 0.422748, 0.222260, 0.411940, 0.414733, 0.260923, 0.396450,
0.380672, 0.333277, 0.290504, 0.196014, 0.844945, 0.506989, 0.153696,
0.218134, 0.392313, 0.698970, 0.168173, 0.227047, 0.028856, 0.033243,
0.028506, 0.057003, 0.038335, 0.051183, 0.063923, 0.022363, 0.030677,
0.036155, 0.017748, 0.062887, 0.036041, 0.051183, 0.078198, 0.068936,
0.032418, 0.040634, 0.022188, 0.022112, 0.036858, 0.040199, 0.025549,
0.083975, 0.032209, 0.025525, 0.032952, 0.034727, 0.068887, 0.040634,
0.048985, 0.047450, 0.022422, 0.023767, 0.028092, 0.047450, 0.029202,
0.026105, 0.030542, 0.032250, 0.062887, 0.038335, 0.026753, 0.028092,
0.099391, 0.021430, 0.020496, 0.021430, 0.025043, 0.023868, 0.050069,
0.023868, 0.044140, 0.038032, 0.022112, 0.044140, 0.031528, 0.028092,
0.020065, 0.055926, 0.031508, 0.025549, 0.028062, 0.036155, 0.023694,
0.029423, 0.026105, 0.028497, 0.023868, 0.044808, 0.035783, 0.033090,
0.038779, 0.032146, 0.038421, 0.057328, 0.020065, 0.020065, 0.028858,
0.021337, 0.041226, 0.022507, 0.028506, 0.030257, 0.057912, 0.050876,
0.120109, 0.020065, 0.034727, 0.038596, 0.037008, 0.031609, 0.095640,
0.083728, 0.064906, 0.030677, 0.057003, 0.037008, 0.018705, 0.030677,
0.044140, 0.034727, 0.045226, 0.032146, 0.032418, 0.029332, 0.030104,
0.033243, 0.030104, 0.032209, 0.026405, 0.024092, 0.048441, 0.036379,
0.030745, 0.023454, 0.018705, 0.124248, 0.041114, 0.020700, 0.042633,
0.042455, 0.028497, 0.029202, 0.057859, 0.053157, 0.036155, 0.029534,
0.032209, 0.038032, 0.024617, 0.023071, 0.033090, 0.023694, 0.047277,
0.024617, 0.023868, 0.043916, 0.025549, 0.046198, 0.041086, 0.042003,
0.022507, 0.021430, 0.038779, 0.025043, 0.036379, 0.036326, 0.029421,
0.023454, 0.058683, 0.025549, 0.039904, 0.022507, 0.046198, 0.029332,
0.032209, 0.036155, 0.038421, 0.025043, 0.023694, 0.030104, 0.022363,
0.048544, 0.035180, 0.030677, 0.022112, 0.030677, 0.036678, 0.022507,
0.024092, 0.064231, 0.022507, 0.032209, 0.025043, 0.221152, 0.029840,
0.038779, 0.040634, 0.024617, 0.032418, 0.025525, 0.033298, 0.028092,
0.045754, 0.032209, 0.017748, 0.033090, 0.017748, 0.048931, 0.038689,
0.022112, 0.027129, 0.032952, 0.036858, 0.027704, 0.032146, 0.052191,
0.042633, 0.071638, 0.044140, 0.022507, 0.046647, 0.028270, 0.050525,
0.036772, 0.058995, 0.038335, 0.025185, 0.022507, 0.040293, 0.032418,
0.064308, 0.026023, 0.036155, 0.032418, 0.038032, 0.018705, 0.040293,
0.030104, 0.030845, 0.064906, 0.025525, 0.036155, 0.022507, 0.022363,
0.032418, 0.021430, 0.032209, 0.102770, 0.036960, 0.031062, 0.025043,
0.036155, 0.031609, 0.036379, 0.030845, 0.048985, 0.021848, 0.025549,
0.022507, 0.035783, 0.023698, 0.034422, 0.032418, 0.022507, 0.023868,
0.020065, 0.023694, 0.040634, 0.055633, 0.054549, 0.044662, 0.087660,
0.048066, 0.143571, 0.068669, 0.065049, 0.076927, 0.044359, 0.041577,
0.052364, 0.100317, 0.062146, 0.067578, 0.054549, 0.047239, 0.062809,
0.033917, 0.087660, 0.077113, 0.055633, 0.061854, 0.059756, 0.059537,
0.052364, 0.060347, 0.170251, 0.108492, 0.046370, 0.070684, 0.049589,
0.044662, 0.049013, 0.043303, 0.069573, 0.075044, 0.054354, 0.065072,
0.073135, 0.046126, 0.055569, 0.047239, 0.062146, 0.056093, 0.059986,
0.096182, 0.100317, 0.051649, 0.054354, 0.077420, 0.100317, 0.046370,
0.043303, 0.045845, 0.061422, 0.091580, 0.206234, 0.051405, 0.071684,
0.061574, 0.063666, 0.052692, 0.051649, 0.100124, 0.077909, 0.033917,
0.058680, 0.044359, 0.065498, 0.080214, 0.123231, 0.052957, 0.056582,
0.061540, 0.076794, 0.043303, 0.054884, 0.044359, 0.145249, 0.081741,
0.041577, 0.056093, 0.076799, 0.044359, 0.068483, 0.051649, 0.092275,
0.044359, 0.108492, 0.092275, 0.046126, 0.106422, 0.054354, 0.052957,
0.073329, 0.046126, 0.086402, 0.048194, 0.128569, 0.104042, 0.061854,
0.069573, 0.070035, 0.050346, 0.043303, 0.053576, 0.054549, 0.033917,
0.063666, 0.058680, 0.099130, 0.080198, 0.050118, 0.054549, 0.041577,
0.143571, 0.095965, 0.047643, 0.052364, 0.105168, 0.048685, 0.043303,
0.052814, 0.076927, 0.054549, 0.041577, 0.066657, 0.189930, 0.046370,
0.075044, 0.121331, 0.043303, 0.223897, 0.198621, 0.150328, 0.100317,
0.053576, 0.070708, 0.100898, 0.047239, 0.043613, 0.065049, 0.049146,
0.068669, 0.055569, 0.062124, 0.096408, 0.044662, 0.087660, 0.083012,
0.050118, 0.069573, 0.046126, 0.049146, 0.049146, 0.050808, 0.080198,
0.059986, 0.071974, 0.047239, 0.050808, 0.059986, 0.065850, 0.044863,
0.052814, 0.044359, 0.052364, 0.108492, 0.143571, 0.050926, 0.049146,
0.049146, 0.055569, 0.033917, 0.527659, 0.143547, 0.077113, 0.046126,
0.106422, 0.068669, 0.108492, 0.063666, 0.054549, 0.054884, 0.056907,
0.068669, 0.080198, 0.120887, 0.054549, 0.052692, 0.085801, 0.054884,
0.050808, 0.094595, 0.059545, 0.054354, 0.062124, 0.087660, 0.052814,
0.086715, 0.146253, 0.046370, 0.041577, 0.116083, 0.076927, 0.047239,
0.084375, 0.134652, 0.217969, 0.063559, 0.061540, 0.044662, 0.054354,
0.063666, 0.145466, 0.101700, 0.090491, 0.078536, 0.054884, 0.062124,
0.041577, 0.043303, 0.194473, 0.079780, 0.059704, 0.054780, 0.048194,
0.062146, 0.069573, 0.086898, 0.046675, 0.056258, 0.074141, 0.048066,
0.052957, 0.057982, 0.058966, 0.061048, 0.050885, 0.049146, 0.080993,
0.056093, 0.061854, 0.124025, 0.062146, 0.060906, 0.150328, 0.058680,
0.077420, 0.051800, 0.102359, 0.113301, 0.073096, 0.116715, 0.131476,
0.140601, 0.097667, 0.051800, 0.051800, 0.127964, 0.108870, 0.111926,
0.093532, 0.102390, 0.144266, 0.098271, 0.102541, 0.136497, 0.127964,
0.085569, 0.157863, 0.096739, 0.054008, 0.106219, 0.076838, 0.099076,
0.093532, 0.059861, 0.079975, 0.116715, 0.133625, 0.053641, 0.066110,
0.122302, 0.081313, 0.140601, 0.259889, 0.094437, 0.098271, 0.105776,
0.225742, 0.100097, 0.147592, 0.099076, 0.093128, 0.093532, 0.134946,
0.133625, 0.120869, 0.065932, 0.103395, 0.125172, 0.147842, 0.105278,
0.173584, 0.168241, 0.111524, 0.093532, 0.099076, 0.100426, 0.137132,
0.065356, 0.091108, 0.141202, 0.054008, 0.075298, 0.073717, 0.122817,
0.105278, 0.094437, 0.067080, 0.108530, 0.115467, 0.093532, 0.085569,
0.145180, 0.100426, 0.116715, 0.151726, 0.073096, 0.193781, 0.090614,
0.081162, 0.051800, 0.133625, 0.136497, 0.100670, 0.081313, 0.506893,
0.084567, 0.108530, 0.087353, 0.063184, 0.123639, 0.168333, 0.314422,
0.091108, 0.079975, 0.091108, 0.136497, 0.122302, 0.167297, 0.067080,
0.144266, 0.065932, 0.087667, 0.100426, 0.099460, 0.091108, 0.100637,
0.116715, 0.079975, 0.077977, 0.090340, 0.136723, 1.943026, 0.108870,
0.090340, 0.065932, 0.102245, 0.157863, 0.157863, 0.215574, 0.156830,
0.093532, 0.122302, 0.097667, 0.063000, 0.116715, 0.076838, 0.148372,
0.093532, 0.099076, 0.141202, 0.096505, 0.096739, 0.091108, 0.099076,
0.079975, 0.108870, 0.102390, 0.079975, 0.244121, 0.167071, 0.096739,
0.102390, 0.103395, 0.073096, 0.094887, 0.065932, 0.190667, 0.099460,
0.102390, 0.096739, 0.102390, 0.116715, 0.100637, 0.256554, 0.103395,
0.081313, 0.068962, 0.109645, 0.059364, 0.147842, 0.099460, 0.079262,
0.099460, 0.065932, 0.123687, 0.090614, 0.131352, 0.098271, 0.102541,
0.098983, 0.057224, 0.074797, 0.057224, 0.250559, 0.079975, 0.103395,
0.100426, 0.065932, 0.120661, 0.079262, 0.065932, 0.118665, 0.081162,
0.066283, 0.099076, 0.102359, 0.108530, 0.079975, 0.168333, 0.096739,
0.168333, 0.097008, 0.055288, 0.172411, 0.092801, 0.051800, 0.102541,
0.084567, 0.054008, 0.090991, 0.172411, 0.057224, 0.148396, 0.200965,
0.076838, 0.157863, 0.053535, 0.121919, 0.126609, 0.123890, 0.118081,
0.097008, 0.125311, 0.099460, 0.122302, 0.134946, 0.080975, 0.084567,
0.110093, 0.102245, 0.103395, 0.171601, 0.094887, 0.126240, 0.137742,
0.099954, 0.108530, 0.157863, 0.096739, 0.051800, 0.127964, 0.066110,
0.061021, 0.105147, 0.100426, 0.079975, 0.088187, 0.116421, 0.076838,
0.098271, 0.116715, 0.137656, 0.075298, 0.148396, 0.112166, 1.083905,
0.326598, 0.428987, 0.395963, 0.224541, 0.326598, 0.030677, 0.410454,
0.122771, 1.140305, 0.641074, 0.432159, 0.429335, 0.422908, 0.461926,
0.293083, 0.477078, 0.714856, 0.515861, 0.405418, 0.054354, 0.341177,
0.410008, 0.514245, 0.641074, 0.816459, 0.455115, 0.400707, 0.382240,
0.431832, 1.618970, 0.683953, 0.182992, 0.763699, 0.515861, 0.717145,
0.409629, 0.074134, 0.398273, 0.864974, 0.400707, 0.591403, 0.435354,
0.514245, 1.337152, 0.841077, 0.410008, 0.683953, 0.338649, 0.557595,
0.442092, 0.326598, 0.984189, 0.429608, 0.395963, 1.152055, 0.587222,
1.748492, 0.477078, 0.395459, 0.717145, 0.575811, 0.210115, 0.487785,
0.431832, 0.383852, 0.806708, 0.428987, 0.278405, 0.395963, 0.395459,
0.383852, 1.083905, 0.428510, 0.326598, 0.108492, 0.541644, 0.612110,
0.382240, 0.833511, 0.382240, 0.456628, 0.326598, 0.458880, 0.398273,
0.957748, 0.326598, 0.295049, 0.629646, 0.429765, 0.439942, 0.633617,
0.566297, 0.429335, 0.086507, 0.477078, 0.526753, 0.375240, 0.584436,
0.355776, 0.395963, 0.644924, 0.129793, 0.484880, 0.470001, 0.572306,
0.383852, 1.110081, 0.841077, 0.395963, 0.683953, 0.428745, 0.387752,
0.545299, 0.686537, 0.635219, 0.840499, 0.527659, 0.400707, 0.480982,
0.541644, 0.714856, 0.942673, 0.398273, 0.428987, 0.356781, 0.428510,
1.140961, 0.395963, 0.356781, 0.410454, 0.541644, 0.641074, 0.484778,
0.410008, 0.433108, 0.278405, 0.278405, 0.503141, 0.428745, 0.125103,
0.633617, 0.410454, 0.124025, 0.461926, 0.398273, 0.410008, 1.181303,
0.635219, 0.593537, 0.395963, 0.717145, 0.409629, 0.492595, 0.806708,
0.503820, 0.423834, 0.557595, 0.429335, 0.470749, 0.461926, 1.890036,
0.236343, 0.806708, 0.123561, 0.433744, 0.427348, 0.427348, 0.962234,
0.395963, 0.409629, 0.527659, 0.425727, 0.602549, 0.901331, 0.326598,
0.635949, 0.541644, 0.375240, 0.598969, 1.140961, 0.391998, 0.719443,
0.410008, 0.515861, 0.714856, 0.842273, 0.410454, 0.389377, 0.431078,
0.515861, 0.515861, 0.429335, 0.332495, 0.398273, 0.428987, 0.635219,
0.387752, 0.384289, 0.383852, 0.430504, 0.428510, 0.431832, 0.375240,
0.278405, 0.374102, 0.428745, 0.692878, 1.152055, 0.503820, 0.428745,
0.352868, 0.429335, 0.375240, 0.400707, 0.427348, 0.256183, 0.962234,
0.505376, 0.058995, 0.410454, 0.172880, 0.395963, 0.470749, 0.356781,
1.332700, 0.683953, 0.395963, 0.806708, 0.400707, 0.330982, 0.427731,
0.934845, 0.375240, 0.191534, 0.047239, 1.083905, 0.348794, 0.409708,
0.503820, 0.557595, 0.429335, 0.498780, 0.293083, 0.363069, 0.442092,
1.152055, 0.375240, 0.335677, 0.452443, 0.655156, 0.929928, 0.614869,
1.411031, 1.101132, 0.469030, 0.404976, 0.538209, 0.655828, 0.674748,
0.365182, 0.641612, 0.555434, 0.521651, 0.386679, 0.386679, 0.980304,
0.659111, 0.651366, 0.538209, 0.521651, 0.884780, 1.287829, 0.558322,
0.446161, 0.817970, 0.568499, 0.533507, 0.639746, 0.484404, 0.591751,
0.913016, 0.446161, 0.533907, 0.606885, 0.672320, 1.150642, 0.655828,
0.365182, 0.665088, 1.094242, 0.629401, 0.540676, 0.733026, 1.248265,
1.273499, 0.867854, 0.538656, 0.386679, 0.922273, 0.515686, 1.321022,
0.624444, 0.655828, 0.922273, 0.386679, 0.762191, 0.779432, 0.601851,
0.655156, 0.926213, 0.762191, 0.641612, 0.558322, 1.025370, 0.641067,
0.651366, 0.633434, 0.459580, 0.859221, 0.552291, 0.591751, 0.819965,
0.669977, 1.185083, 0.499338, 0.533907, 0.752871, 0.571388, 0.539772,
0.449182, 1.025370, 0.365182, 1.321022, 0.926213, 0.886360, 0.562272,
0.669977, 0.796046, 0.557598, 0.596776, 0.672336, 0.659111, 0.453719,
0.477716, 0.477716, 1.592069, 0.591751, 0.539772, 0.641612, 0.946254,
0.744165, 0.386679, 0.593825, 0.539772, 0.449182, 0.604273, 0.794951,
0.752871, 0.539772, 0.648732, 0.469030, 0.665088, 1.332700, 1.341388,
0.533507, 0.544212, 1.025992, 0.645967, 0.612945, 0.868492, 0.648732,
0.752300, 0.624444, 1.219748, 0.446161, 0.520818, 0.469044, 0.669977,
0.926213, 0.638752, 0.762191, 0.922273, 0.794951, 0.606885, 0.669977,
0.550113, 0.641067, 0.733026, 0.604273, 0.648732, 0.533507, 0.746506,
0.733026, 0.980683, 0.538209, 0.669977, 0.469030, 0.648732, 0.609190,
1.219748, 0.373113, 0.539772, 1.744047, 1.004716, 0.926213, 0.562272,
0.752871, 0.538656, 0.449182, 0.365182, 0.469030, 0.446161, 0.484404,
0.768592, 0.648732, 0.655156, 0.521651, 0.779432, 0.446161, 0.596776,
0.538209, 0.726740, 0.539772, 0.469030, 0.521651, 0.561950, 0.601851,
0.533907, 0.922273, 1.248265, 0.476800, 0.737990, 0.817970, 0.792127,
0.533907, 0.486038, 0.624444, 0.798241, 0.476800, 1.059373, 0.645967,
0.619940, 0.528726, 0.669977, 0.865406, 0.980683, 0.980683, 0.834671,
1.001353, 0.752871, 0.449182, 1.096520, 0.449182, 0.593825, 0.636558,
0.762191, 0.638591, 0.538209, 0.865406, 0.779432, 0.469044, 0.645967,
0.557598, 0.499338, 0.484404, 0.515686, 0.794951, 0.619456, 0.733026,
0.821769, 0.752300, 0.643302, 0.636558, 0.655156, 0.655156, 0.484404,
0.648732, 0.726023, 0.365182, 0.606885, 0.499338, 0.520818, 0.612945,
0.446161, 0.557598, 0.469044, 1.134650, 0.629401, 0.538656, 0.561950,
1.364861, 0.459580, 1.025370, 0.980304, 0.607592, 0.533907, 1.134650,
0.446161, 0.629962]
assert_array_almost_equal(clust.reachability_, np.array(v))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment