agramfort · July 15, 2017 17:20
diff --git a/gistfile1.txt b/gistfile1.txt
 # -*- coding: utf-8 -*-
 """Ordering Points To Identify the Clustering Structure (OPTICS)

 These routines execute the OPTICS algorithm, and implement various
 cluster extraction methods of the ordered list.

 Authors: Shane Grigsby <[email protected]>
         Amy X. Zhang <[email protected]>
 License: BSD 3 clause
 """

 import warnings

 import numpy as np
 from scipy import iterable

 from ..utils import check_array
 from ..utils.validation import check_is_fitted
 from ..neighbors import BallTree
 from ..base import BaseEstimator, ClusterMixin
 from ..metrics.pairwise import pairwise_distances
 from ._optics_inner import quick_scan


 class _SetOfObjects(BallTree):
    """Build balltree data structure with processing index.

    It's built from given data in preparation for OPTICS Algorithm

    Parameters
    ----------
    X: array, shape (n_samples, n_features)
        Input data.

    metric : str
        The metric used.

    **kwargs:
        kwargs passed to BallTree init.
    """

    def __init__(self, X, metric, **kwargs):

        super(_SetOfObjects, self).__init__(X,
                                            metric=metric, **kwargs)

        self._n = len(self.data)
        self.metric = metric
        # Start all points as 'unprocessed' ##
        self._processed = np.zeros((self._n, 1), dtype=bool)
        self.reachability_ = np.ones(self._n) * np.inf
        self.core_dists_ = np.ones(self._n) * np.nan
        # Start all points as noise ##
        self._cluster_id = -np.ones(self._n, dtype=int)
        self._is_core = np.zeros(self._n, dtype=bool)
        self.ordering_ = []


 # OPTICS helper functions; these should not be public #
 def _prep_optics(self, min_samples):
    self.core_dists_[:] = self.query(self.get_arrays()[0],
                                     k=min_samples)[0][:, -1]


 def _build_optics(setofobjects, epsilon):
    # Main OPTICS loop. Not parallelizable. The order that entries are
    # written to the 'ordering_' list is important!
    for point in range(setofobjects._n):
        if not setofobjects._processed[point]:
            _expand_cluster_order(setofobjects, point, epsilon)


 def _expand_cluster_order(setofobjects, point, epsilon):
    # As above, not parallelizable. Parallelizing would allow items in
    # the 'unprocessed' list to switch to 'processed'
    if setofobjects.core_dists_[point] <= epsilon:
        while not setofobjects._processed[point]:
            setofobjects._processed[point] = True
            setofobjects.ordering_.append(point)
            point = _set_reach_dist(setofobjects, point, epsilon)
    else:
        setofobjects.ordering_.append(point)  # For very noisy points
        setofobjects._processed[point] = True


 def _set_reach_dist(setofobjects, point_index, epsilon):
    X = np.array(setofobjects.data[point_index]).reshape(1, -1)
    indices = setofobjects.query_radius(X, r=epsilon,
                                        return_distance=False,
                                        count_only=False,
                                        sort_results=False)[0]

    # Checks to see if there more than one member in the neighborhood
    if iterable(indices):
        # Masking processed values; n_pr is 'not processed'
        n_pr = np.compress((np.take(setofobjects._processed,
                                    indices, axis=0) < 1).ravel(),
                           indices, axis=0)
        # n_pr = indices[(setofobjects._processed[indices] < 1).ravel()]
        if len(n_pr) > 0:
            dists = pairwise_distances(X,
                                       np.take(setofobjects.get_arrays()[0],
                                               n_pr,
                                               axis=0),
                                       setofobjects.metric, n_jobs=1).ravel()

            rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
            new_reach = np.minimum(np.take(setofobjects.reachability_,
                                           n_pr, axis=0),
                                   rdists)
            setofobjects.reachability_[n_pr] = new_reach

        # Checks to see if everything is already processed;
        # if so, return control to main loop
        if n_pr.size > 0:
            # Define return order based on reachability distance
            return(n_pr[quick_scan(np.take(setofobjects.reachability_,
                                           n_pr, axis=0), dists)])
        else:
            return point_index


 class OPTICS(BaseEstimator, ClusterMixin):

    """Estimate clustering structure from vector array

    OPTICS: Ordering Points To Identify the Clustering Structure
    Equivalent to DBSCAN, finds core sample of high density and expands
    clusters from them. Unlike DBSCAN, keeps cluster hierarchy for varying
    epsilon values. Optimized for usage on large point datasets.

    Parameters
    ----------
    eps : float, optional
        The maximum distance between two samples for them to be considered
        as in the same neighborhood. This is also the largest object size
        expected within the dataset. Lower eps values can be used after
        OPTICS is run the first time, with fast returns of labels. Default
        value of "np.inf" will identify clusters across all scales; reducing
        eps will result in shorter run times.

    min_samples : int, optional
        The number of samples in a neighborhood for a point to be considered
        as a core point.

    metric : string or callable, optional
        The distance metric to use for neighborhood lookups. Default is
        "minkowski". Other options include “euclidean”, “manhattan”,
        “chebyshev”, “haversine”, “seuclidean”, “hamming”, “canberra”,
        and “braycurtis”. The “wminkowski” and “mahalanobis” metrics are
        also valid with an additional argument.

    Attributes
    ----------
    `core_sample_indices_` : array, shape (n_core_samples,)
        Indices of core samples.

    `labels_` : array, shape (n_samples,)
        Cluster labels for each point in the dataset given to fit().
        Noisy samples are given the label -1.

    `reachability_` : array, shape (n_samples,)
        Reachability distances per sample.

    `ordering_` : array, shape (n_samples,)
        The cluster ordered list of sample indices

    `core_dists_` : array, shape (n_samples,)
        Distance at which each sample becomes a core point.
        Points which will never be core have a distance of inf.

    References
    ----------
    Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
    "OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
    Record 28, no. 2 (1999): 49-60.
    """

    def __init__(self, eps=np.inf, min_samples=5, metric='euclidean'):
        self.eps = eps
        self.min_samples = min_samples
        self.metric = metric

    def fit(self, X, y=None):
        """Perform OPTICS clustering

        Extracts an ordered list of points and reachability distances, and
        performs initial clustering using 'eps' distance specified at OPTICS
        object instantiation.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            The data.

        Returns
        -------
        self : instance of OPTICS
            The instance.
        """
        X = check_array(X)

        n_samples = len(X)

        # Check for valid n_samples relative to min_samples
        if self.min_samples > n_samples:
            raise ValueError("Number of training samples (%d) must be greater "
                             "than min_samples (%d) used for clustering." %
                             (n_samples, self.min_samples))

        self.tree_ = _SetOfObjects(X, self.metric)
        _prep_optics(self.tree_, self.min_samples)
        _build_optics(self.tree_, self.eps * 5.0)
        self.extract_auto()
        self.core_sample_indices_ = np.arange(self._is_core)
        self.n_clusters_ = max(self._cluster_id)
        return self

    @property
    def reachability_(self):
        return self.tree_.reachability_[:]

    @property
    def core_dists_(self):
        return self.tree_.core_dists_[:]

    @property
    def _cluster_id(self):
        return self.tree_._cluster_id[:]

    @property
    def _is_core(self):
        return self.tree_._is_core[:]

    @property
    def _index(self):
        return self.tree_._index[:]

    @property
    def ordering_(self):
        return self.tree_.ordering_[:]

    @property
    def labels_(self):
        return self._cluster_id[:]

    def extract(self, epsilon_prime, clustering='auto', **kwargs):
        """Performs Automatic extraction for an arbitrary epsilon.

        It can also do DBSCAN equivalent and it can be run multiple times.

        See extract_auto() for full description of parameters

        Parameters
        ----------
        epsilon_prime: float or int, optional
            Used for 'dbscan' clustering. Must be less than or equal to what
            was used for prep and build steps

        clustering: {'auto', 'dbscan'}, optional
            Type of cluster extraction to perform; defaults to 'auto'.

        Returns
        -------
        core_sample_indices_: array, shape (n_core_samples,)
            The indices of the core samples.

        labels_ : array, shape (n_samples,)
            The estimated labels.
        """
        check_is_fitted(self, 'reachability_')

        if epsilon_prime > self.eps * 5.0:
            raise ValueError('Specify an epsilon smaller than %s. Got %s.'
                             % (self.eps * 5, epsilon_prime))

        if clustering == 'dbscan':
            self.eps_prime = epsilon_prime
            _extract_DBSCAN(self, epsilon_prime)
        elif clustering == 'auto':
            self.extract_auto(**kwargs)

        self.core_sample_indices_ = \
            np.arange(len(self.reachability_))[self._is_core == 1]
        self.n_clusters = max(self._cluster_id)

        if epsilon_prime > (self.eps * 1.05):
            warnings.warn(
                "Warning, eps (%s) is close to epsilon_prime (%s): "
                "Output may be unstable." % (self.eps, epsilon_prime),
                RuntimeWarning, stacklevel=2)
            # XXX explain 5% thing

        return self.core_sample_indices_, self.labels_

    def extract_auto(self,
                     maxima_ratio=.75,
                     rejection_ratio=.7,
                     similarity_threshold=0.4,
                     significant_min=.003,
                     min_cluster_size_ratio=.005,
                     min_maxima_ratio=0.001):
        """Performs automatic cluster extraction for variable density data.

        Can be run multiple times with adjusted parameters. Only returns
        core and noise labels.

        Parameters
        ----------
        maxima_ratio: float, optional
            The maximum ratio we allow of average height of clusters on the
            right and left to the local maxima in question. The higher the
            ratio, the more generous the algorithm is to preserving local
            minimums, and the more cuts the resulting tree will have.

        rejection_ratio: float, optional
            Adjusts the fitness of the clustering. When the maxima_ratio is
            exceeded, determine which of the clusters to the left and right to
            reject based on rejection_ratio

        similarity_threshold: float, optional
            Used to check if nodes can be moved up one level, that is, if the
            new cluster created is too "similar" to its parent, given the
            similarity threshold. Similarity can be determined by 1) the size
            of the new cluster relative to the size of the parent node or
            2) the average of the reachability values of the new cluster
            relative to the average of the reachability values of the parent
            node. A lower value for the similarity threshold means less levels
            in the tree.

        significant_min: float, optional
            Sets a lower threshold on how small a significant maxima can be
            min_cluster_size_ratio: float, optional
            Minimum percentage of dataset expected for cluster membership.

        min_maxima_ratio: float, optional
            Used to determine neighborhood size for minimum cluster membership.
        """
        check_is_fitted(self, 'reachability_')

        # Extraction wrapper
        RPlot = self.reachability_[self.ordering_].tolist()
        RPoints = self.ordering_
        root_node = _automatic_cluster(RPlot, RPoints, maxima_ratio,
                                       rejection_ratio, similarity_threshold,
                                       significant_min, min_cluster_size_ratio,
                                       min_maxima_ratio)
        leaves = _get_leaves(root_node, [])
        # Start cluster id's at 1
        clustid = 1
        # Start all points as non-core noise
        self._cluster_id[:] = -1
        self._is_core[:] = 0
        for leaf in leaves:
            index = self.ordering_[leaf.start:leaf.end]
            self._cluster_id[index] = clustid
            self._is_core[index] = 1
            clustid += 1


 def _automatic_cluster(RPlot, RPoints, maxima_ratio, rejection_ratio,
                       similarity_threshold, significant_min,
                       min_cluster_size_ratio, min_maxima_ratio):

    min_neighborhood_size = 2
    min_cluster_size = int(min_cluster_size_ratio * len(RPoints))
    neighborhood_size = int(min_maxima_ratio * len(RPoints))

    # Should this check for < min_samples? Should this be public?
    if min_cluster_size < 5:
        min_cluster_size = 5

    # Again, should this check < min_samples, should the parameter be public?
    if neighborhood_size < min_neighborhood_size:
        neighborhood_size = min_neighborhood_size

    local_maxima_points = _find_local_maxima(RPlot, RPoints, neighborhood_size)
    root_node = _TreeNode(RPoints, 0, len(RPoints), None)
    _cluster_tree(root_node, None, local_maxima_points,
                  RPlot, RPoints, min_cluster_size,
                  maxima_ratio, rejection_ratio,
                  similarity_threshold, significant_min)

    return root_node


 class _TreeNode(object):
    # automatic cluster helper classes and functions
    def __init__(self, points, start, end, parent_node):
        self.points = points
        self.start = start
        self.end = end
        self.parent_node = parent_node
        self.children = []
        self.split_point = -1

    def assign_split_point(self, split_point):
        self.split_point = split_point

    def add_child(self, child):
        self.children.append(child)


 def _is_local_maxima(index, RPlot, RPoints, neighborhood_size):
    # 0 = point at index is not local maxima
    # 1 = point at index is local maxima
    for i in range(1, neighborhood_size + 1):
        # process objects to the right of index
        if index + i < len(RPlot):
            if (RPlot[index] < RPlot[index + i]):
                return 0
        # process objects to the left of index
        if index - i >= 0:
            if (RPlot[index] < RPlot[index - i]):
                return 0
    return 1


 def _find_local_maxima(RPlot, RPoints, neighborhood_size):
    local_maxima_points = {}
    # 1st and last points on Reachability Plot are not taken
    # as local maxima points
    for i in range(1, len(RPoints) - 1):
        # if the point is a local maxima on the reachability plot with
        # regard to neighborhood_size, insert it into priority queue and
        # maxima list
        if (RPlot[i] > RPlot[i - 1] and RPlot[i] >= RPlot[i + 1] and
                _is_local_maxima(i, RPlot, RPoints, neighborhood_size) == 1):
            local_maxima_points[i] = RPlot[i]

    return sorted(local_maxima_points,
                  key=local_maxima_points.__getitem__, reverse=True)


 def _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
                  min_cluster_size, maxima_ratio, rejection_ratio,
                  similarity_threshold, significant_min):
    # node is a node or the root of the tree in the first call
    # parent_node is parent node of N or None if node is root of the tree
    # local_maxima_points is list of local maxima points sorted in
    # descending order of reachability
    if len(local_maxima_points) == 0:
        return  # parent_node is a leaf

    # take largest local maximum as possible separation between clusters
    s = local_maxima_points[0]
    node.assign_split_point(s)
    local_maxima_points = local_maxima_points[1:]

    # create two new nodes and add to list of nodes
    node_1 = _TreeNode(RPoints[node.start:s], node.start, s, node)
    node_2 = _TreeNode(RPoints[s + 1:node.end], s + 1, node.end, node)
    local_max_1 = []
    local_max_2 = []

    for i in local_maxima_points:
        if i < s:
            local_max_1.append(i)
        if i > s:
            local_max_2.append(i)

    node_list = []
    node_list.append((node_1, local_max_1))
    node_list.append((node_2, local_max_2))

    if RPlot[s] < significant_min:
        node.assign_split_point(-1)
        # if split_point is not significant, ignore this split and continue
        _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
                      min_cluster_size, maxima_ratio, rejection_ratio,
                      similarity_threshold, significant_min)
        return

    # only check a certain ratio of points in the child
    # nodes formed to the left and right of the maxima
    # ...should check_ratio be a user settable parameter?
    check_ratio = .8
    check_value_1 = int(np.round(check_ratio * len(node_1.points)))
    check_value_2 = int(np.round(check_ratio * len(node_2.points)))
    if check_value_2 == 0:
        check_value_2 = 1
    avg_reach_value_1 = float(np.average(RPlot[(node_1.end -
                                                check_value_1):node_1.end]))
    avg_reach_value_2 = float(np.average(RPlot[node_2.start:(node_2.start +
                                                             check_value_2)]))

    if (float(avg_reach_value_1 / float(RPlot[s])) >
        maxima_ratio or float(avg_reach_value_2 /
                              float(RPlot[s])) > maxima_ratio):

        if float(avg_reach_value_1 / float(RPlot[s])) < rejection_ratio:
            # reject node 2
            node_list.remove((node_2, local_max_2))
        if float(avg_reach_value_2 / float(RPlot[s])) < rejection_ratio:
            # reject node 1
            node_list.remove((node_1, local_max_1))
        if (float(avg_reach_value_1 / float(RPlot[s])) >=
            rejection_ratio and float(avg_reach_value_2 /
                                      float(RPlot[s])) >= rejection_ratio):
            node.assign_split_point(-1)
            # since split_point is not significant,
            # ignore this split and continue (reject both child nodes)
            _cluster_tree(node, parent_node, local_maxima_points,
                          RPlot, RPoints, min_cluster_size,
                          maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)
            return

    # remove clusters that are too small
    if (len(node_1.points) < min_cluster_size and
            node_list.count((node_1, local_max_1)) > 0):
        # cluster 1 is too small
        node_list.remove((node_1, local_max_1))
    if (len(node_2.points) < min_cluster_size and
            node_list.count((node_2, local_max_1)) > 0):
        # cluster 2 is too small
        node_list.remove((node_2, local_max_2))
    if len(node_list) == 0:
        # parent_node will be a leaf
        node.assign_split_point(-1)
        return

    # Check if nodes can be moved up one level - the new cluster created
    # is too "similar" to its parent, given the similarity threshold.
    bypass_node = 0
    if parent_node is not None:
        if (float(float(node.end - node.start) /
                  float(parent_node.end - parent_node.start)) >
                similarity_threshold):

            parent_node.children.remove(node)
            bypass_node = 1

    for nl in node_list:
        if bypass_node == 1:
            parent_node.add_child(nl[0])
            _cluster_tree(nl[0], parent_node, nl[1], RPlot, RPoints,
                          min_cluster_size, maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)
        else:
            node.add_child(nl[0])
            _cluster_tree(nl[0], node, nl[1], RPlot, RPoints, min_cluster_size,
                          maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)


 def _get_leaves(node, arr):
    if node is not None:
        if node.split_point == -1:
            arr.append(node)
        for n in node.children:
            _get_leaves(n, arr)
    return arr


 def _extract_DBSCAN(setofobjects, epsilon_prime):
    # Extract DBSCAN Equivalent cluster structure
    # Important: Epsilon prime should be less than epsilon used in OPTICS
    cluster_id = 0
    for entry in setofobjects.ordering_:
        if setofobjects.reachability_[entry] > epsilon_prime:
            if setofobjects.core_dists_[entry] <= epsilon_prime:
                cluster_id += 1
                setofobjects._cluster_id[entry] = cluster_id
            else:
                # This is only needed for compatibility for repeated scans.
                setofobjects._cluster_id[entry] = -1   # Noise points
                setofobjects._is_core[entry] = 0
        else:
            setofobjects._cluster_id[entry] = cluster_id
            if setofobjects.core_dists_[entry] <= epsilon_prime:
                setofobjects._is_core[entry] = 1   # True
            else:
                setofobjects._is_core[entry] = 0   # False
diff --git a/optics_.py b/optics_.py
 # -*- coding: utf-8 -*-
 """Ordering Points To Identify the Clustering Structure (OPTICS)

 These routines execute the OPTICS algorithm, and implement various
 cluster extraction methods of the ordered list.

 Authors: Shane Grigsby <[email protected]>
         Amy X. Zhang <[email protected]>
 License: BSD 3 clause
 """

 import warnings

 import numpy as np
 from scipy import iterable

 from ..utils import check_array
 from ..utils.validation import check_is_fitted
 from ..neighbors import BallTree
 from ..base import BaseEstimator, ClusterMixin
 from ..metrics.pairwise import pairwise_distances
 from ._optics_inner import quick_scan


 class _SetOfObjects(BallTree):
    """Build balltree data structure with processing index.

    It's built from given data in preparation for OPTICS Algorithm

    Parameters
    ----------
    X: array, shape (n_samples, n_features)
        Input data.

    metric : str
        The metric used.

    **kwargs:
        kwargs passed to BallTree init.
    """

    def __init__(self, X, metric, **kwargs):

        super(_SetOfObjects, self).__init__(X,
                                            metric=metric, **kwargs)

        self._n = len(self.data)
        self.metric = metric
        # Start all points as 'unprocessed' ##
        self._processed = np.zeros((self._n, 1), dtype=bool)
        self.reachability_ = np.ones(self._n) * np.inf
        self.core_dists_ = np.ones(self._n) * np.nan
        # Start all points as noise ##
        self._cluster_id = -np.ones(self._n, dtype=int)
        self._is_core = np.zeros(self._n, dtype=bool)
        self.ordering_ = []


 # OPTICS helper functions; these should not be public #
 def _prep_optics(self, min_samples):
    self.core_dists_[:] = self.query(self.get_arrays()[0],
                                     k=min_samples)[0][:, -1]


 def _build_optics(setofobjects, epsilon):
    # Main OPTICS loop. Not parallelizable. The order that entries are
    # written to the 'ordering_' list is important!
    for point in range(setofobjects._n):
        if not setofobjects._processed[point]:
            _expand_cluster_order(setofobjects, point, epsilon)


 def _expand_cluster_order(setofobjects, point, epsilon):
    # As above, not parallelizable. Parallelizing would allow items in
    # the 'unprocessed' list to switch to 'processed'
    if setofobjects.core_dists_[point] <= epsilon:
        while not setofobjects._processed[point]:
            setofobjects._processed[point] = True
            setofobjects.ordering_.append(point)
            point = _set_reach_dist(setofobjects, point, epsilon)
    else:
        setofobjects.ordering_.append(point)  # For very noisy points
        setofobjects._processed[point] = True


 def _set_reach_dist(setofobjects, point_index, epsilon):
    X = np.array(setofobjects.data[point_index]).reshape(1, -1)
    indices = setofobjects.query_radius(X, r=epsilon,
                                        return_distance=False,
                                        count_only=False,
                                        sort_results=False)[0]

    # Checks to see if there more than one member in the neighborhood
    if iterable(indices):
        # Masking processed values; n_pr is 'not processed'
        n_pr = np.compress((np.take(setofobjects._processed,
                                    indices, axis=0) < 1).ravel(),
                           indices, axis=0)
        # n_pr = indices[(setofobjects._processed[indices] < 1).ravel()]
        if len(n_pr) > 0:
            dists = pairwise_distances(X,
                                       np.take(setofobjects.get_arrays()[0],
                                               n_pr,
                                               axis=0),
                                       setofobjects.metric, n_jobs=1).ravel()

            rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
            new_reach = np.minimum(np.take(setofobjects.reachability_,
                                           n_pr, axis=0),
                                   rdists)
            setofobjects.reachability_[n_pr] = new_reach

        # Checks to see if everything is already processed;
        # if so, return control to main loop
        if n_pr.size > 0:
            # Define return order based on reachability distance
            return(n_pr[quick_scan(np.take(setofobjects.reachability_,
                                           n_pr, axis=0), dists)])
        else:
            return point_index


 class OPTICS(BaseEstimator, ClusterMixin):

    """Estimate clustering structure from vector array

    OPTICS: Ordering Points To Identify the Clustering Structure
    Equivalent to DBSCAN, finds core sample of high density and expands
    clusters from them. Unlike DBSCAN, keeps cluster hierarchy for varying
    epsilon values. Optimized for usage on large point datasets.

    Parameters
    ----------
    eps : float, optional
        The maximum distance between two samples for them to be considered
        as in the same neighborhood. This is also the largest object size
        expected within the dataset. Lower eps values can be used after
        OPTICS is run the first time, with fast returns of labels. Default
        value of "np.inf" will identify clusters across all scales; reducing
        eps will result in shorter run times.

    min_samples : int, optional
        The number of samples in a neighborhood for a point to be considered
        as a core point.

    metric : string or callable, optional
        The distance metric to use for neighborhood lookups. Default is
        "minkowski". Other options include “euclidean”, “manhattan”,
        “chebyshev”, “haversine”, “seuclidean”, “hamming”, “canberra”,
        and “braycurtis”. The “wminkowski” and “mahalanobis” metrics are
        also valid with an additional argument.

    Attributes
    ----------
    `core_sample_indices_` : array, shape (n_core_samples,)
        Indices of core samples.

    `labels_` : array, shape (n_samples,)
        Cluster labels for each point in the dataset given to fit().
        Noisy samples are given the label -1.

    `reachability_` : array, shape (n_samples,)
        Reachability distances per sample.

    `ordering_` : array, shape (n_samples,)
        The cluster ordered list of sample indices

    `core_dists_` : array, shape (n_samples,)
        Distance at which each sample becomes a core point.
        Points which will never be core have a distance of inf.

    References
    ----------
    Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
    "OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
    Record 28, no. 2 (1999): 49-60.
    """

    def __init__(self, eps=np.inf, min_samples=5, metric='euclidean'):
        self.eps = eps
        self.min_samples = min_samples
        self.metric = metric

    def fit(self, X, y=None):
        """Perform OPTICS clustering

        Extracts an ordered list of points and reachability distances, and
        performs initial clustering using 'eps' distance specified at OPTICS
        object instantiation.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            The data.

        Returns
        -------
        self : instance of OPTICS
            The instance.
        """
        X = check_array(X)

        n_samples = len(X)

        # Check for valid n_samples relative to min_samples
        if self.min_samples > n_samples:
            raise ValueError("Number of training samples (%d) must be greater "
                             "than min_samples (%d) used for clustering." %
                             (n_samples, self.min_samples))

        self.tree_ = _SetOfObjects(X, self.metric)
        _prep_optics(self.tree_, self.min_samples)
        _build_optics(self.tree_, self.eps * 5.0)
        self.extract_auto()
        self.core_sample_indices_ = np.arange(self._is_core)
        self.n_clusters_ = max(self._cluster_id)
        return self

    @property
    def reachability_(self):
        return self.tree_.reachability_[:]

    @property
    def core_dists_(self):
        return self.tree_.core_dists_[:]

    @property
    def _cluster_id(self):
        return self.tree_._cluster_id[:]

    @property
    def _is_core(self):
        return self.tree_._is_core[:]

    @property
    def _index(self):
        return self.tree_._index[:]

    @property
    def ordering_(self):
        return self.tree_.ordering_[:]

    @property
    def labels_(self):
        return self._cluster_id[:]

    def extract(self, epsilon_prime, clustering='auto', **kwargs):
        """Performs Automatic extraction for an arbitrary epsilon.

        It can also do DBSCAN equivalent and it can be run multiple times.

        See extract_auto() for full description of parameters

        Parameters
        ----------
        epsilon_prime: float or int, optional
            Used for 'dbscan' clustering. Must be less than or equal to what
            was used for prep and build steps

        clustering: {'auto', 'dbscan'}, optional
            Type of cluster extraction to perform; defaults to 'auto'.

        Returns
        -------
        core_sample_indices_: array, shape (n_core_samples,)
            The indices of the core samples.

        labels_ : array, shape (n_samples,)
            The estimated labels.
        """
        check_is_fitted(self, 'reachability_')

        if epsilon_prime > self.eps * 5.0:
            raise ValueError('Specify an epsilon smaller than %s. Got %s.'
                             % (self.eps * 5, epsilon_prime))

        if clustering == 'dbscan':
            self.eps_prime = epsilon_prime
            _extract_DBSCAN(self, epsilon_prime)
        elif clustering == 'auto':
            self.extract_auto(**kwargs)

        self.core_sample_indices_ = \
            np.arange(len(self.reachability_))[self._is_core == 1]
        self.n_clusters = max(self._cluster_id)

        if epsilon_prime > (self.eps * 1.05):
            warnings.warn(
                "Warning, eps (%s) is close to epsilon_prime (%s): "
                "Output may be unstable." % (self.eps, epsilon_prime),
                RuntimeWarning, stacklevel=2)
            # XXX explain 5% thing

        return self.core_sample_indices_, self.labels_

    def extract_auto(self,
                     maxima_ratio=.75,
                     rejection_ratio=.7,
                     similarity_threshold=0.4,
                     significant_min=.003,
                     min_cluster_size_ratio=.005,
                     min_maxima_ratio=0.001):
        """Performs automatic cluster extraction for variable density data.

        Can be run multiple times with adjusted parameters. Only returns
        core and noise labels.

        Parameters
        ----------
        maxima_ratio: float, optional
            The maximum ratio we allow of average height of clusters on the
            right and left to the local maxima in question. The higher the
            ratio, the more generous the algorithm is to preserving local
            minimums, and the more cuts the resulting tree will have.

        rejection_ratio: float, optional
            Adjusts the fitness of the clustering. When the maxima_ratio is
            exceeded, determine which of the clusters to the left and right to
            reject based on rejection_ratio

        similarity_threshold: float, optional
            Used to check if nodes can be moved up one level, that is, if the
            new cluster created is too "similar" to its parent, given the
            similarity threshold. Similarity can be determined by 1) the size
            of the new cluster relative to the size of the parent node or
            2) the average of the reachability values of the new cluster
            relative to the average of the reachability values of the parent
            node. A lower value for the similarity threshold means less levels
            in the tree.

        significant_min: float, optional
            Sets a lower threshold on how small a significant maxima can be
            min_cluster_size_ratio: float, optional
            Minimum percentage of dataset expected for cluster membership.

        min_maxima_ratio: float, optional
            Used to determine neighborhood size for minimum cluster membership.
        """
        check_is_fitted(self, 'reachability_')

        # Extraction wrapper
        RPlot = self.reachability_[self.ordering_].tolist()
        RPoints = self.ordering_
        root_node = _automatic_cluster(RPlot, RPoints, maxima_ratio,
                                       rejection_ratio, similarity_threshold,
                                       significant_min, min_cluster_size_ratio,
                                       min_maxima_ratio)
        leaves = _get_leaves(root_node, [])
        # Start cluster id's at 1
        clustid = 1
        # Start all points as non-core noise
        self._cluster_id[:] = -1
        self._is_core[:] = 0
        for leaf in leaves:
            index = self.ordering_[leaf.start:leaf.end]
            self._cluster_id[index] = clustid
            self._is_core[index] = 1
            clustid += 1


 def _automatic_cluster(RPlot, RPoints, maxima_ratio, rejection_ratio,
                       similarity_threshold, significant_min,
                       min_cluster_size_ratio, min_maxima_ratio):

    min_neighborhood_size = 2
    min_cluster_size = int(min_cluster_size_ratio * len(RPoints))
    neighborhood_size = int(min_maxima_ratio * len(RPoints))

    # Should this check for < min_samples? Should this be public?
    if min_cluster_size < 5:
        min_cluster_size = 5

    # Again, should this check < min_samples, should the parameter be public?
    if neighborhood_size < min_neighborhood_size:
        neighborhood_size = min_neighborhood_size

    local_maxima_points = _find_local_maxima(RPlot, RPoints, neighborhood_size)
    root_node = _TreeNode(RPoints, 0, len(RPoints), None)
    _cluster_tree(root_node, None, local_maxima_points,
                  RPlot, RPoints, min_cluster_size,
                  maxima_ratio, rejection_ratio,
                  similarity_threshold, significant_min)

    return root_node


 class _TreeNode(object):
    # automatic cluster helper classes and functions
    def __init__(self, points, start, end, parent_node):
        self.points = points
        self.start = start
        self.end = end
        self.parent_node = parent_node
        self.children = []
        self.split_point = -1

    def assign_split_point(self, split_point):
        self.split_point = split_point

    def add_child(self, child):
        self.children.append(child)


 def _is_local_maxima(index, RPlot, RPoints, neighborhood_size):
    # 0 = point at index is not local maxima
    # 1 = point at index is local maxima
    for i in range(1, neighborhood_size + 1):
        # process objects to the right of index
        if index + i < len(RPlot):
            if (RPlot[index] < RPlot[index + i]):
                return 0
        # process objects to the left of index
        if index - i >= 0:
            if (RPlot[index] < RPlot[index - i]):
                return 0
    return 1


 def _find_local_maxima(RPlot, RPoints, neighborhood_size):
    local_maxima_points = {}
    # 1st and last points on Reachability Plot are not taken
    # as local maxima points
    for i in range(1, len(RPoints) - 1):
        # if the point is a local maxima on the reachability plot with
        # regard to neighborhood_size, insert it into priority queue and
        # maxima list
        if (RPlot[i] > RPlot[i - 1] and RPlot[i] >= RPlot[i + 1] and
                _is_local_maxima(i, RPlot, RPoints, neighborhood_size) == 1):
            local_maxima_points[i] = RPlot[i]

    return sorted(local_maxima_points,
                  key=local_maxima_points.__getitem__, reverse=True)


 def _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
                  min_cluster_size, maxima_ratio, rejection_ratio,
                  similarity_threshold, significant_min):
    # node is a node or the root of the tree in the first call
    # parent_node is parent node of N or None if node is root of the tree
    # local_maxima_points is list of local maxima points sorted in
    # descending order of reachability
    if len(local_maxima_points) == 0:
        return  # parent_node is a leaf

    # take largest local maximum as possible separation between clusters
    s = local_maxima_points[0]
    node.assign_split_point(s)
    local_maxima_points = local_maxima_points[1:]

    # create two new nodes and add to list of nodes
    node_1 = _TreeNode(RPoints[node.start:s], node.start, s, node)
    node_2 = _TreeNode(RPoints[s + 1:node.end], s + 1, node.end, node)
    local_max_1 = []
    local_max_2 = []

    for i in local_maxima_points:
        if i < s:
            local_max_1.append(i)
        if i > s:
            local_max_2.append(i)

    node_list = []
    node_list.append((node_1, local_max_1))
    node_list.append((node_2, local_max_2))

    if RPlot[s] < significant_min:
        node.assign_split_point(-1)
        # if split_point is not significant, ignore this split and continue
        _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
                      min_cluster_size, maxima_ratio, rejection_ratio,
                      similarity_threshold, significant_min)
        return

    # only check a certain ratio of points in the child
    # nodes formed to the left and right of the maxima
    # ...should check_ratio be a user settable parameter?
    check_ratio = .8
    check_value_1 = int(np.round(check_ratio * len(node_1.points)))
    check_value_2 = int(np.round(check_ratio * len(node_2.points)))
    if check_value_2 == 0:
        check_value_2 = 1
    avg_reach_value_1 = float(np.average(RPlot[(node_1.end -
                                                check_value_1):node_1.end]))
    avg_reach_value_2 = float(np.average(RPlot[node_2.start:(node_2.start +
                                                             check_value_2)]))

    if (float(avg_reach_value_1 / float(RPlot[s])) >
        maxima_ratio or float(avg_reach_value_2 /
                              float(RPlot[s])) > maxima_ratio):

        if float(avg_reach_value_1 / float(RPlot[s])) < rejection_ratio:
            # reject node 2
            node_list.remove((node_2, local_max_2))
        if float(avg_reach_value_2 / float(RPlot[s])) < rejection_ratio:
            # reject node 1
            node_list.remove((node_1, local_max_1))
        if (float(avg_reach_value_1 / float(RPlot[s])) >=
            rejection_ratio and float(avg_reach_value_2 /
                                      float(RPlot[s])) >= rejection_ratio):
            node.assign_split_point(-1)
            # since split_point is not significant,
            # ignore this split and continue (reject both child nodes)
            _cluster_tree(node, parent_node, local_maxima_points,
                          RPlot, RPoints, min_cluster_size,
                          maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)
            return

    # remove clusters that are too small
    if (len(node_1.points) < min_cluster_size and
            node_list.count((node_1, local_max_1)) > 0):
        # cluster 1 is too small
        node_list.remove((node_1, local_max_1))
    if (len(node_2.points) < min_cluster_size and
            node_list.count((node_2, local_max_1)) > 0):
        # cluster 2 is too small
        node_list.remove((node_2, local_max_2))
    if len(node_list) == 0:
        # parent_node will be a leaf
        node.assign_split_point(-1)
        return

    # Check if nodes can be moved up one level - the new cluster created
    # is too "similar" to its parent, given the similarity threshold.
    bypass_node = 0
    if parent_node is not None:
        if (float(float(node.end - node.start) /
                  float(parent_node.end - parent_node.start)) >
                similarity_threshold):

            parent_node.children.remove(node)
            bypass_node = 1

    for nl in node_list:
        if bypass_node == 1:
            parent_node.add_child(nl[0])
            _cluster_tree(nl[0], parent_node, nl[1], RPlot, RPoints,
                          min_cluster_size, maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)
        else:
            node.add_child(nl[0])
            _cluster_tree(nl[0], node, nl[1], RPlot, RPoints, min_cluster_size,
                          maxima_ratio, rejection_ratio,
                          similarity_threshold, significant_min)


 def _get_leaves(node, arr):
    if node is not None:
        if node.split_point == -1:
            arr.append(node)
        for n in node.children:
            _get_leaves(n, arr)
    return arr


 def _extract_DBSCAN(setofobjects, epsilon_prime):
    # Extract DBSCAN Equivalent cluster structure
    # Important: Epsilon prime should be less than epsilon used in OPTICS
    cluster_id = 0
    for entry in setofobjects.ordering_:
        if setofobjects.reachability_[entry] > epsilon_prime:
            if setofobjects.core_dists_[entry] <= epsilon_prime:
                cluster_id += 1
                setofobjects._cluster_id[entry] = cluster_id
            else:
                # This is only needed for compatibility for repeated scans.
                setofobjects._cluster_id[entry] = -1   # Noise points
                setofobjects._is_core[entry] = 0
        else:
            setofobjects._cluster_id[entry] = cluster_id
            if setofobjects.core_dists_[entry] <= epsilon_prime:
                setofobjects._is_core[entry] = 1   # True
            else:
                setofobjects._is_core[entry] = 0   # False
diff --git a/plot_optics.py b/plot_optics.py
 """
 ===================================
 Demo of OPTICS clustering algorithm
 ===================================

 Finds core samples of high density and expands clusters from them.
 The key specificity of this example is that the data is generated
 so that the clusters have different densities.
 """

 # Authors: Shane Grigsby <[email protected]>
 #          Amy X. Zhang <[email protected]>
 # License: BSD 3 clause


 from sklearn.cluster.optics_ import OPTICS
 import numpy as np

 import matplotlib.pyplot as plt

 # Generate sample data

 np.random.seed(0)
 n_points_per_cluster = 250

 X = np.empty((0, 2))
 X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
 X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
 X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
 X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
 X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
 X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]

 # Plot scatterplot of points

 plt.figure(figsize=(7, 7))

 plt.subplot(221)
 plt.plot(X[:, 0], X[:, 1], 'b.', ms=2)
 plt.title("Raw Data")

 # Compute OPTICS

 clust = OPTICS(eps=30.3, min_samples=9, metric='minkowski')

 # Run the fit
 clust.fit(X)

 # Plot result

 core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
 core_samples_mask[clust.core_sample_indices_] = True

 # Black removed and is used for noise instead.
 unique_labels = set(clust.labels_)
 colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

 plt.subplot(222)

 for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (clust.labels_ == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14, alpha=0.5)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=2, alpha=0.5)

 plt.title("Automatic Clustering \n Estimated number of clusters: %d"
          % clust.n_clusters)

 # (Re)-extract clustering structure, using a single eps to show comparison
 # with DBSCAN. This can be run for any clustering distance, and can be run
 # multiple times without rerunning OPTICS. OPTICS does need to be re-run to c
 # hange the min-pts parameter.

 clust.extract(.15, 'dbscan')

 core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
 core_samples_mask[clust.core_sample_indices_] = True

 # Black removed and is used for noise instead.
 unique_labels = set(clust.labels_)
 colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

 plt.subplot(223)

 for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (clust.labels_ == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
             markeredgecolor='k', markersize=14, alpha=0.5)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
             markeredgecolor='k', markersize=2, alpha=0.5)

 plt.title('DBSCAN with epsilon of 0.15 \n Estimated number of clusters: %d'
          % clust.n_clusters)

 # Try with different eps to highlight the problem


 clust.extract(.4, 'dbscan')


 core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
 core_samples_mask[clust.core_sample_indices_] = True

 # Black removed and is used for noise instead.
 unique_labels = set(clust.labels_)
 colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

 plt.subplot(224)

 for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (clust.labels_ == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
             markeredgecolor='k', markersize=14, alpha=0.5)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
             markeredgecolor='k', markersize=2, alpha=0.5)

 plt.title('DBSCAN with epsilon of 0.40 \n Estimated number of clusters: %d'
          % clust.n_clusters)

 plt.tight_layout()
 plt.show()
diff --git a/test_optics.py b/test_optics.py
 # Authors: Shane Grigsby <[email protected]>
 #          Amy X. Zhang <[email protected]>
 # License: BSD 3 clause

 import numpy as np

 from sklearn.datasets.samples_generator import make_blobs
 from sklearn.cluster.optics_ import OPTICS
 from sklearn.utils.testing import assert_equal, assert_warns
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_raises

 from sklearn.cluster.tests.common import generate_clustered_data


 def test_optics():
    # Tests the optics clustering method and all functions inside it
    # 'auto' mode

    n_clusters = 3
    X = generate_clustered_data(n_clusters=n_clusters)
    print(np.shape(X))
    # Parameters chosen specifically for this task.
    # Compute OPTICS
    clust = OPTICS(eps=6.0, min_samples=4, metric='euclidean')
    clust.fit(X)
    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
    assert_equal(n_clusters_1, n_clusters)


 def test_optics2():
    # Tests the optics clustering method and all functions inside it
    # 'dbscan' mode

    # Compute OPTICS
    X = [[1, 1]]
    clust = OPTICS(eps=0.3, min_samples=10)

    # Run the fit
    assert_raises(ValueError, clust.fit, X)


 def test_empty_extract():
    # Test extract where fit() has not yet been run.
    clust = OPTICS(eps=0.3, min_samples=10)
    assert_raises(ValueError, clust.extract, 0.01, clustering='auto')


 def test_bad_extract():
    # Test an extraction of eps too close to original eps
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(n_samples=750, centers=centers,
                                cluster_std=0.4, random_state=0)

    # Compute OPTICS
    clust = OPTICS(eps=0.003, min_samples=10)
    clust2 = clust.fit(X)
    assert_raises(ValueError, clust2.extract, 0.3)


 def test_close_extract():
    # Test extract where extraction eps is close to scaled epsPrime

    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(n_samples=750, centers=centers,
                                cluster_std=0.4, random_state=0)

    # Compute OPTICS
    clust = OPTICS(eps=0.2, min_samples=10)
    clust3 = clust.fit(X)
    # check warning when centers are passed
    assert_warns(RuntimeWarning, clust3.extract, 0.3, clustering='dbscan')
    assert_equal(max(clust3.labels_), 3)


 def test_auto_extract_hier():
    # Generate sample data

    np.random.seed(0)
    n_points_per_cluster = 250

    X = np.empty((0, 2))
    X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]

    # Compute OPTICS

    clust = OPTICS(eps=30.3, min_samples=9)

    # Run the fit
    clust.fit(X)

    # Extract the result
    # eps not used for 'auto' extract
    clust.extract(0.0, 'auto')

    assert_equal(len(set(clust.labels_)), 6)


 def test_reach_dists():
    # Tests against known extraction array

    np.random.seed(0)
    n_points_per_cluster = 250

    X = np.empty((0, 2))
    X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
    # eps not used for 'auto' extract
    X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
    X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]

    # Compute OPTICS

    clust = OPTICS(eps=30.3, min_samples=10, metric='minkowski')

    # Run the fit
    clust.fit(X)

    # Expected values
    # Skip to line 370

    v = [np.inf, 0.606005, 0.472013, 0.162951, 0.161000, 0.385547, 0.179715,
         0.213507, 0.348468, 0.308146, 0.560519, 0.266072, 0.764384, 0.253164,
         0.435716, 0.153696, 0.363924, 0.194267, 0.392313, 0.230589, 0.260023,
         0.535348, 0.168173, 0.296736, 0.310583, 0.277204, 0.250654, 0.153696,
         0.215533, 0.175710, 0.168173, 0.283134, 0.256372, 0.313931, 0.234164,
         0.179715, 0.352957, 0.277052, 0.180986, 0.203819, 0.296022, 0.356691,
         0.515438, 0.219208, 0.265821, 0.346630, 0.275305, 0.229332, 0.433715,
         0.153696, 0.584960, 0.265821, 0.471049, 0.259154, 0.461707, 0.400021,
         0.422748, 0.300699, 0.162951, 0.290504, 0.315199, 0.327130, 0.168864,
         0.462826, 0.188862, 0.259784, 0.216788, 0.259784, 0.195673, 0.315199,
         0.313931, 0.189128, 0.461707, 0.265821, 0.233594, 0.433715, 0.222260,
         0.251734, 0.352957, 0.218134, 0.453792, 0.179715, 0.296736, 0.260023,
         0.311162, 0.214549, 0.266072, 0.318744, 0.180986, 0.194267, 0.262882,
         0.420186, 0.352957, 0.288388, 0.360962, 0.328054, 0.293849, 0.198271,
         0.248772, 0.461707, 0.216788, 0.396450, 0.352957, 0.289448, 0.241311,
         0.213742, 0.220516, 0.218134, 0.153696, 0.516090, 0.218134, 0.221507,
         0.328647, 0.255933, 0.195766, 0.233594, 0.205270, 0.296736, 0.726008,
         0.251991, 0.168173, 0.214027, 0.262882, 0.342089, 0.260023, 0.266072,
         0.253164, 0.230345, 0.262882, 0.296022, 0.227047, 0.205974, 0.328647,
         0.184315, 0.196304, 0.831185, 0.514116, 0.168173, 0.189784, 0.664306,
         0.327130, 0.379139, 0.208932, 0.266140, 0.362751, 0.168173, 0.764384,
         0.327130, 0.187107, 0.194267, 0.414196, 0.251734, 0.220516, 0.363924,
         0.166886, 0.327130, 0.233594, 0.203819, 0.230589, 0.203819, 0.222972,
         0.311526, 0.218134, 0.422748, 0.314870, 0.315199, 0.315199, 0.594179,
         0.328647, 0.415638, 0.244046, 0.250654, 0.214027, 0.203819, 0.213507,
         0.260023, 0.311442, 0.168173, 0.389432, 0.229343, 0.162951, 0.311162,
         0.153696, 0.214027, 0.250654, 0.315199, 0.172484, 0.153696, 0.352957,
         0.314870, 0.328647, 0.546505, 0.378118, 0.260023, 0.387830, 0.199714,
         0.262882, 0.250654, 0.345254, 0.396450, 0.250654, 0.179715, 0.328647,
         0.179715, 0.263104, 0.265821, 0.231714, 0.514116, 0.213507, 0.474255,
         0.212568, 0.376760, 0.196304, 0.844945, 0.194267, 0.264914, 0.210320,
         0.316374, 0.184315, 0.179715, 0.250654, 0.153696, 0.162951, 0.315199,
         0.179965, 0.297876, 0.213507, 0.475420, 0.439372, 0.241311, 0.260927,
         0.194267, 0.422748, 0.222260, 0.411940, 0.414733, 0.260923, 0.396450,
         0.380672, 0.333277, 0.290504, 0.196014, 0.844945, 0.506989, 0.153696,
         0.218134, 0.392313, 0.698970, 0.168173, 0.227047, 0.028856, 0.033243,
         0.028506, 0.057003, 0.038335, 0.051183, 0.063923, 0.022363, 0.030677,
         0.036155, 0.017748, 0.062887, 0.036041, 0.051183, 0.078198, 0.068936,
         0.032418, 0.040634, 0.022188, 0.022112, 0.036858, 0.040199, 0.025549,
         0.083975, 0.032209, 0.025525, 0.032952, 0.034727, 0.068887, 0.040634,
         0.048985, 0.047450, 0.022422, 0.023767, 0.028092, 0.047450, 0.029202,
         0.026105, 0.030542, 0.032250, 0.062887, 0.038335, 0.026753, 0.028092,
         0.099391, 0.021430, 0.020496, 0.021430, 0.025043, 0.023868, 0.050069,
         0.023868, 0.044140, 0.038032, 0.022112, 0.044140, 0.031528, 0.028092,
         0.020065, 0.055926, 0.031508, 0.025549, 0.028062, 0.036155, 0.023694,
         0.029423, 0.026105, 0.028497, 0.023868, 0.044808, 0.035783, 0.033090,
         0.038779, 0.032146, 0.038421, 0.057328, 0.020065, 0.020065, 0.028858,
         0.021337, 0.041226, 0.022507, 0.028506, 0.030257, 0.057912, 0.050876,
         0.120109, 0.020065, 0.034727, 0.038596, 0.037008, 0.031609, 0.095640,
         0.083728, 0.064906, 0.030677, 0.057003, 0.037008, 0.018705, 0.030677,
         0.044140, 0.034727, 0.045226, 0.032146, 0.032418, 0.029332, 0.030104,
         0.033243, 0.030104, 0.032209, 0.026405, 0.024092, 0.048441, 0.036379,
         0.030745, 0.023454, 0.018705, 0.124248, 0.041114, 0.020700, 0.042633,
         0.042455, 0.028497, 0.029202, 0.057859, 0.053157, 0.036155, 0.029534,
         0.032209, 0.038032, 0.024617, 0.023071, 0.033090, 0.023694, 0.047277,
         0.024617, 0.023868, 0.043916, 0.025549, 0.046198, 0.041086, 0.042003,
         0.022507, 0.021430, 0.038779, 0.025043, 0.036379, 0.036326, 0.029421,
         0.023454, 0.058683, 0.025549, 0.039904, 0.022507, 0.046198, 0.029332,
         0.032209, 0.036155, 0.038421, 0.025043, 0.023694, 0.030104, 0.022363,
         0.048544, 0.035180, 0.030677, 0.022112, 0.030677, 0.036678, 0.022507,
         0.024092, 0.064231, 0.022507, 0.032209, 0.025043, 0.221152, 0.029840,
         0.038779, 0.040634, 0.024617, 0.032418, 0.025525, 0.033298, 0.028092,
         0.045754, 0.032209, 0.017748, 0.033090, 0.017748, 0.048931, 0.038689,
         0.022112, 0.027129, 0.032952, 0.036858, 0.027704, 0.032146, 0.052191,
         0.042633, 0.071638, 0.044140, 0.022507, 0.046647, 0.028270, 0.050525,
         0.036772, 0.058995, 0.038335, 0.025185, 0.022507, 0.040293, 0.032418,
         0.064308, 0.026023, 0.036155, 0.032418, 0.038032, 0.018705, 0.040293,
         0.030104, 0.030845, 0.064906, 0.025525, 0.036155, 0.022507, 0.022363,
         0.032418, 0.021430, 0.032209, 0.102770, 0.036960, 0.031062, 0.025043,
         0.036155, 0.031609, 0.036379, 0.030845, 0.048985, 0.021848, 0.025549,
         0.022507, 0.035783, 0.023698, 0.034422, 0.032418, 0.022507, 0.023868,
         0.020065, 0.023694, 0.040634, 0.055633, 0.054549, 0.044662, 0.087660,
         0.048066, 0.143571, 0.068669, 0.065049, 0.076927, 0.044359, 0.041577,
         0.052364, 0.100317, 0.062146, 0.067578, 0.054549, 0.047239, 0.062809,
         0.033917, 0.087660, 0.077113, 0.055633, 0.061854, 0.059756, 0.059537,
         0.052364, 0.060347, 0.170251, 0.108492, 0.046370, 0.070684, 0.049589,
         0.044662, 0.049013, 0.043303, 0.069573, 0.075044, 0.054354, 0.065072,
         0.073135, 0.046126, 0.055569, 0.047239, 0.062146, 0.056093, 0.059986,
         0.096182, 0.100317, 0.051649, 0.054354, 0.077420, 0.100317, 0.046370,
         0.043303, 0.045845, 0.061422, 0.091580, 0.206234, 0.051405, 0.071684,
         0.061574, 0.063666, 0.052692, 0.051649, 0.100124, 0.077909, 0.033917,
         0.058680, 0.044359, 0.065498, 0.080214, 0.123231, 0.052957, 0.056582,
         0.061540, 0.076794, 0.043303, 0.054884, 0.044359, 0.145249, 0.081741,
         0.041577, 0.056093, 0.076799, 0.044359, 0.068483, 0.051649, 0.092275,
         0.044359, 0.108492, 0.092275, 0.046126, 0.106422, 0.054354, 0.052957,
         0.073329, 0.046126, 0.086402, 0.048194, 0.128569, 0.104042, 0.061854,
         0.069573, 0.070035, 0.050346, 0.043303, 0.053576, 0.054549, 0.033917,
         0.063666, 0.058680, 0.099130, 0.080198, 0.050118, 0.054549, 0.041577,
         0.143571, 0.095965, 0.047643, 0.052364, 0.105168, 0.048685, 0.043303,
         0.052814, 0.076927, 0.054549, 0.041577, 0.066657, 0.189930, 0.046370,
         0.075044, 0.121331, 0.043303, 0.223897, 0.198621, 0.150328, 0.100317,
         0.053576, 0.070708, 0.100898, 0.047239, 0.043613, 0.065049, 0.049146,
         0.068669, 0.055569, 0.062124, 0.096408, 0.044662, 0.087660, 0.083012,
         0.050118, 0.069573, 0.046126, 0.049146, 0.049146, 0.050808, 0.080198,
         0.059986, 0.071974, 0.047239, 0.050808, 0.059986, 0.065850, 0.044863,
         0.052814, 0.044359, 0.052364, 0.108492, 0.143571, 0.050926, 0.049146,
         0.049146, 0.055569, 0.033917, 0.527659, 0.143547, 0.077113, 0.046126,
         0.106422, 0.068669, 0.108492, 0.063666, 0.054549, 0.054884, 0.056907,
         0.068669, 0.080198, 0.120887, 0.054549, 0.052692, 0.085801, 0.054884,
         0.050808, 0.094595, 0.059545, 0.054354, 0.062124, 0.087660, 0.052814,
         0.086715, 0.146253, 0.046370, 0.041577, 0.116083, 0.076927, 0.047239,
         0.084375, 0.134652, 0.217969, 0.063559, 0.061540, 0.044662, 0.054354,
         0.063666, 0.145466, 0.101700, 0.090491, 0.078536, 0.054884, 0.062124,
         0.041577, 0.043303, 0.194473, 0.079780, 0.059704, 0.054780, 0.048194,
         0.062146, 0.069573, 0.086898, 0.046675, 0.056258, 0.074141, 0.048066,
         0.052957, 0.057982, 0.058966, 0.061048, 0.050885, 0.049146, 0.080993,
         0.056093, 0.061854, 0.124025, 0.062146, 0.060906, 0.150328, 0.058680,
         0.077420, 0.051800, 0.102359, 0.113301, 0.073096, 0.116715, 0.131476,
         0.140601, 0.097667, 0.051800, 0.051800, 0.127964, 0.108870, 0.111926,
         0.093532, 0.102390, 0.144266, 0.098271, 0.102541, 0.136497, 0.127964,
         0.085569, 0.157863, 0.096739, 0.054008, 0.106219, 0.076838, 0.099076,
         0.093532, 0.059861, 0.079975, 0.116715, 0.133625, 0.053641, 0.066110,
         0.122302, 0.081313, 0.140601, 0.259889, 0.094437, 0.098271, 0.105776,
         0.225742, 0.100097, 0.147592, 0.099076, 0.093128, 0.093532, 0.134946,
         0.133625, 0.120869, 0.065932, 0.103395, 0.125172, 0.147842, 0.105278,
         0.173584, 0.168241, 0.111524, 0.093532, 0.099076, 0.100426, 0.137132,
         0.065356, 0.091108, 0.141202, 0.054008, 0.075298, 0.073717, 0.122817,
         0.105278, 0.094437, 0.067080, 0.108530, 0.115467, 0.093532, 0.085569,
         0.145180, 0.100426, 0.116715, 0.151726, 0.073096, 0.193781, 0.090614,
         0.081162, 0.051800, 0.133625, 0.136497, 0.100670, 0.081313, 0.506893,
         0.084567, 0.108530, 0.087353, 0.063184, 0.123639, 0.168333, 0.314422,
         0.091108, 0.079975, 0.091108, 0.136497, 0.122302, 0.167297, 0.067080,
         0.144266, 0.065932, 0.087667, 0.100426, 0.099460, 0.091108, 0.100637,
         0.116715, 0.079975, 0.077977, 0.090340, 0.136723, 1.943026, 0.108870,
         0.090340, 0.065932, 0.102245, 0.157863, 0.157863, 0.215574, 0.156830,
         0.093532, 0.122302, 0.097667, 0.063000, 0.116715, 0.076838, 0.148372,
         0.093532, 0.099076, 0.141202, 0.096505, 0.096739, 0.091108, 0.099076,
         0.079975, 0.108870, 0.102390, 0.079975, 0.244121, 0.167071, 0.096739,
         0.102390, 0.103395, 0.073096, 0.094887, 0.065932, 0.190667, 0.099460,
         0.102390, 0.096739, 0.102390, 0.116715, 0.100637, 0.256554, 0.103395,
         0.081313, 0.068962, 0.109645, 0.059364, 0.147842, 0.099460, 0.079262,
         0.099460, 0.065932, 0.123687, 0.090614, 0.131352, 0.098271, 0.102541,
         0.098983, 0.057224, 0.074797, 0.057224, 0.250559, 0.079975, 0.103395,
         0.100426, 0.065932, 0.120661, 0.079262, 0.065932, 0.118665, 0.081162,
         0.066283, 0.099076, 0.102359, 0.108530, 0.079975, 0.168333, 0.096739,
         0.168333, 0.097008, 0.055288, 0.172411, 0.092801, 0.051800, 0.102541,
         0.084567, 0.054008, 0.090991, 0.172411, 0.057224, 0.148396, 0.200965,
         0.076838, 0.157863, 0.053535, 0.121919, 0.126609, 0.123890, 0.118081,
         0.097008, 0.125311, 0.099460, 0.122302, 0.134946, 0.080975, 0.084567,
         0.110093, 0.102245, 0.103395, 0.171601, 0.094887, 0.126240, 0.137742,
         0.099954, 0.108530, 0.157863, 0.096739, 0.051800, 0.127964, 0.066110,
         0.061021, 0.105147, 0.100426, 0.079975, 0.088187, 0.116421, 0.076838,
         0.098271, 0.116715, 0.137656, 0.075298, 0.148396, 0.112166, 1.083905,
         0.326598, 0.428987, 0.395963, 0.224541, 0.326598, 0.030677, 0.410454,
         0.122771, 1.140305, 0.641074, 0.432159, 0.429335, 0.422908, 0.461926,
         0.293083, 0.477078, 0.714856, 0.515861, 0.405418, 0.054354, 0.341177,
         0.410008, 0.514245, 0.641074, 0.816459, 0.455115, 0.400707, 0.382240,
         0.431832, 1.618970, 0.683953, 0.182992, 0.763699, 0.515861, 0.717145,
         0.409629, 0.074134, 0.398273, 0.864974, 0.400707, 0.591403, 0.435354,
         0.514245, 1.337152, 0.841077, 0.410008, 0.683953, 0.338649, 0.557595,
         0.442092, 0.326598, 0.984189, 0.429608, 0.395963, 1.152055, 0.587222,
         1.748492, 0.477078, 0.395459, 0.717145, 0.575811, 0.210115, 0.487785,
         0.431832, 0.383852, 0.806708, 0.428987, 0.278405, 0.395963, 0.395459,
         0.383852, 1.083905, 0.428510, 0.326598, 0.108492, 0.541644, 0.612110,
         0.382240, 0.833511, 0.382240, 0.456628, 0.326598, 0.458880, 0.398273,
         0.957748, 0.326598, 0.295049, 0.629646, 0.429765, 0.439942, 0.633617,
         0.566297, 0.429335, 0.086507, 0.477078, 0.526753, 0.375240, 0.584436,
         0.355776, 0.395963, 0.644924, 0.129793, 0.484880, 0.470001, 0.572306,
         0.383852, 1.110081, 0.841077, 0.395963, 0.683953, 0.428745, 0.387752,
         0.545299, 0.686537, 0.635219, 0.840499, 0.527659, 0.400707, 0.480982,
         0.541644, 0.714856, 0.942673, 0.398273, 0.428987, 0.356781, 0.428510,
         1.140961, 0.395963, 0.356781, 0.410454, 0.541644, 0.641074, 0.484778,
         0.410008, 0.433108, 0.278405, 0.278405, 0.503141, 0.428745, 0.125103,
         0.633617, 0.410454, 0.124025, 0.461926, 0.398273, 0.410008, 1.181303,
         0.635219, 0.593537, 0.395963, 0.717145, 0.409629, 0.492595, 0.806708,
         0.503820, 0.423834, 0.557595, 0.429335, 0.470749, 0.461926, 1.890036,
         0.236343, 0.806708, 0.123561, 0.433744, 0.427348, 0.427348, 0.962234,
         0.395963, 0.409629, 0.527659, 0.425727, 0.602549, 0.901331, 0.326598,
         0.635949, 0.541644, 0.375240, 0.598969, 1.140961, 0.391998, 0.719443,
         0.410008, 0.515861, 0.714856, 0.842273, 0.410454, 0.389377, 0.431078,
         0.515861, 0.515861, 0.429335, 0.332495, 0.398273, 0.428987, 0.635219,
         0.387752, 0.384289, 0.383852, 0.430504, 0.428510, 0.431832, 0.375240,
         0.278405, 0.374102, 0.428745, 0.692878, 1.152055, 0.503820, 0.428745,
         0.352868, 0.429335, 0.375240, 0.400707, 0.427348, 0.256183, 0.962234,
         0.505376, 0.058995, 0.410454, 0.172880, 0.395963, 0.470749, 0.356781,
         1.332700, 0.683953, 0.395963, 0.806708, 0.400707, 0.330982, 0.427731,
         0.934845, 0.375240, 0.191534, 0.047239, 1.083905, 0.348794, 0.409708,
         0.503820, 0.557595, 0.429335, 0.498780, 0.293083, 0.363069, 0.442092,
         1.152055, 0.375240, 0.335677, 0.452443, 0.655156, 0.929928, 0.614869,
         1.411031, 1.101132, 0.469030, 0.404976, 0.538209, 0.655828, 0.674748,
         0.365182, 0.641612, 0.555434, 0.521651, 0.386679, 0.386679, 0.980304,
         0.659111, 0.651366, 0.538209, 0.521651, 0.884780, 1.287829, 0.558322,
         0.446161, 0.817970, 0.568499, 0.533507, 0.639746, 0.484404, 0.591751,
         0.913016, 0.446161, 0.533907, 0.606885, 0.672320, 1.150642, 0.655828,
         0.365182, 0.665088, 1.094242, 0.629401, 0.540676, 0.733026, 1.248265,
         1.273499, 0.867854, 0.538656, 0.386679, 0.922273, 0.515686, 1.321022,
         0.624444, 0.655828, 0.922273, 0.386679, 0.762191, 0.779432, 0.601851,
         0.655156, 0.926213, 0.762191, 0.641612, 0.558322, 1.025370, 0.641067,
         0.651366, 0.633434, 0.459580, 0.859221, 0.552291, 0.591751, 0.819965,
         0.669977, 1.185083, 0.499338, 0.533907, 0.752871, 0.571388, 0.539772,
         0.449182, 1.025370, 0.365182, 1.321022, 0.926213, 0.886360, 0.562272,
         0.669977, 0.796046, 0.557598, 0.596776, 0.672336, 0.659111, 0.453719,
         0.477716, 0.477716, 1.592069, 0.591751, 0.539772, 0.641612, 0.946254,
         0.744165, 0.386679, 0.593825, 0.539772, 0.449182, 0.604273, 0.794951,
         0.752871, 0.539772, 0.648732, 0.469030, 0.665088, 1.332700, 1.341388,
         0.533507, 0.544212, 1.025992, 0.645967, 0.612945, 0.868492, 0.648732,
         0.752300, 0.624444, 1.219748, 0.446161, 0.520818, 0.469044, 0.669977,
         0.926213, 0.638752, 0.762191, 0.922273, 0.794951, 0.606885, 0.669977,
         0.550113, 0.641067, 0.733026, 0.604273, 0.648732, 0.533507, 0.746506,
         0.733026, 0.980683, 0.538209, 0.669977, 0.469030, 0.648732, 0.609190,
         1.219748, 0.373113, 0.539772, 1.744047, 1.004716, 0.926213, 0.562272,
         0.752871, 0.538656, 0.449182, 0.365182, 0.469030, 0.446161, 0.484404,
         0.768592, 0.648732, 0.655156, 0.521651, 0.779432, 0.446161, 0.596776,
         0.538209, 0.726740, 0.539772, 0.469030, 0.521651, 0.561950, 0.601851,
         0.533907, 0.922273, 1.248265, 0.476800, 0.737990, 0.817970, 0.792127,
         0.533907, 0.486038, 0.624444, 0.798241, 0.476800, 1.059373, 0.645967,
         0.619940, 0.528726, 0.669977, 0.865406, 0.980683, 0.980683, 0.834671,
         1.001353, 0.752871, 0.449182, 1.096520, 0.449182, 0.593825, 0.636558,
         0.762191, 0.638591, 0.538209, 0.865406, 0.779432, 0.469044, 0.645967,
         0.557598, 0.499338, 0.484404, 0.515686, 0.794951, 0.619456, 0.733026,
         0.821769, 0.752300, 0.643302, 0.636558, 0.655156, 0.655156, 0.484404,
         0.648732, 0.726023, 0.365182, 0.606885, 0.499338, 0.520818, 0.612945,
         0.446161, 0.557598, 0.469044, 1.134650, 0.629401, 0.538656, 0.561950,
         1.364861, 0.459580, 1.025370, 0.980304, 0.607592, 0.533907, 1.134650,
         0.446161, 0.629962]

    assert_array_almost_equal(clust.reachability_, np.array(v))
	# -- coding: utf-8 --
	"""Ordering Points To Identify the Clustering Structure (OPTICS)

	These routines execute the OPTICS algorithm, and implement various
	cluster extraction methods of the ordered list.

	Authors: Shane Grigsby <[email protected]>
	Amy X. Zhang <[email protected]>
	License: BSD 3 clause
	"""

	import warnings

	import numpy as np
	from scipy import iterable

	from ..utils import check_array
	from ..utils.validation import check_is_fitted
	from ..neighbors import BallTree
	from ..base import BaseEstimator, ClusterMixin
	from ..metrics.pairwise import pairwise_distances
	from ._optics_inner import quick_scan


	class _SetOfObjects(BallTree):
	"""Build balltree data structure with processing index.

	It's built from given data in preparation for OPTICS Algorithm

	Parameters
	----------
	X: array, shape (n_samples, n_features)
	Input data.

	metric : str
	The metric used.

	**kwargs:
	kwargs passed to BallTree init.
	"""

	def __init__(self, X, metric, **kwargs):

	super(_SetOfObjects, self).__init__(X,
	metric=metric, **kwargs)

	self._n = len(self.data)
	self.metric = metric
	# Start all points as 'unprocessed' ##
	self._processed = np.zeros((self._n, 1), dtype=bool)
	self.reachability_ = np.ones(self._n) * np.inf
	self.core_dists_ = np.ones(self._n) * np.nan
	# Start all points as noise ##
	self._cluster_id = -np.ones(self._n, dtype=int)
	self._is_core = np.zeros(self._n, dtype=bool)
	self.ordering_ = []


	# OPTICS helper functions; these should not be public #
	def _prep_optics(self, min_samples):
	self.core_dists_[:] = self.query(self.get_arrays()[0],
	k=min_samples)[0][:, -1]


	def _build_optics(setofobjects, epsilon):
	# Main OPTICS loop. Not parallelizable. The order that entries are
	# written to the 'ordering_' list is important!
	for point in range(setofobjects._n):
	if not setofobjects._processed[point]:
	_expand_cluster_order(setofobjects, point, epsilon)


	def _expand_cluster_order(setofobjects, point, epsilon):
	# As above, not parallelizable. Parallelizing would allow items in
	# the 'unprocessed' list to switch to 'processed'
	if setofobjects.core_dists_[point] <= epsilon:
	while not setofobjects._processed[point]:
	setofobjects._processed[point] = True
	setofobjects.ordering_.append(point)
	point = _set_reach_dist(setofobjects, point, epsilon)
	else:
	setofobjects.ordering_.append(point) # For very noisy points
	setofobjects._processed[point] = True


	def _set_reach_dist(setofobjects, point_index, epsilon):
	X = np.array(setofobjects.data[point_index]).reshape(1, -1)
	indices = setofobjects.query_radius(X, r=epsilon,
	return_distance=False,
	count_only=False,
	sort_results=False)[0]

	# Checks to see if there more than one member in the neighborhood
	if iterable(indices):
	# Masking processed values; n_pr is 'not processed'
	n_pr = np.compress((np.take(setofobjects._processed,
	indices, axis=0) < 1).ravel(),
	indices, axis=0)
	# n_pr = indices[(setofobjects._processed[indices] < 1).ravel()]
	if len(n_pr) > 0:
	dists = pairwise_distances(X,
	np.take(setofobjects.get_arrays()[0],
	n_pr,
	axis=0),
	setofobjects.metric, n_jobs=1).ravel()

	rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
	new_reach = np.minimum(np.take(setofobjects.reachability_,
	n_pr, axis=0),
	rdists)
	setofobjects.reachability_[n_pr] = new_reach

	# Checks to see if everything is already processed;
	# if so, return control to main loop
	if n_pr.size > 0:
	# Define return order based on reachability distance
	return(n_pr[quick_scan(np.take(setofobjects.reachability_,
	n_pr, axis=0), dists)])
	else:
	return point_index


	class OPTICS(BaseEstimator, ClusterMixin):

	"""Estimate clustering structure from vector array

	OPTICS: Ordering Points To Identify the Clustering Structure
	Equivalent to DBSCAN, finds core sample of high density and expands
	clusters from them. Unlike DBSCAN, keeps cluster hierarchy for varying
	epsilon values. Optimized for usage on large point datasets.

	Parameters
	----------
	eps : float, optional
	The maximum distance between two samples for them to be considered
	as in the same neighborhood. This is also the largest object size
	expected within the dataset. Lower eps values can be used after
	OPTICS is run the first time, with fast returns of labels. Default
	value of "np.inf" will identify clusters across all scales; reducing
	eps will result in shorter run times.

	min_samples : int, optional
	The number of samples in a neighborhood for a point to be considered
	as a core point.

	metric : string or callable, optional
	The distance metric to use for neighborhood lookups. Default is
	"minkowski". Other options include “euclidean”, “manhattan”,
	“chebyshev”, “haversine”, “seuclidean”, “hamming”, “canberra”,
	and “braycurtis”. The “wminkowski” and “mahalanobis” metrics are
	also valid with an additional argument.

	Attributes
	----------
	`core_sample_indices_` : array, shape (n_core_samples,)
	Indices of core samples.

	`labels_` : array, shape (n_samples,)
	Cluster labels for each point in the dataset given to fit().
	Noisy samples are given the label -1.

	`reachability_` : array, shape (n_samples,)
	Reachability distances per sample.

	`ordering_` : array, shape (n_samples,)
	The cluster ordered list of sample indices

	`core_dists_` : array, shape (n_samples,)
	Distance at which each sample becomes a core point.
	Points which will never be core have a distance of inf.

	References
	----------
	Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
	"OPTICS: ordering points to identify the clustering structure." ACM SIGMOD
	Record 28, no. 2 (1999): 49-60.
	"""

	def __init__(self, eps=np.inf, min_samples=5, metric='euclidean'):
	self.eps = eps
	self.min_samples = min_samples
	self.metric = metric

	def fit(self, X, y=None):
	"""Perform OPTICS clustering

	Extracts an ordered list of points and reachability distances, and
	performs initial clustering using 'eps' distance specified at OPTICS
	object instantiation.

	Parameters
	----------
	X : array, shape (n_samples, n_features)
	The data.

	Returns
	-------
	self : instance of OPTICS
	The instance.
	"""
	X = check_array(X)

	n_samples = len(X)

	# Check for valid n_samples relative to min_samples
	if self.min_samples > n_samples:
	raise ValueError("Number of training samples (%d) must be greater "
	"than min_samples (%d) used for clustering." %
	(n_samples, self.min_samples))

	self.tree_ = _SetOfObjects(X, self.metric)
	_prep_optics(self.tree_, self.min_samples)
	_build_optics(self.tree_, self.eps * 5.0)
	self.extract_auto()
	self.core_sample_indices_ = np.arange(self._is_core)
	self.n_clusters_ = max(self._cluster_id)
	return self

	@property
	def reachability_(self):
	return self.tree_.reachability_[:]

	@property
	def core_dists_(self):
	return self.tree_.core_dists_[:]

	@property
	def _cluster_id(self):
	return self.tree_._cluster_id[:]

	@property
	def _is_core(self):
	return self.tree_._is_core[:]

	@property
	def _index(self):
	return self.tree_._index[:]

	@property
	def ordering_(self):
	return self.tree_.ordering_[:]

	@property
	def labels_(self):
	return self._cluster_id[:]

	def extract(self, epsilon_prime, clustering='auto', **kwargs):
	"""Performs Automatic extraction for an arbitrary epsilon.

	It can also do DBSCAN equivalent and it can be run multiple times.

	See extract_auto() for full description of parameters

	Parameters
	----------
	epsilon_prime: float or int, optional
	Used for 'dbscan' clustering. Must be less than or equal to what
	was used for prep and build steps

	clustering: {'auto', 'dbscan'}, optional
	Type of cluster extraction to perform; defaults to 'auto'.

	Returns
	-------
	core_sample_indices_: array, shape (n_core_samples,)
	The indices of the core samples.

	labels_ : array, shape (n_samples,)
	The estimated labels.
	"""
	check_is_fitted(self, 'reachability_')

	if epsilon_prime > self.eps * 5.0:
	raise ValueError('Specify an epsilon smaller than %s. Got %s.'
	% (self.eps * 5, epsilon_prime))

	if clustering == 'dbscan':
	self.eps_prime = epsilon_prime
	_extract_DBSCAN(self, epsilon_prime)
	elif clustering == 'auto':
	self.extract_auto(**kwargs)

	self.core_sample_indices_ = \
	np.arange(len(self.reachability_))[self._is_core == 1]
	self.n_clusters = max(self._cluster_id)

	if epsilon_prime > (self.eps * 1.05):
	warnings.warn(
	"Warning, eps (%s) is close to epsilon_prime (%s): "
	"Output may be unstable." % (self.eps, epsilon_prime),
	RuntimeWarning, stacklevel=2)
	# XXX explain 5% thing

	return self.core_sample_indices_, self.labels_

	def extract_auto(self,
	maxima_ratio=.75,
	rejection_ratio=.7,
	similarity_threshold=0.4,
	significant_min=.003,
	min_cluster_size_ratio=.005,
	min_maxima_ratio=0.001):
	"""Performs automatic cluster extraction for variable density data.

	Can be run multiple times with adjusted parameters. Only returns
	core and noise labels.

	Parameters
	----------
	maxima_ratio: float, optional
	The maximum ratio we allow of average height of clusters on the
	right and left to the local maxima in question. The higher the
	ratio, the more generous the algorithm is to preserving local
	minimums, and the more cuts the resulting tree will have.

	rejection_ratio: float, optional
	Adjusts the fitness of the clustering. When the maxima_ratio is
	exceeded, determine which of the clusters to the left and right to
	reject based on rejection_ratio

	similarity_threshold: float, optional
	Used to check if nodes can be moved up one level, that is, if the
	new cluster created is too "similar" to its parent, given the
	similarity threshold. Similarity can be determined by 1) the size
	of the new cluster relative to the size of the parent node or
	2) the average of the reachability values of the new cluster
	relative to the average of the reachability values of the parent
	node. A lower value for the similarity threshold means less levels
	in the tree.

	significant_min: float, optional
	Sets a lower threshold on how small a significant maxima can be
	min_cluster_size_ratio: float, optional
	Minimum percentage of dataset expected for cluster membership.

	min_maxima_ratio: float, optional
	Used to determine neighborhood size for minimum cluster membership.
	"""
	check_is_fitted(self, 'reachability_')

	# Extraction wrapper
	RPlot = self.reachability_[self.ordering_].tolist()
	RPoints = self.ordering_
	root_node = _automatic_cluster(RPlot, RPoints, maxima_ratio,
	rejection_ratio, similarity_threshold,
	significant_min, min_cluster_size_ratio,
	min_maxima_ratio)
	leaves = _get_leaves(root_node, [])
	# Start cluster id's at 1
	clustid = 1
	# Start all points as non-core noise
	self._cluster_id[:] = -1
	self._is_core[:] = 0
	for leaf in leaves:
	index = self.ordering_[leaf.start:leaf.end]
	self._cluster_id[index] = clustid
	self._is_core[index] = 1
	clustid += 1


	def _automatic_cluster(RPlot, RPoints, maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min,
	min_cluster_size_ratio, min_maxima_ratio):

	min_neighborhood_size = 2
	min_cluster_size = int(min_cluster_size_ratio * len(RPoints))
	neighborhood_size = int(min_maxima_ratio * len(RPoints))

	# Should this check for < min_samples? Should this be public?
	if min_cluster_size < 5:
	min_cluster_size = 5

	# Again, should this check < min_samples, should the parameter be public?
	if neighborhood_size < min_neighborhood_size:
	neighborhood_size = min_neighborhood_size

	local_maxima_points = _find_local_maxima(RPlot, RPoints, neighborhood_size)
	root_node = _TreeNode(RPoints, 0, len(RPoints), None)
	_cluster_tree(root_node, None, local_maxima_points,
	RPlot, RPoints, min_cluster_size,
	maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min)

	return root_node


	class _TreeNode(object):
	# automatic cluster helper classes and functions
	def __init__(self, points, start, end, parent_node):
	self.points = points
	self.start = start
	self.end = end
	self.parent_node = parent_node
	self.children = []
	self.split_point = -1

	def assign_split_point(self, split_point):
	self.split_point = split_point

	def add_child(self, child):
	self.children.append(child)


	def _is_local_maxima(index, RPlot, RPoints, neighborhood_size):
	# 0 = point at index is not local maxima
	# 1 = point at index is local maxima
	for i in range(1, neighborhood_size + 1):
	# process objects to the right of index
	if index + i < len(RPlot):
	if (RPlot[index] < RPlot[index + i]):
	return 0
	# process objects to the left of index
	if index - i >= 0:
	if (RPlot[index] < RPlot[index - i]):
	return 0
	return 1


	def _find_local_maxima(RPlot, RPoints, neighborhood_size):
	local_maxima_points = {}
	# 1st and last points on Reachability Plot are not taken
	# as local maxima points
	for i in range(1, len(RPoints) - 1):
	# if the point is a local maxima on the reachability plot with
	# regard to neighborhood_size, insert it into priority queue and
	# maxima list
	if (RPlot[i] > RPlot[i - 1] and RPlot[i] >= RPlot[i + 1] and
	_is_local_maxima(i, RPlot, RPoints, neighborhood_size) == 1):
	local_maxima_points[i] = RPlot[i]

	return sorted(local_maxima_points,
	key=local_maxima_points.__getitem__, reverse=True)


	def _cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
	min_cluster_size, maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min):
	# node is a node or the root of the tree in the first call
	# parent_node is parent node of N or None if node is root of the tree
	# local_maxima_points is list of local maxima points sorted in
	# descending order of reachability
	if len(local_maxima_points) == 0:
	return # parent_node is a leaf

	# take largest local maximum as possible separation between clusters
	s = local_maxima_points[0]
	node.assign_split_point(s)
	local_maxima_points = local_maxima_points[1:]

	# create two new nodes and add to list of nodes
	node_1 = _TreeNode(RPoints[node.start:s], node.start, s, node)
	node_2 = _TreeNode(RPoints[s + 1:node.end], s + 1, node.end, node)
	local_max_1 = []
	local_max_2 = []

	for i in local_maxima_points:
	if i < s:
	local_max_1.append(i)
	if i > s:
	local_max_2.append(i)

	node_list = []
	node_list.append((node_1, local_max_1))
	node_list.append((node_2, local_max_2))

	if RPlot[s] < significant_min:
	node.assign_split_point(-1)
	# if split_point is not significant, ignore this split and continue
	_cluster_tree(node, parent_node, local_maxima_points, RPlot, RPoints,
	min_cluster_size, maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min)
	return

	# only check a certain ratio of points in the child
	# nodes formed to the left and right of the maxima
	# ...should check_ratio be a user settable parameter?
	check_ratio = .8
	check_value_1 = int(np.round(check_ratio * len(node_1.points)))
	check_value_2 = int(np.round(check_ratio * len(node_2.points)))
	if check_value_2 == 0:
	check_value_2 = 1
	avg_reach_value_1 = float(np.average(RPlot[(node_1.end -
	check_value_1):node_1.end]))
	avg_reach_value_2 = float(np.average(RPlot[node_2.start:(node_2.start +
	check_value_2)]))

	if (float(avg_reach_value_1 / float(RPlot[s])) >
	maxima_ratio or float(avg_reach_value_2 /
	float(RPlot[s])) > maxima_ratio):

	if float(avg_reach_value_1 / float(RPlot[s])) < rejection_ratio:
	# reject node 2
	node_list.remove((node_2, local_max_2))
	if float(avg_reach_value_2 / float(RPlot[s])) < rejection_ratio:
	# reject node 1
	node_list.remove((node_1, local_max_1))
	if (float(avg_reach_value_1 / float(RPlot[s])) >=
	rejection_ratio and float(avg_reach_value_2 /
	float(RPlot[s])) >= rejection_ratio):
	node.assign_split_point(-1)
	# since split_point is not significant,
	# ignore this split and continue (reject both child nodes)
	_cluster_tree(node, parent_node, local_maxima_points,
	RPlot, RPoints, min_cluster_size,
	maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min)
	return

	# remove clusters that are too small
	if (len(node_1.points) < min_cluster_size and
	node_list.count((node_1, local_max_1)) > 0):
	# cluster 1 is too small
	node_list.remove((node_1, local_max_1))
	if (len(node_2.points) < min_cluster_size and
	node_list.count((node_2, local_max_1)) > 0):
	# cluster 2 is too small
	node_list.remove((node_2, local_max_2))
	if len(node_list) == 0:
	# parent_node will be a leaf
	node.assign_split_point(-1)
	return

	# Check if nodes can be moved up one level - the new cluster created
	# is too "similar" to its parent, given the similarity threshold.
	bypass_node = 0
	if parent_node is not None:
	if (float(float(node.end - node.start) /
	float(parent_node.end - parent_node.start)) >
	similarity_threshold):

	parent_node.children.remove(node)
	bypass_node = 1

	for nl in node_list:
	if bypass_node == 1:
	parent_node.add_child(nl[0])
	_cluster_tree(nl[0], parent_node, nl[1], RPlot, RPoints,
	min_cluster_size, maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min)
	else:
	node.add_child(nl[0])
	_cluster_tree(nl[0], node, nl[1], RPlot, RPoints, min_cluster_size,
	maxima_ratio, rejection_ratio,
	similarity_threshold, significant_min)


	def _get_leaves(node, arr):
	if node is not None:
	if node.split_point == -1:
	arr.append(node)
	for n in node.children:
	_get_leaves(n, arr)
	return arr


	def _extract_DBSCAN(setofobjects, epsilon_prime):
	# Extract DBSCAN Equivalent cluster structure
	# Important: Epsilon prime should be less than epsilon used in OPTICS
	cluster_id = 0
	for entry in setofobjects.ordering_:
	if setofobjects.reachability_[entry] > epsilon_prime:
	if setofobjects.core_dists_[entry] <= epsilon_prime:
	cluster_id += 1
	setofobjects._cluster_id[entry] = cluster_id
	else:
	# This is only needed for compatibility for repeated scans.
	setofobjects._cluster_id[entry] = -1 # Noise points
	setofobjects._is_core[entry] = 0
	else:
	setofobjects._cluster_id[entry] = cluster_id
	if setofobjects.core_dists_[entry] <= epsilon_prime:
	setofobjects._is_core[entry] = 1 # True
	else:
	setofobjects._is_core[entry] = 0 # False
	"""
	===================================
	Demo of OPTICS clustering algorithm
	===================================

	Finds core samples of high density and expands clusters from them.
	The key specificity of this example is that the data is generated
	so that the clusters have different densities.
	"""

	# Authors: Shane Grigsby <[email protected]>
	# Amy X. Zhang <[email protected]>
	# License: BSD 3 clause


	from sklearn.cluster.optics_ import OPTICS
	import numpy as np

	import matplotlib.pyplot as plt

	# Generate sample data

	np.random.seed(0)
	n_points_per_cluster = 250

	X = np.empty((0, 2))
	X = np.r_[X, [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)]
	X = np.r_[X, [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)]
	X = np.r_[X, [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)]
	X = np.r_[X, [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)]
	X = np.r_[X, [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)]
	X = np.r_[X, [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)]

	# Plot scatterplot of points

	plt.figure(figsize=(7, 7))

	plt.subplot(221)
	plt.plot(X[:, 0], X[:, 1], 'b.', ms=2)
	plt.title("Raw Data")

	# Compute OPTICS

	clust = OPTICS(eps=30.3, min_samples=9, metric='minkowski')

	# Run the fit
	clust.fit(X)

	# Plot result

	core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
	core_samples_mask[clust.core_sample_indices_] = True

	# Black removed and is used for noise instead.
	unique_labels = set(clust.labels_)
	colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

	plt.subplot(222)

	for k, col in zip(unique_labels, colors):
	if k == -1:
	# Black used for noise.
	col = 'k'

	class_member_mask = (clust.labels_ == k)

	xy = X[class_member_mask & core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
	markeredgecolor='k', markersize=14, alpha=0.5)

	xy = X[class_member_mask & ~core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
	markeredgecolor='k', markersize=2, alpha=0.5)

	plt.title("Automatic Clustering \n Estimated number of clusters: %d"
	% clust.n_clusters)

	# (Re)-extract clustering structure, using a single eps to show comparison
	# with DBSCAN. This can be run for any clustering distance, and can be run
	# multiple times without rerunning OPTICS. OPTICS does need to be re-run to c
	# hange the min-pts parameter.

	clust.extract(.15, 'dbscan')

	core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
	core_samples_mask[clust.core_sample_indices_] = True

	# Black removed and is used for noise instead.
	unique_labels = set(clust.labels_)
	colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

	plt.subplot(223)

	for k, col in zip(unique_labels, colors):
	if k == -1:
	# Black used for noise.
	col = 'k'

	class_member_mask = (clust.labels_ == k)

	xy = X[class_member_mask & core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
	markeredgecolor='k', markersize=14, alpha=0.5)

	xy = X[class_member_mask & ~core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
	markeredgecolor='k', markersize=2, alpha=0.5)

	plt.title('DBSCAN with epsilon of 0.15 \n Estimated number of clusters: %d'
	% clust.n_clusters)

	# Try with different eps to highlight the problem


	clust.extract(.4, 'dbscan')


	core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
	core_samples_mask[clust.core_sample_indices_] = True

	# Black removed and is used for noise instead.
	unique_labels = set(clust.labels_)
	colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

	plt.subplot(224)

	for k, col in zip(unique_labels, colors):
	if k == -1:
	# Black used for noise.
	col = 'k'

	class_member_mask = (clust.labels_ == k)

	xy = X[class_member_mask & core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
	markeredgecolor='k', markersize=14, alpha=0.5)

	xy = X[class_member_mask & ~core_samples_mask]
	plt.plot(xy[:, 0], xy[:, 1], '.', markerfacecolor=col,
	markeredgecolor='k', markersize=2, alpha=0.5)

	plt.title('DBSCAN with epsilon of 0.40 \n Estimated number of clusters: %d'
	% clust.n_clusters)

	plt.tight_layout()
	plt.show()