Last active
October 11, 2016 14:50
-
-
Save jayelm/129382ba73ca2fb390c870350bed2d30 to your computer and use it in GitHub Desktop.
Convenient Cluster Counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Neat wrapper around an auto-incrementing defaultdict. Most useful for
assigning unique numbers to unseen examples while clustering, but probably
has other uses as well.
Example usage:

    In [1]: cc = ClusterCounter()

    In [2]: cc['setosa']
    Out[2]: 0

    In [3]: cc['virginica']
    Out[3]: 1

    In [4]: cc['versicolor']
    Out[4]: 2

    In [5]: cc['new_species']
    Out[5]: 3

    In [6]: cc['virginica']
    Out[6]: 1

    In [7]: cc.nclus
    Out[7]: 4

The following are supported by ClusterCounter2:

    In [8]: 'new_species' in cc
    Out[8]: True

    In [9]: cc.to_dict()
    Out[9]: {'new_species': 3, 'setosa': 0, 'versicolor': 2, 'virginica': 1}
""" | |
from collections import defaultdict | |
class ClusterCounter(object):
    """
    A bare minimum cluster counter. Supports the dynamic assignment and
    retrieval of numerical clusters, starting from 0.

    Looking up an unseen value with ``cc[val]`` assigns it the next unused
    cluster number; looking up a value seen before returns the number it was
    given originally. Membership tests (``val in cc``) and iteration never
    assign new clusters.
    """

    def __init__(self):
        """Create an empty counter whose first cluster number is 0."""
        self._cn = 0  # Next cluster number to hand out.

        def new_cluster():
            # Return the current number, then advance the counter so the
            # next unseen value gets a fresh one.
            c = self._cn
            self._cn += 1
            return c

        # The defaultdict calls new_cluster() exactly once per unseen key.
        self._autocounter = defaultdict(new_cluster)

    def __getitem__(self, val):
        """
        Return the cluster number for ``val``, assigning the next unused
        number first if ``val`` has not been seen before.
        """
        return self._autocounter[val]

    def __contains__(self, key):
        """
        Return True if ``key`` already has a cluster number, False
        otherwise, *without* assigning one.

        This must be defined explicitly: a class with ``__getitem__`` but no
        ``__contains__``/``__iter__`` makes ``key in cc`` fall back to
        calling ``cc[0]``, ``cc[1]``, ... until IndexError, which never
        happens here because every lookup succeeds by creating a cluster.
        """
        return key in self._autocounter

    def __iter__(self):
        """
        Iterate over the values that have been assigned a cluster.

        Defined for the same reason as ``__contains__``: without it,
        ``for v in cc`` would loop forever, creating a cluster per integer.
        """
        return iter(self._autocounter)

    @property
    def nclus(self):
        """Return the number of clusters assigned so far."""
        return self._cn
class ClusterCounter2(object):
    """
    A more detailed implementation of a ClusterCounter, with additional
    functionality and documentation.

    Cluster numbers are handed out consecutively, starting at ``initial_n``,
    in the order that unseen values are first looked up.
    """

    def __init__(self, initial_n=0):
        """
        Initialize a cluster counter. Optional: specify a starting cluster
        number (e.g. 1).
        """
        self._initial_n = initial_n
        self._cn = initial_n  # Next cluster number to hand out.

        def new_cluster():
            # Return the current number, then advance the counter so the
            # next unseen value gets a fresh one.
            c = self._cn
            self._cn += 1
            return c

        # The defaultdict calls new_cluster() exactly once per unseen key.
        self._autocounter = defaultdict(new_cluster)

    def __getitem__(self, val):
        """
        Get the numerical cluster assignment for the given value. If the
        value does not exist, it is assigned the next unused cluster number,
        and that number is returned.
        """
        return self._autocounter[val]

    def get(self, val):
        """
        An alias for cc[val].

        Note that, unlike ``dict.get``, this assigns a new cluster number
        when ``val`` has not been seen before.
        """
        return self.__getitem__(val)

    def __contains__(self, key):
        """
        Return True if the ClusterCounter has an assignment for the given
        key, False otherwise, *without* updating the defaultdict if not.
        """
        return key in self._autocounter

    def __iter__(self):
        """
        Iterate over the items whose clusters have already been assigned.

        This must be defined explicitly: with ``__getitem__`` but no
        ``__iter__``, ``for k in cc`` falls back to calling ``cc[0]``,
        ``cc[1]``, ... until IndexError, which never happens here because
        every lookup succeeds by creating a new cluster.
        """
        return iter(self._autocounter)

    def to_dict(self):
        """Return the current state of the counter in builtin dict form."""
        return dict(self._autocounter)

    def keys(self):
        """Return the items whose clusters have already been assigned."""
        return self._autocounter.keys()

    @property
    def nclus(self):
        """Return the number of clusters currently observed."""
        return self._cn - self._initial_n

    @property
    def initial_n(self):
        """Return the number the ClusterCounter started at."""
        return self._initial_n
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment