|
import pandas |
|
import numpy |
|
|
|
#create some categorical data: 100 random draws from the letters 'a' through 'i' |
|
In [ 1]: categories = list( 'abcdefghi') |
|
In [ 2]: category_series = pandas.Series( map(lambda x: categories[x], numpy.random.randint( |
|
low = 0, high = len(categories), size = [100, ]))) |
|
|
|
In [ 3]: category_series.head() |
|
Out[ 3]: |
|
0 e |
|
1 d |
|
2 f |
|
3 h |
|
4 a |
|
dtype: object |
|
|
|
#Create a pandas.Categorical object |
|
In [79]: cat_obj = pandas.Categorical(category_series) |
|
|
|
#here are the integer codes, one per element (note: pandas 0.15+ renamed the `labels` attribute to `codes`) |
|
In [80]: cat_obj.labels |
|
Out[80]: |
|
array([4, 3, 5, 7, 0, 6, 8, 1, 2, 1, 1, 2, 3, 4, 6, 2, 6, 2, 7, 4, 4, 3, 0, |
|
3, 7, 1, 6, 1, 7, 3, 7, 5, 4, 0, 8, 0, 8, 4, 3, 7, 4, 0, 8, 3, 3, 8, |
|
7, 6, 4, 0, 4, 7, 0, 0, 7, 8, 1, 1, 0, 2, 2, 7, 5, 1, 3, 7, 1, 3, 7, |
|
3, 4, 0, 0, 6, 6, 4, 6, 4, 3, 1, 3, 8, 5, 3, 7, 0, 7, 7, 3, 1, 7, 6, |
|
7, 3, 1, 1, 2, 6, 6, 0]) |
|
|
|
#here's a dictionary mapping each category value to its integer code |
|
In [81]: dict(zip(cat_obj, cat_obj.labels) ) |
|
Out[81]: {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8} |
|
|
|
|