Skip to content

Instantly share code, notes, and snippets.

@karpanGit
Created November 13, 2021 13:26
Show Gist options
  • Select an option

  • Save karpanGit/0fd311f3ee0f915a678514602204a200 to your computer and use it in GitHub Desktop.

Select an option

Save karpanGit/0fd311f3ee0f915a678514602204a200 to your computer and use it in GitHub Desktop.
pandas, reorder categorical
# experiment with ordering and sorting
import pandas as pd
# Create an ordered categorical series and show its categories and codes.
# NOTE: in the section labels printed below, "series values" refers to the
# integer codes (``.cat.codes``), not to the labels shown when the series is
# printed -- compare the codes printed in each section.
s = pd.Series(pd.Categorical(['a', 'b', 'c', 'a'], ordered=True))
print(s)
print(s.cat.categories)
print(s.cat.codes)

# Method 1: set_categories keeps every element's label and remaps the codes
# so that they point into the new category order.
print('-- set categories (changes the series values) --')
s2 = s.cat.set_categories(['c', 'a', 'b'])
print(s2)
print(s2.cat.categories)
print(s2.cat.codes)

# Method 2: rename the categories positionally; the codes stay as they were,
# so each element now shows the new label found at its old code.
# The direct ``s.cat.categories = [...]`` setter used previously was removed
# in pandas 2.0; rename_categories is the supported equivalent and returns a
# new Series, which leaves ``s`` untouched without an explicit copy.
print('-- set categories 2 (does not change the series values) --')
s3 = s.cat.rename_categories(['c', 'a', 'b'])
print(s3)
print(s3.cat.categories)
print(s3.cat.codes)

# Method 3: set_categories(rename=True) treats the new list as a positional
# rename of the existing categories, giving the same result as method 2.
print('-- set categories 3 (does not change the series values) --')
s4 = s.cat.set_categories(['c', 'a', 'b'], rename=True)
print(s4)
print(s4.cat.categories)
print(s4.cat.codes)

# Method 4: reorder_categories requires the same set of categories in a new
# order; labels are kept and the codes are remapped, as in method 1.
print('-- reorder categories (changes the series values) --')
s5 = s.cat.reorder_categories(['c', 'a', 'b'])
print(s5)
print(s5.cat.categories)
print(s5.cat.codes)
# prints
# 0 a
# 1 b
# 2 c
# 3 a
# dtype: category
# Categories (3, object): ['a' < 'b' < 'c']
# Index(['a', 'b', 'c'], dtype='object')
# 0 0
# 1 1
# 2 2
# 3 0
# dtype: int8
# -- set categories (changes the series values) --
# 0 a
# 1 b
# 2 c
# 3 a
# dtype: category
# Categories (3, object): ['c' < 'a' < 'b']
# Index(['c', 'a', 'b'], dtype='object')
# 0 1
# 1 2
# 2 0
# 3 1
# dtype: int8
# -- set categories 2 (does not change the series values) --
# 0 c
# 1 a
# 2 b
# 3 c
# dtype: category
# Categories (3, object): ['c' < 'a' < 'b']
# Index(['c', 'a', 'b'], dtype='object')
# 0 0
# 1 1
# 2 2
# 3 0
# dtype: int8
# -- set categories 3 (does not change the series values) --
# 0 c
# 1 a
# 2 b
# 3 c
# dtype: category
# Categories (3, object): ['c' < 'a' < 'b']
# Index(['c', 'a', 'b'], dtype='object')
# 0 0
# 1 1
# 2 2
# 3 0
# dtype: int8
# -- reorder categories (changes the series values) --
# 0 a
# 1 b
# 2 c
# 3 a
# dtype: category
# Categories (3, object): ['c' < 'a' < 'b']
# Index(['c', 'a', 'b'], dtype='object')
# 0 1
# 1 2
# 2 0
# 3 1
# dtype: int8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment