Skip to content

Instantly share code, notes, and snippets.

@karpanGit
Created September 4, 2021 12:15
Show Gist options
  • Select an option

  • Save karpanGit/4cbccc6511d47e9ad7cee16d827a594d to your computer and use it in GitHub Desktop.

Select an option

Save karpanGit/4cbccc6511d47e9ad7cee16d827a594d to your computer and use it in GitHub Desktop.
pandas, slicing, multiindex
import pandas as pd
import numpy as np
# create a dataset with a MultiIndex on both the index and the columns
def indx_names(prefix: str, num: int, width: int = 2) -> list:
    """Return ``num`` labels of the form ``prefix`` + padded counter.

    The counter runs from 0 to ``num - 1`` and is right-aligned in a field of
    ``width`` characters, padded on the left with underscores, so
    ``indx_names('A', 3)`` gives ``['A_0', 'A_1', 'A_2']``.

    Args:
        prefix: Text placed in front of every counter.
        num: Number of labels to generate; zero or a negative value yields
            an empty list.
        width: Minimum width of the padded counter (default 2). Counters with
            more digits than ``width`` are not truncated.

    Returns:
        A list of ``num`` label strings.
    """
    # The first "_" in the format spec is the fill character, ">" right-aligns.
    return [f'{prefix}{i:_>{width}}' for i in range(num)]
# Row index: the Cartesian product of 4 "A", 2 "B", 4 "C" and 2 "D" labels.
idx = pd.MultiIndex.from_product(
    [indx_names(letter, count) for letter, count in (('A', 4), ('B', 2), ('C', 4), ('D', 2))]
)
# Column index: two named levels built from explicit (lvl0, lvl1) pairs.
cols = pd.MultiIndex.from_tuples(
    [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
    names=['lvl0', 'lvl1'],
)
# Fill the frame with consecutive integers, one per cell, in row-major order.
df = pd.DataFrame(
    np.arange(len(idx) * len(cols)).reshape((len(idx), len(cols))),
    index=idx,
    columns=cols,
)
# Select with plain tuples of per-level selectors. slice(None) keeps every
# label of a level; levels omitted on the right are treated as slice(None).
every = slice(None)
b1_rows = (every, ['B_1'])
# Rows whose second level is 'B_1', all columns.
res = df.loc[b1_rows, :]
# Additionally restrict the fourth row level to 'D_0'.
res = df.loc[(every, ['B_1'], every, ['D_0']), :]
# Same rows as the first selection, but only columns whose lvl1 is 'foo'.
res = df.loc[b1_rows, (every, 'foo')]
# Select with IndexSlice, which allows ordinary ":" slice syntax per level.
from pandas import IndexSlice

idxS = IndexSlice
# Second row level equal to 'B_1', every label on the other levels.
b1_scalar = idxS[:, 'B_1']
res = df.loc[b1_scalar, :]
# Second row level in ['B_1'] and fourth row level equal to 'D_0'.
b1_and_d0 = idxS[:, ['B_1'], :, 'D_0']
res = df.loc[b1_and_d0, :]
# Second row level in ['B_1'], columns whose lvl1 is 'foo'.
res = df.loc[idxS[:, ['B_1']], idxS[:, 'foo']]
# Combine IndexSlice with a boolean mask.
# The mask is True for rows whose ('a', 'foo') value exceeds 100.
msk = df.loc[:, idxS['a', 'foo']].gt(100)
masked_b1_rows = idxS[msk, ['B_1']]
res = df.loc[masked_b1_rows, idxS[:, 'foo']]
# Assign through IndexSlice combined with a boolean mask.
msk = df.loc[:, idxS['a', 'foo']] > 100
target_rows = idxS[msk, ['B_1']]
foo_columns = idxS[:, 'foo']
# Variant 1: the right-hand side is the whole negated frame.
df.loc[target_rows, foo_columns] = -df
# Variant 2: the right-hand side is the negation of exactly the selected cells.
# NOTE(review): this runs after variant 1 with the same mask, so the selected
# cells are negated a second time — confirm that is the intended end state.
df.loc[target_rows, foo_columns] = -df.loc[target_rows, foo_columns]
# Pass axis= to loc so the selector is applied to the row index only.
row_loc = df.loc(axis='index')
res = row_loc[idxS[:, 'B_1']]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment