Created
September 5, 2021 09:32
-
-
Save karpanGit/c66ba23aae4b207270bf91f8632df98a to your computer and use it in GitHub Desktop.
pandas, groupby, aggregation (named)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Examples of pandas groupby aggregation, with and without named output columns.
#
# Aggregations are requested through the string alias 'mean' rather than the
# np.mean callable: pandas >= 2.1 deprecates passing NumPy reducers to .agg()
# (it emits a FutureWarning), and the string alias yields the same values and
# the same output column labels.
import pandas as pd
import numpy as np
import random

# create a dataframe: 'a' is the grouping key, 'b' and 'c' are numeric columns
n = 50
df = pd.DataFrame({
    'a': random.choices(['foo', 'boo', 'bah'], k=n),
    'b': np.random.rand(n),
    'c': np.random.rand(n) * 10,
})
grouped = df.groupby('a')

# example 1, one column, one function, name(s) not specified
res = grouped['b'].agg('mean')
print(res)

# example 2, one column, many functions, name(s) not specified
# (the lambda is kept inline so pandas labels its column '<lambda>')
res = grouped['b'].agg(['mean', lambda x: x.mean() - x.median()])
print(res)

# example 3, many columns, many functions, name(s) not specified
res = grouped[['b', 'c']].agg(['mean', lambda x: x.mean() - x.median()])
print(res)

# example 4, many columns, many functions, names specified (no spaces)
res = grouped[['b', 'c']].agg(
    b_mean=pd.NamedAgg('b', 'mean'),
    c_mean_median_diff=pd.NamedAgg('c', lambda x: x.mean() - x.median()),
)
print(res)

# example 5, many columns, many functions, names specified (spaces);
# names containing spaces are not valid keywords, so unpack them from a dict
res = grouped[['b', 'c']].agg(**{
    'b mean': pd.NamedAgg(column='b', aggfunc='mean'),
    'c mean median diff': pd.NamedAgg('c', lambda x: x.mean() - x.median()),
})
print(res)

# example 6, many columns, many functions, names specified (spaces),
# using plain (column, aggfunc) tuples instead of pd.NamedAgg
res = grouped[['b', 'c']].agg(**{
    'b mean': ('b', 'mean'),
    'c mean median diff': ('c', lambda x: x.mean() - x.median()),
})
print(res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment