Skip to content

Instantly share code, notes, and snippets.

@liquidcarbon
Created October 22, 2025 22:03
Show Gist options
  • Select an option

  • Save liquidcarbon/d4568fe0abd1c38b9866e014c7d09049 to your computer and use it in GitHub Desktop.

Select an option

Save liquidcarbon/d4568fe0abd1c38b9866e014c7d09049 to your computer and use it in GitHub Desktop.
groupby-stats - a custom "describe" for pandas DataFrame groupby objects
import numpy as np
import pandas as pd
def stats(self, cols, funcs=None):
    """
    Like describe(), but allows custom stats and outputs flat columns like 'x-50', 'x-mean', etc.

    Parameters
    ----------
    cols : list
        Column labels to aggregate. Each one is selected from the groupby
        with ``self[col]``.
    funcs : list | None
        List of statistics to compute.
        Can include:
        - strings (e.g. "mean", "std", "median"), passed to the per-column
          groupby ``agg`` and used verbatim as the label
        - numbers (treated as percentiles, evaluated with ``np.percentile``
          and labelled with the number formatted as ``{:g}``)
        - callables (applied to each group's values and labelled with their
          ``__name__``, or their type name when they have none)
        Default: [50, np.mean, np.std]
        Note that ``np.std`` and the string "std" are different statistics
        when their default ``ddof`` differs (NumPy uses 0, pandas uses 1).

    Returns
    -------
    pandas.DataFrame
        One column per (column, statistic) pair, named "{col}-{label}",
        indexed by the keys of ``self.groups``.

    Raises
    ------
    TypeError
        If an entry of ``funcs`` is not a string, a number or a callable.
    """
    if funcs is None:
        funcs = [50, np.mean, np.std]

    # Resolve every requested statistic to a (fn, label) pair up front so an
    # unsupported entry fails before any aggregation work is done.
    funcs_labels = []
    for fn in funcs:
        if isinstance(fn, str):  # named aggregation, resolved by pandas
            funcs_labels.append((fn, fn))
        elif isinstance(fn, (int, float)):  # percentile
            label = f"{fn:g}"
            # Bind q as a default argument to avoid late binding in the loop.
            funcs_labels.append((lambda x, q=fn: np.percentile(x, q), label))
        elif callable(fn):
            # Callable instances and functools.partial objects have no
            # __name__; fall back to the type name instead of failing.
            label = getattr(fn, "__name__", type(fn).__name__)
            funcs_labels.append((fn, label))
        else:
            raise TypeError(f"Unsupported stat type: {type(fn)}")

    result = {}
    for col in cols:
        grouped = self[col]  # select the column once per column, not per stat
        for fn, label in funcs_labels:
            if isinstance(fn, str):
                values = grouped.agg(fn)
            else:
                values = grouped.apply(fn)
            result[f"{col}-{label}"] = values
    result = pd.DataFrame(result, index=self.groups.keys())
    return result
# Register stats() as a method on DataFrame groupby objects, so it can be
# called as df.groupby(key).stats(cols, funcs).
pd.core.groupby.DataFrameGroupBy.stats = stats
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment