Pipe fittings for inspecting pandas
pipelines.
Last active
January 13, 2024 01:14
-
-
Save blakeNaccarato/e9f3c76a1cd2e7b0879c858141e91cbd to your computer and use it in GitHub Desktop.
Pipe fittings for inspecting pandas pipelines.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Pipe fittings for inspecting `pandas` pipelines. | |
Pandas allows method chaining of user-supplied functions via `pipe`. This module | |
facilitates pipeline inspection either by tapping into a function that you control via | |
the `tap` decorator, or by inserting a `tee` into the pipeline as in | |
`df.<pipeline>.pipe(tee).<pipeline>`. | |
""" | |
from functools import wraps | |
import logging | |
def default_preview(df) -> str:
    """Build the default text preview of a `pandas` dataframe or series.

    The preview is three labelled sections, in this order: the object's type,
    its ``shape`` attribute, and the result of ``df.describe(percentiles=[])``.

    Parameters
    ----------
    df
        The object to preview. It must expose ``shape`` and a ``describe``
        method accepting a ``percentiles`` keyword.

    Returns
    -------
    str
        The formatted, multi-line preview.
    """
    kind = type(df)
    shape = df.shape
    # An empty percentile list asks `describe` for no extra percentile rows.
    stats = df.describe(percentiles=[])
    return f"type: {kind}\nshape: {shape}\nstats:\n{stats}"
def tap(enable: bool = True, preview=default_preview):
    """Decorate a function to tap into a `pandas` pipeline and preview the dataframe.

    Pandas allows method chaining of user-supplied functions via `pipe`. When this
    decorator adorns such a user-supplied function, it will log the function name, the
    arguments passed to the function, and a preview of the resulting dataframe or
    series (by default: its type, shape, and statistics) to the `INFO` log level.
    Positional arguments are only included in the log message when any were passed.

    A custom `preview` function may also be provided, which must take a dataframe or
    series and return a string.

    Parameters
    ----------
    enable : bool, optional
        Enable the tap. Default is True. When False, the decorated function is
        still called, but nothing is logged and `preview` is never invoked.
    preview: optional
        The preview function. Default previews its type, shape, and statistics.

    Returns
    -------
    callable
        A decorator. The function it produces takes the dataframe or series as
        its first argument, forwards any further positional and keyword
        arguments to the decorated function, and returns that function's result.

    Example
    -------
        import pandas as pd
        import numpy as np

        def main():
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
            df = df.<pipeline>.pipe(my_func).<pipeline>

        @tap(enable=True)
        def my_func(df):
            # Arbitrary implementation
            return df

        if __name__ == "__main__":
            main()
    """

    def decorator(func):
        @wraps(func)
        def wrapper(df, *args, **kwargs):
            # `pipe(func, *args, **kwargs)` forwards positional arguments as well
            # as keyword arguments, so both must be passed through to `func`.
            result = func(df, *args, **kwargs)
            if enable:
                # Only mention positional arguments when there are some, so the
                # message for keyword-only calls is unchanged.
                args_line = f"\nargs: {args}" if args else ""
                logging.info(
                    f"\nfunc: {func.__name__}"
                    f"{args_line}"
                    f"\nkwargs: {kwargs}"
                    f"\n{preview(result)}"
                    "\n"
                )
            return result

        return wrapper

    return decorator
def tee(df, enable: bool = True, preview=default_preview):
    """Log a preview of the object flowing through a `pandas` pipeline.

    Insert this function into a chain with `pipe`, e.g. `df.pipe(tee)`. The
    input is handed back untouched so the chain continues. While enabled, the
    text produced by `preview` (by default: type, shape, and statistics) is
    logged at the `INFO` level.

    A custom `preview` function may be supplied; it must take a dataframe or
    series and return a string.

    Parameters
    ----------
    df : pandas.DataFrame | pandas.Series
        A `pandas` dataframe or series.
    enable : bool, optional
        Enable the tee. Default is True. When False, nothing is logged and
        `preview` is never called.
    preview: optional
        The preview function. Default previews its type, shape, and statistics.

    Returns
    -------
    The same `df` object that was passed in.

    Example
    -------
        import pandas as pd
        import numpy as np

        def main():
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
            df = df.<pipeline>.pipe(tee).<pipeline>

        if __name__ == "__main__":
            main()
    """
    # Disabled: act as a plain pass-through.
    if not enable:
        return df

    message = f"\nfunc: tee\n{preview(df)}\n"
    logging.info(message)
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment