See boilercv.docs
for updated implementation
Last active
January 13, 2024 01:16
-
-
Save blakeNaccarato/3c751f0a9f0f5143f3cffc525e5dd577 to your computer and use it in GitHub Desktop.
A Python notebook with helper functions: `set_dtypes` updates column data types, and `display_df` displays dataframes (truncating long ones) in the desired number format. The notebook also sets the Jupyter display precision to the same number format.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from typing import Callable\n", | |
"\n", | |
"import pandas as pd\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"FLOAT_SPEC = \"#.4g\"\n", | |
"\n", | |
"pd.options.display.min_rows = pd.options.display.max_rows = 20\n", | |
"pd.options.display.float_format = f\"{{:{FLOAT_SPEC}}}\".format\n", | |
"%precision %$FLOAT_SPEC\n", | |
"\n", | |
"\n", | |
"def set_dtypes(df: pd.DataFrame, dtypes: dict[str, str]) -> pd.DataFrame:\n", | |
" \"\"\"Set column datatypes in a dataframe.\"\"\"\n", | |
" return df.assign(**{name: df[name].astype(dtype) for name, dtype in dtypes.items()})\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def display_df(*dfs: pd.DataFrame | pd.Series):\n", | |
" \"\"\"Display a formatted DataFrame.\n", | |
" \n", | |
" When a mapping of column names to callables is given to the Pandas styler, the\n", | |
" callable will be used internally by Pandas to produce formatted strings. This\n", | |
" differs from elementwise formatting, in which Pandas expects the callable to\n", | |
" actually process the value and return the formatted string.\n", | |
" \"\"\"\n", | |
" for df in dfs:\n", | |
" if isinstance(df, pd.Series):\n", | |
" df = df.to_frame()\n", | |
" df, truncated = truncate(df)\n", | |
" formatter = format_cell if truncated else get_df_formatter(df)\n", | |
" display(df.style.format(formatter))\n", | |
"\n", | |
"\n", | |
"def get_df_formatter(df: pd.DataFrame) -> dict[str, Callable]:\n", | |
" \"\"\"Get formatter for the dataframe.\"\"\"\n", | |
" cols = df.columns\n", | |
" types = {col: dtype.type for col, dtype in zip(cols, df.dtypes)}\n", | |
" return {col: get_formatter(types[col]()) for col in cols}\n", | |
"\n", | |
"\n", | |
"def format_cell(cell) -> str:\n", | |
" \"\"\"Format individual cells.\"\"\"\n", | |
" return get_formatter(cell)(cell)\n", | |
"\n", | |
"\n", | |
"def get_formatter(instance) -> str:\n", | |
" \"\"\"Get the formatter depending on the type of the instance.\"\"\"\n", | |
" match instance:\n", | |
" case float():\n", | |
" return lambda cell: f\"{cell:{FLOAT_SPEC}}\"\n", | |
" case _:\n", | |
" return lambda cell: f\"{cell}\"\n", | |
"\n", | |
"\n", | |
"def truncate(df: pd.DataFrame | pd.Series) -> tuple[pd.DataFrame, bool]:\n", | |
" \"\"\"Truncate long dataframes, showing only the head and tail.\"\"\"\n", | |
"\n", | |
" if len(df) <= pd.options.display.max_rows:\n", | |
" return df, False\n", | |
"\n", | |
" df_truncated = pd.concat(\n", | |
" [\n", | |
" df.head(pd.options.display.min_rows // 2),\n", | |
" pd.Series(name=\"...\", dtype=object).to_frame().T,\n", | |
" df.tail(pd.options.display.min_rows // 2),\n", | |
" ]\n", | |
" )\n", | |
" df_truncated.loc[\"...\", :] = \"...\"\n", | |
"\n", | |
" return df_truncated, True\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.10.4 ('data')", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "71731e66821fe2c6da416d2ded4a1ef99f386126a5584fcd16f8e3a2b440f5bd" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment