Skip to content

Instantly share code, notes, and snippets.

@tommylees112
Created March 4, 2025 07:43
Show Gist options
  • Save tommylees112/9c21e3dfda2233b5f471d341c400a9fc to your computer and use it in GitHub Desktop.
Save tommylees112/9c21e3dfda2233b5f471d341c400a9fc to your computer and use it in GitHub Desktop.
Get a summary of the columns in a dataframe
import pandas as pd
def detailed_column_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Return a per-column summary of *df*.

    The result has one row per column of ``df`` (in column order), indexed by
    ``column_index`` (the column's position), with these fields:

    - ``column``: the column label.
    - ``dtype``: the column's dtype.
    - ``n_unique``: number of distinct values reported by ``Series.unique()``.
    - ``n_null``: number of missing values (``isna``).
    - ``memory_usage``: deep memory usage formatted as ``"<x.xx> KB"``.
    - ``example_values``: up to the first five distinct values.

    Columns are selected by position, so frames with duplicate column labels
    are summarised correctly, and a frame with no columns yields an empty
    summary with the same layout instead of raising.
    """
    fields = [
        "column_index",
        "column",
        "dtype",
        "n_unique",
        "n_null",
        "memory_usage",
        "example_values",
    ]

    rows = []
    for ix, col in enumerate(df.columns):
        # Positional access: ``df[col]`` would return a DataFrame (not a
        # Series) when the label is duplicated, breaking ``.unique()``.
        series = df.iloc[:, ix]
        unique_vals = series.unique()
        rows.append(
            {
                "column_index": ix,
                "column": col,
                "dtype": series.dtype,
                "n_unique": len(unique_vals),
                "n_null": series.isna().sum(),
                # NOTE: Series.memory_usage counts the index as well by
                # default; kept as-is so reported figures do not change.
                "memory_usage": series.memory_usage(deep=True) / 1024,  # KB
                "example_values": unique_vals[:5],
            }
        )

    # Passing ``columns`` explicitly keeps the layout stable even when ``rows``
    # is empty (otherwise there would be no "column_index" column to index by).
    summary_df = pd.DataFrame(rows, columns=fields).set_index("column_index")

    # Format memory usage; the float cast is a no-op for populated frames and
    # gives the empty frame a numeric dtype so ``round`` is well-defined.
    summary_df["memory_usage"] = (
        summary_df["memory_usage"].astype(float).round(2).astype(str) + " KB"
    )
    return summary_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment