Last active
November 23, 2023 19:01
-
-
Save thomasaarholt/d81c0cca779978b2c0a9ee91b5f94f85 to your computer and use it in GitHub Desktop.
Structlog example showing how to serialize polars and pandas dataframes and pydantic models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Any | |
import json | |
import structlog | |
import pandas as pd # pip install pandas | |
import polars as pl # pip install polars | |
from pydantic import BaseModel | |
class PydanticModel(BaseModel):
    """Minimal pydantic model used as a sample payload for the logging demo."""

    # Integer field.
    a: int
    # String field.
    b: str
# Sample payloads: the same two-column table built once with polars and once
# with pandas, plus a single pydantic model instance.
df = pl.DataFrame(
    {
        "a": [1, 2, 3],
        "b": ["a", "b", "c"],
    }
)
df2 = pd.DataFrame(
    {
        "a": [1, 2, 3],
        "b": ["a", "b", "c"],
    }
)
pydantic_model = PydanticModel(a=1, b="a")
def default(obj: object) -> Any:
    """Return a JSON-serializable version of a DataFrame or pydantic model.

    Meant to be used as a ``default=`` hook, e.g.
    ``structlog.processors.JSONRenderer(default=default)``. Such a hook is
    handed whatever object the encoder could not serialize on its own, so
    ``obj`` is typed as ``object`` and unsupported types are rejected
    explicitly at the end.

    Args:
        obj: The object to convert. Supported types are ``pl.DataFrame``,
            ``pd.DataFrame`` and pydantic ``BaseModel`` instances.

    Returns:
        Plain JSON-compatible data. For the two DataFrame types this is
        whatever ``json.loads`` yields from the library's own JSON output;
        for pydantic models it is the result of ``model_dump(mode="json")``.

    Raises:
        TypeError: If ``obj`` is not one of the supported types.
    """
    if isinstance(obj, pl.DataFrame):
        # Let polars produce the JSON text, then parse it back so the caller
        # receives plain Python containers instead of a pre-encoded string.
        return json.loads(obj.write_json())
    if isinstance(obj, pd.DataFrame):
        # Same round-trip, using pandas' own JSON writer.
        return json.loads(obj.to_json())
    if isinstance(obj, BaseModel):
        return obj.model_dump(mode="json")
    raise TypeError(
        f"Object of type {obj.__class__.__name__} is not JSON serializable. "
        "Add entries to the `default` function to support this type."
    )
# --- BAD ---------------------------------------------------------------------
# With no `default` hook, the JSON output only carries each object's __repr__
# as a string.
processors = [structlog.processors.JSONRenderer()]
structlog.configure(processors=processors)
log = structlog.stdlib.get_logger()

log.info("polars", df=df)
# {"df": "shape: (3, 2)\n\u250c\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 a \u2506 b \u2502\n\u2502 --- \u2506 --- \u2502\n\u2502 i64 \u2506 str \u2502\n\u255e\u2550\u2550\u2550\u2550\u2550\u256a\u2550\u2550\u2550\u2550\u2550\u2561\n\u2502 1 \u2506 a \u2502\n\u2502 2 \u2506 b \u2502\n\u2502 3 \u2506 c \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2518", "event": "polars"}

log.info("pandas", df=df2)
# {"df": " a b\n0 1 a\n1 2 b\n2 3 c", "event": "pandas"}

log.info("pydantic", df=pydantic_model)
# {"df": "PydanticModel(a=1, b='a')", "event": "pydantic"}

# --- GOOD --------------------------------------------------------------------
# Passing `default` lets the renderer turn the objects into real JSON
# structures. The outputs below are as recorded in the original example.
# NOTE(review): the exact polars JSON layout may differ between polars
# versions - confirm against the installed version.
processors = [structlog.processors.JSONRenderer(default=default)]
structlog.configure(processors=processors)

log.info("polars", df=df)
# {"df": {"columns": [{"name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3]}, {"name": "b", "datatype": "Utf8", "bit_settings": "", "values": ["a", "b", "c"]}]}, "event": "polars"}

log.info("pandas", df=df2)
# {"df": {"a": {"0": 1, "1": 2, "2": 3}, "b": {"0": "a", "1": "b", "2": "c"}}, "event": "pandas"}

log.info("pydantic", df=pydantic_model)
# {"df": {"a": 1, "b": "a"}, "event": "pydantic"}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment