# Drop every column that consists solely of nulls: a column is kept only when
# its null count differs from the number of rows in the frame.
df = df[
    [column.name for column in df.get_columns() if column.null_count() != df.height]
]
# Keep every existing column and add `mean_price`, the mean of `price`
# (a single aggregated value selected next to the full-length columns).
df.select([pl.all(), pl.mean("price").alias("mean_price")])
Note: this produces a frame of shape (1, 1):
# Select only the aggregated mean of `price`; no other columns are kept.
df.select(pl.mean("price"))
Group by categorical values and convert to wide format (e.g. columns: ["account", "product", "count"]).
# Count the rows for each (account, product) pair and order the result by
# account. The count column is later referenced under the name "count".
agg_df = df.group_by("account", "product").agg([pl.count()]).sort("account")
agg_df.head()
# Reshape long -> wide: one row per account, one column per product holding
# its count; combinations absent from `agg_df` become 0 instead of null.
agg_counts_wide = agg_df.pivot(
    index="account",
    columns="product",
    values="count",
).fill_null(0)
agg_counts_wide
# Replace infinite values in `pct_buy_views` with null, leaving all other
# values (including existing nulls and NaN) untouched.
#
# The explicit alias is required: a when/then/otherwise expression takes its
# output name from the `then` branch, and `then(None)` is a literal, so
# without the alias `with_columns` would append a new column named "literal"
# instead of overwriting `pct_buy_views`.
df_user_brand_event = df_user_brand_event.with_columns(
    pl.when(pl.col("pct_buy_views").is_infinite())
    .then(None)
    .otherwise(pl.col("pct_buy_views"))
    .alias("pct_buy_views")
)
https://pola-rs.github.io/polars-book/user-guide/howcani/timeseries/parsing_dates_times.html
# Parse the string column `event_time` into a Datetime column, overwriting it
# in place (the parsed expression keeps the name `event_time`).
#
# `with_columns` replaces the removed `DataFrame.with_column`, matching the
# rest of this file. The format string is passed positionally because the
# keyword was renamed from `fmt` to `format` between polars releases.
# NOTE(review): confirm how `%Z` treats the time-zone token in your data.
df = df.with_columns(
    pl.col("event_time").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S %Z")
)