nikitajz · July 1, 2024 15:22
diff --git a/[polars] Data manipulation.md b/[polars] Data manipulation.md
diff --git a/[polars] Read data.md b/[polars] Read data.md
diff --git a/Cheatsheet and examples b/Cheatsheet and examples
 # Cheatsheet
 https://franzdiebold.github.io/polars-cheat-sheet/Polars_cheat_sheet.pdf

 # PyData by datenzauberai
 https://github.com/datenzauberai/PyConDE-2023--Polars-make-the-switch/blob/main/Polars%20-%20make%20the%20switch%20to%20lightning-fast%20dataframes%20-%20Versand.pdf

 # More examples
 https://github.com/martinbel/polars-tutorial/tree/master


 ```
 # select/slice columns
 select
 # create/transform/assign columns
 with_columns
 # filter/slice/query rows
 filter
 # join/merge another dataframe
 join
 # group dataframe rows
 group_by
 # aggregate groups
 agg
 # sort dataframe
 sort
 ```


 ```
 # Assumes `import polars as pl` has already been executed.
 articles = pl.read_parquet("articles.parquet")
 sales = pl.read_parquet("sales.parquet")
 # Best-selling product_code for each month of 2019, ranked by summed price.
 monthly_best_sellers_2019 = (
     sales
     # Derive calendar parts from "date" (presumably a Date/Datetime column; confirm).
     .with_columns(
         [
             pl.col("date").dt.year().alias("year"),
             pl.col("date").dt.month().alias("month"),
         ]
     )
     .filter(pl.col("year") == 2019)
     # Default (inner) join: sales rows without a matching article are dropped.
     .join(articles, on="article_id")
     # `groupby` was renamed to `group_by` in Polars 0.19; the old name is deprecated.
     .group_by(["product_code", "month"])
     .agg(pl.col("price").sum().alias("total_sales"))
     # Keep the row(s) whose total equals that month's maximum; ties are all kept.
     .filter(pl.col("total_sales") == pl.col("total_sales").max().over("month"))
     .select(["month", "product_code"])
     .sort("month")
 )
 ```
	# Cheatsheet
	https://franzdiebold.github.io/polars-cheat-sheet/Polars_cheat_sheet.pdf

	# PyData by datenzauberai
	https://github.com/datenzauberai/PyConDE-2023--Polars-make-the-switch/blob/main/Polars%20-%20make%20the%20switch%20to%20lightning-fast%20dataframes%20-%20Versand.pdf

	# More examples
	https://github.com/martinbel/polars-tutorial/tree/master


	```
	# select/slice columns
	select
	# create/transform/assign columns
	with_columns
	# filter/slice/query rows
	filter
	# join/merge another dataframe
	join
	# group dataframe rows
	group_by
	# aggregate groups
	agg
	# sort dataframe
	sort
	```


	```
	# Assumes `import polars as pl` has already been executed.
	articles = pl.read_parquet("articles.parquet")
	sales = pl.read_parquet("sales.parquet")
	# Best-selling product_code for each month of 2019, ranked by summed price.
	monthly_best_sellers_2019 = (
	    sales
	    # Derive calendar parts from "date" (presumably a Date/Datetime column; confirm).
	    .with_columns(
	        [
	            pl.col("date").dt.year().alias("year"),
	            pl.col("date").dt.month().alias("month"),
	        ]
	    )
	    .filter(pl.col("year") == 2019)
	    # Default (inner) join: sales rows without a matching article are dropped.
	    .join(articles, on="article_id")
	    # `groupby` was renamed to `group_by` in Polars 0.19; the old name is deprecated.
	    .group_by(["product_code", "month"])
	    .agg(pl.col("price").sum().alias("total_sales"))
	    # Keep the row(s) whose total equals that month's maximum; ties are all kept.
	    .filter(pl.col("total_sales") == pl.col("total_sales").max().over("month"))
	    .select(["month", "product_code"])
	    .sort("month")
	)
	```