Last active
December 6, 2023 03:24
-
-
Save swo/6a682721c90095cbf05c76ba3bee1b67 to your computer and use it in GitHub Desktop.
Polars cheat sheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl
import polars.selectors as cs
# Making data frames ------------------------------------------

# From a dictionary mapping column name -> column values
df = pl.DataFrame(
    {
        "name": ["foo", "bar", "baz"],
        "bar": [0, 1, 2],
        "qux": [0.0, 1.0, 2.0],
    }
)

# From a list of named Series (name given directly to the constructor)
df = pl.DataFrame(
    [
        pl.Series("name", ["foo", "bar", "baz"]),
        pl.Series("bar", [0, 1, 2]),
        pl.Series("qux", [0.0, 1.0, 2.0]),
    ]
)
# Selecting and renaming -----------------------------------

# Keep only the listed columns (everything else is dropped)
df.select("x", "y")

# Remove one column (everything else is kept)
df.drop("x")

# Rename a column via an {old: new} mapping
df.rename({"old_name": "new_name"})

# Select and rename in one step (other columns are dropped);
# the keyword form and the .alias() form are two spellings of the same thing
df.select(new_x=pl.col("x"))
df.select(pl.col("x").alias("new_x"))
# Mutation --------------------------------------------------

# Mutate and select: only the listed expressions end up in the result
df.select(
    pl.col("name").sort(),
    pl.col("bar").sum().alias("bar_sum"),
)

# Adding a new column
df.with_columns(constant_col=pl.Series([1]))
df.with_columns([pl.Series([1]).alias("constant_col")])

# Simple math on one column (keyword form and .alias() form)
df.with_columns(x_plus_one=pl.col("x") + 1)
df.with_columns((pl.col("x") + 1).alias("x_plus_one"))

# "Mutate in place": the expression keeps the name 'x', so the new values
# replace the existing 'x' column in the returned frame
df.with_columns(pl.col("x") + 1)  # column 'x' gets 1 added to it

# Simple math across columns
df.with_columns(x_plus_y=pl.col("x") + pl.col("y"))

# Functions of multiple columns: pack the columns into a struct, then map a
# Python function over each row. `Expr.apply` was renamed to
# `Expr.map_elements` (deprecated since polars 0.19.0), so use the new name.
df.with_columns(
    pl.struct(["x", "y"])
    .map_elements(lambda row: row["x"] + row["y"])
    .alias("x_plus_y")
)

# Mutate across multiple columns (`cs` is `polars.selectors`)
df.select(pl.col("a", "b") + 1)
df.with_columns(cs.by_name(["x", "y"]) + 1)
# Grouped mutation ------------------------------------------

# Aggregate results, reducing number of rows
df.group_by("group").agg(sum_by_group=pl.col("x").sum())

# Do a mutation within a group without aggregating.
# `Expr.cumsum` was renamed to `Expr.cum_sum` (deprecated since polars 0.19.14).
df.with_columns(cumsum_x_by_group=pl.col("x").cum_sum().over("group"))

# Counting rows per group.
# `GroupBy.count` was renamed to `GroupBy.len` (deprecated since polars 0.20.5);
# note the resulting column is named 'len' rather than 'count'.
df.group_by("group").len()
# Printing ------------------------------------------------

# Print all rows: -1 lifts the row limit. Using pl.Config as a context
# manager keeps the setting scoped to the `with` block.
with pl.Config() as cfg:
    cfg.set_tbl_rows(-1)
    print(df)
def print_all(df):
    """Print *df* with the table row limit lifted.

    The row limit is changed through a ``pl.Config`` context manager and
    ``df`` is printed inside it. Returns ``None``.
    """
    with pl.Config() as cfg:
        cfg.set_tbl_rows(-1)  # -1: print all rows
        print(df)
# Use the helper at the end of a method chain
df.pipe(print_all)

# The same approach works for showing all columns: use set_tbl_cols

# Dates ---------------------------------------------------

# Round down to the prior Sunday: shift forward one day, truncate to the
# start of the week, then shift back one day.
# NOTE(review): relies on truncate("1w") snapping to Monday, and `x` is a
# placeholder for a date/datetime expression or Series; confirm both.
(
    x.dt.offset_by("1d")
    .dt.truncate("1w")
    .dt.offset_by("-1d")
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment