Skip to content

Instantly share code, notes, and snippets.

@jongbinjung
Last active September 8, 2025 22:21
Show Gist options
  • Save jongbinjung/fcd5ab5b17a29a3b7007acb06d589460 to your computer and use it in GitHub Desktop.
Save jongbinjung/fcd5ab5b17a29a3b7007acb06d589460 to your computer and use it in GitHub Desktop.
"""
* Scheduling for the month of March 2025, with weeks starting on Sunday:
March, 2025
Sun Mon Tue Wed Thu Fri Sat
23 24 25 26 27 28 1 -> Week i
2 3 4 5 6 7 8 -> Week i + 1
9* 10 11 12 13 14 15 -> Week i + 2
16 17 18 19 20 21 22 -> Week i + 3
23 24 25 26 27 28 29 -> Week i + 4
30 31 -> Week i + 5
* Daylight Saving Time starts (clocks move forward one hour) on March 9, 2025, at 2:00 AM, local standard time.
"""
import pendulum
import polars as pl
# Sparse March 2025 shift calendar: shifts exist only on 3/1, on the first
# (Sunday) and last (Saturday) day of each week, and on 3/31. Every entry is
# midnight in America/Los_Angeles; "epoch" is that instant in Unix seconds.
df = pl.DataFrame(
    {
        "date": [
            pendulum.datetime(2025, 3, day_of_month, tz="America/Los_Angeles")
            for day_of_month in (1, 2, 8, 9, 15, 16, 22, 23, 29, 30, 31)
        ],
    }
).with_columns(pl.col("date").dt.epoch("s").alias("epoch"))
# Naive attempt: bucket every shift by whole 7-day spans of *real* elapsed
# seconds since the Sunday that opens the first week. This is the variant that
# goes wrong across the DST change.
#
# Anchor on the Sunday on or before the earliest shift by stepping back
# `isoweekday() % 7` days (Sun -> 0, Mon -> 1, ..., Sat -> 6). Do not use
# `start_of("week").subtract(days=1)` here: pendulum's week starts on Monday,
# so that lands a full week too early whenever the earliest shift is itself a
# Sunday.
first_shift = pendulum.instance(df["date"].min())
first_week_start_epoch = (
    first_shift.start_of("day")
    .subtract(days=first_shift.isoweekday() % 7)
    .int_timestamp
)
seconds_in_a_week = 7 * 24 * 60 * 60
i = 0  # week number given to the first (possibly partial) week
with pl.Config(tbl_rows=20):
    # The week containing the spring-forward change is one hour short in real
    # seconds, so a Sunday after the change sits one hour below a multiple of
    # seconds_in_a_week and is floored into the previous week.
    print(
        df.with_columns(
            week_num=i + (pl.col("epoch") - first_week_start_epoch) // seconds_in_a_week
        )
    )
# ┌─────────────────────────────────┬────────────┬──────────┐
# │ date ┆ epoch ┆ week_num │
# ╞═════════════════════════════════╪════════════╪══════════╡
# │ 2025-03-01 00:00:00 PST ┆ 1740816000 ┆ 0 │
# │ 2025-03-02 00:00:00 PST ┆ 1740902400 ┆ 1 │
# │ 2025-03-08 00:00:00 PST ┆ 1741420800 ┆ 1 │
# │ 2025-03-09 00:00:00 PST ┆ 1741507200 ┆ 2 │
# │ 2025-03-15 00:00:00 PDT ┆ 1742022000 ┆ 2 │
# │ 2025-03-16 00:00:00 PDT ┆ 1742108400 ┆ 2 │ --> This should've been week 3, but is off by an hour because of DST
# │ 2025-03-22 00:00:00 PDT ┆ 1742626800 ┆ 3 │
# │ 2025-03-23 00:00:00 PDT ┆ 1742713200 ┆ 3 │
# │ 2025-03-29 00:00:00 PDT ┆ 1743231600 ┆ 4 │
# │ 2025-03-30 00:00:00 PDT ┆ 1743318000 ┆ 4 │
# │ 2025-03-31 00:00:00 PDT ┆ 1743404400 ┆ 5 │
# └─────────────────────────────────┴────────────┴──────────┘
# Working variant: _REPLACE_ (not convert) the time zone with UTC before taking
# the epoch. The wall-clock date/time is kept but reinterpreted as UTC, where
# every day is exactly 24 hours, so dividing by a fixed week length is safe.
#
# Anchor on the Sunday on or before the earliest shift by stepping back
# `isoweekday() % 7` days (Sun -> 0, Mon -> 1, ..., Sat -> 6). Do not use
# `start_of("week").subtract(days=1)` here: pendulum's week starts on Monday,
# so that lands a full week too early whenever the earliest shift is itself a
# Sunday.
first_shift_utc = pendulum.instance(
    # Replace timezone to UTC
    df["date"].dt.replace_time_zone("UTC").min()
)
first_week_start_epoch = (
    first_shift_utc.start_of("day")
    .subtract(days=first_shift_utc.isoweekday() % 7)
    .int_timestamp
)
i = 0  # week number given to the first (possibly partial) week
with pl.Config(tbl_rows=20):
    print(
        df.with_columns(
            week_num=i
            + (
                # Replace timezone to UTC
                pl.col("date").dt.replace_time_zone("UTC").dt.epoch("s")
                - first_week_start_epoch
            )
            // seconds_in_a_week,
        )
    )
# ┌─────────────────────────────────┬────────────┬──────────┐
# │ date ┆ epoch ┆ week_num │
# ╞═════════════════════════════════╪════════════╪══════════╡
# │ 2025-03-01 00:00:00 PST ┆ 1740816000 ┆ 0 │
# │ 2025-03-02 00:00:00 PST ┆ 1740902400 ┆ 1 │
# │ 2025-03-08 00:00:00 PST ┆ 1741420800 ┆ 1 │
# │ 2025-03-09 00:00:00 PST ┆ 1741507200 ┆ 2 │
# │ 2025-03-15 00:00:00 PDT ┆ 1742022000 ┆ 2 │
# │ 2025-03-16 00:00:00 PDT ┆ 1742108400 ┆ 3 │
# │ 2025-03-22 00:00:00 PDT ┆ 1742626800 ┆ 3 │
# │ 2025-03-23 00:00:00 PDT ┆ 1742713200 ┆ 4 │
# │ 2025-03-29 00:00:00 PDT ┆ 1743231600 ┆ 4 │
# │ 2025-03-30 00:00:00 PDT ┆ 1743318000 ┆ 5 │
# │ 2025-03-31 00:00:00 PDT ┆ 1743404400 ┆ 5 │
# └─────────────────────────────────┴────────────┴──────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment