Created
February 10, 2022 16:26
-
-
Save cpcloud/adff446406b91fb12207c4864df76862 to your computer and use it in GitHub Desktop.
ibis ffill
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pprint import pprint as print | |
| import duckdb | |
| import ibis | |
| garbage = """ | |
| DROP TABLE IF EXISTS demo_data; | |
| CREATE TABLE demo_data ( | |
| event_id INT | |
| ,measured_on DATE | |
| ,measurement INT | |
| ); | |
| INSERT INTO demo_data | |
| VALUES | |
| (1,'2021-06-06',NULL) | |
| ,(1,'2021-06-07', 5) | |
| ,(1,'2021-06-08',NULL) | |
| ,(1,'2021-06-09',NULL) | |
| ,(2,'2021-05-22',42) | |
| ,(2,'2021-05-23',42) | |
| ,(2,'2021-05-25',NULL) | |
| ,(2,'2021-05-26',11) | |
| ,(2,'2021-05-27',NULL) | |
| ,(2,'2021-05-27',NULL) | |
| ,(3,'2021-07-01',NULL) | |
| ,(3,'2021-07-03',NULL); | |
| """ | |
| con = duckdb.connect("duck_test.db") | |
| con.execute(garbage) | |
| # TODO: make this suck less | |
| # con = ibis.sqlite.connect("test.db") | |
| t = ibis.table( | |
| [ | |
| ("event_id", "int32"), | |
| ("measured_on", "date"), | |
| ("measurement", "int32"), | |
| ], | |
| "demo_data", | |
| ) | |
| # t = con.table("demo_data") | |
| # forward fill | |
| # construct a "breaks counter" | |
| # increment a counter every time a non-null value for the | |
| # desired column to ffill is encountered | |
| expr = ( | |
| t.mutate(rowid=ibis.row_number()).mutate( | |
| grouper=lambda t: t.measurement.count().over( | |
| ibis.window(order_by="rowid", following=0) | |
| ) | |
| ) | |
| .mutate( | |
| measurement=lambda t: t.measurement.max().over( | |
| ibis.window(group_by="grouper", following=0) | |
| ) | |
| ) | |
| .sort_by("event_id") | |
| ) | |
| expr = t.measurement.ffill() | |
| expr = ( | |
| t.mutate( | |
| grouper=lambda t: t.measurement.count().over( | |
| ibis.window( | |
| group_by="event_id", | |
| order_by="measured_on", | |
| following=0, | |
| ) | |
| ) | |
| ) | |
| .mutate( | |
| measurement=lambda t: t.measurement.max().over( | |
| ibis.window( | |
| group_by=["event_id", "grouper"], | |
| following=0, | |
| ) | |
| ) | |
| ) | |
| .sort_by(["event_id", "measured_on"]) | |
| ) | |
| sql = ibis.postgres.compile(expr).compile( | |
| compile_kwargs={"literal_binds": True} | |
| ) | |
| print(con.execute(str(sql)).fetchdf()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment