Skip to content

Instantly share code, notes, and snippets.

@layandreas
Last active March 30, 2025 22:01
Show Gist options
  • Save layandreas/d2050e3113c4cbd1627a3cca58040eb3 to your computer and use it in GitHub Desktop.
Save layandreas/d2050e3113c4cbd1627a3cca58040eb3 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "duckdb==1.2.1",
# "gcsfs>=2025.3.0",
# "ibis-framework[duckdb]==10.4.0",
# "pins==0.8.7",
# ]
# ///
import ibis
from typing import Callable
def main() -> None:
    """Run the penguins demo end to end against a DuckDB database.

    Connects to ``duckdb://penguins.ddb``, (re)creates the ``penguins`` table
    from the ibis example dataset, prints the table, runs the transformation
    pipeline and finally prints the SQL compiled for the resulting expression.

    The connection is released in a ``finally`` clause so that a failure in
    any step does not leave the DuckDB connection open.
    """
    con = ibis.connect("duckdb://penguins.ddb")
    try:
        # overwrite=True keeps the script re-runnable against an existing file.
        con.create_table(
            "penguins", ibis.examples.penguins.fetch().to_pyarrow(), overwrite=True
        )

        print("Tables in duckdb:")
        print("\n")
        print(con.list_tables())
        print("\n")

        penguins = con.table("penguins")

        print("Initial penguins table:")
        print("\n")
        print(penguins.to_pandas())
        print("\n")

        # Each step receives the table expression produced by the previous one.
        penguins = _pipe(
            initial_table=penguins,
            funcs=[_select_longest_bill_by_species, _add_json_payload_column],
        )

        print("Compiled query:")
        print("\n")
        print(ibis.to_sql(penguins))
        print("\n")
    finally:
        # Always disconnect, even when one of the steps above raised.
        con.disconnect()
def _select_longest_bill_by_species(penguins: ibis.Table) -> ibis.Table:
    """Keep, for every species, the row ranked first by bill length.

    Rows are numbered within each ``species`` group in descending
    ``bill_length_mm`` order; only the first row of each group is kept and
    the helper ranking column is dropped again. The result is aliased as
    ``select_longest_bill_by_species``, printed, and returned.
    """
    rank_within_species = ibis.row_number().over(
        group_by=[ibis._["species"]],
        order_by=ibis.desc(ibis._["bill_length_mm"]),
    )

    ranked = penguins.mutate(rn=rank_within_species)
    # ibis row numbers start at 0, so 0 is the top-ranked row of each group.
    top_ranked = ranked.filter(ibis._["rn"] == 0)
    longest_bills = top_ranked.drop("rn").alias("select_longest_bill_by_species")

    print("Penguins with longest bill length by species:")
    print("\n")
    print(longest_bills.to_pandas())
    print("\n")

    return longest_bills
def _add_json_payload_column(penguins: ibis.Table) -> ibis.Table:
    """Add a ``json_payload`` column built from ``species`` and ``island``.

    The new column is an ibis struct with the fields ``species`` and
    ``island`` taken from the same row. The result is aliased as
    ``add_json_payload_column``, printed, and returned.
    """
    payload_fields = [("species", ibis._["species"]), ("island", ibis._["island"])]
    payload = ibis.struct(payload_fields)

    with_payload = penguins.mutate(json_payload=payload)
    with_payload = with_payload.alias("add_json_payload_column")

    print("Penguins data with JSON payload column:")
    print("\n")
    print(with_payload.to_pandas())
    print("\n")

    return with_payload
def _pipe(
    initial_table: ibis.Table, funcs: list[Callable[[ibis.Table], ibis.Table]]
) -> ibis.Table:
    """Apply ``funcs`` to ``initial_table`` one after another.

    Each function receives the result of the previous one; the first receives
    ``initial_table``. With an empty ``funcs`` the input is returned unchanged.
    """
    current = initial_table
    for step in funcs:
        current = step(current)
    return current
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment