Skip to content

Instantly share code, notes, and snippets.

@ianmcook
Last active April 12, 2023 18:07
Show Gist options
  • Select an option

  • Save ianmcook/5b01b1f0fff6dc9dc45103ac01b3e25d to your computer and use it in GitHub Desktop.

Select an option

Save ianmcook/5b01b1f0fff6dc9dc45103ac01b3e25d to your computer and use it in GitHub Desktop.
Ibis Snowflake TPC-H Query 1
# before running:
# 1. install Ibis and its Snowflake backend: https://ibis-project.org/backends/Snowflake/
# 2. create and activate a Snowflake trial account
# 3. set environment variables SNOWSQL_USER, SNOWSQL_PWD, SNOWSQL_ACCOUNT
import os
import ibis
from ibis import _
ibis.options.interactive = True
# connect to Snowflake
conn = ibis.snowflake.connect(
user=os.getenv('SNOWSQL_USER'),
password=os.getenv('SNOWSQL_PWD'),
account=os.getenv('SNOWSQL_ACCOUNT'),
database='snowflake_sample_data/tpch_sf1'
)
# assign the LINEITEM table to variable t (an Ibis table object)
t = conn.table('LINEITEM')
# use the Ibis dataframe API to run TPC-H query 1
t.filter(_.L_SHIPDATE.cast('date') <= ibis.date('1998-12-01') + ibis.interval(days=90)) \
.mutate(DISCOUNT_PRICE = _.L_EXTENDEDPRICE * (1 - _.L_DISCOUNT)) \
.mutate(CHARGE = _.DISCOUNT_PRICE * (1 + _.L_TAX)) \
.group_by(['L_RETURNFLAG', 'L_LINESTATUS']) \
.aggregate(
sum_qty=_.L_QUANTITY.sum(),
sum_base_price=_.L_EXTENDEDPRICE.sum(),
sum_disc_price=_.DISCOUNT_PRICE.sum(),
sum_charge=_.CHARGE.sum(),
avg_qty=_.L_QUANTITY.mean(),
avg_price=_.L_EXTENDEDPRICE.mean(),
avg_disc=_.L_DISCOUNT.mean(),
count_order=_.count()
) \
.order_by(['L_RETURNFLAG', 'L_LINESTATUS'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment