Skip to content

Instantly share code, notes, and snippets.

@carlineng
Last active December 4, 2022 07:01
Show Gist options
  • Save carlineng/9d23494ff83319d7d5fcf38f76a3ffd2 to your computer and use it in GitHub Desktop.
Save carlineng/9d23494ff83319d7d5fcf38f76a3ffd2 to your computer and use it in GitHub Desktop.
TPC-DS Query 4
import "tpcds.malloy"
query: all_sales -> {
declare:
catalog_total_sales_2001 is total_sales { where: date_dim.d_year = 2001 and sale_type = 'c'}
catalog_total_sales_2002 is total_sales { where: date_dim.d_year = 2002 and sale_type = 'c'}
web_total_sales_2001 is total_sales { where: date_dim.d_year = 2001 and sale_type = 'w'}
web_total_sales_2002 is total_sales { where: date_dim.d_year = 2002 and sale_type = 'w'}
store_total_sales_2001 is total_sales { where: date_dim.d_year = 2001 and sale_type = 's'}
store_total_sales_2002 is total_sales { where: date_dim.d_year = 2002 and sale_type = 's'}
catalog_yoy is catalog_total_sales_2002 / catalog_total_sales_2001
store_yoy is store_total_sales_2002 / store_total_sales_2001
web_yoy is web_total_sales_2002 / web_total_sales_2001
group_by:
customer.c_customer_id
customer.c_first_name
customer.c_last_name
customer.c_preferred_cust_flag
having:
catalog_yoy > store_yoy
and catalog_yoy > web_yoy
and store_total_sales_2001 > 0
and store_total_sales_2002 > 0
and web_total_sales_2001 > 0
and web_total_sales_2002 > 0
and catalog_total_sales_2001 > 0
and catalog_total_sales_2002 > 0
order_by:
c_customer_id
c_first_name
c_last_name
c_preferred_cust_flag
}
WITH year_total AS
(SELECT c_customer_id customer_id,
c_first_name customer_first_name,
c_last_name customer_last_name,
c_preferred_cust_flag customer_preferred_cust_flag,
c_birth_country customer_birth_country,
c_login customer_login,
c_email_address customer_email_address,
d_year dyear,
sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total,
's' sale_type
FROM customer,
store_sales,
date_dim
WHERE c_customer_sk = ss_customer_sk
AND ss_sold_date_sk = d_date_sk
GROUP BY c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
UNION ALL SELECT c_customer_id customer_id,
c_first_name customer_first_name,
c_last_name customer_last_name,
c_preferred_cust_flag customer_preferred_cust_flag,
c_birth_country customer_birth_country,
c_login customer_login,
c_email_address customer_email_address,
d_year dyear,
sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2)) year_total,
'c' sale_type
FROM customer,
catalog_sales,
date_dim
WHERE c_customer_sk = cs_bill_customer_sk
AND cs_sold_date_sk = d_date_sk
GROUP BY c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
UNION ALL SELECT c_customer_id customer_id,
c_first_name customer_first_name,
c_last_name customer_last_name,
c_preferred_cust_flag customer_preferred_cust_flag,
c_birth_country customer_birth_country,
c_login customer_login,
c_email_address customer_email_address,
d_year dyear,
sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2)) year_total,
'w' sale_type
FROM customer,
web_sales,
date_dim
WHERE c_customer_sk = ws_bill_customer_sk
AND ws_sold_date_sk = d_date_sk
GROUP BY c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year)
SELECT t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name,
t_s_secyear.customer_preferred_cust_flag
FROM year_total t_s_firstyear,
year_total t_s_secyear,
year_total t_c_firstyear,
year_total t_c_secyear,
year_total t_w_firstyear,
year_total t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
AND t_s_firstyear.customer_id = t_c_secyear.customer_id
AND t_s_firstyear.customer_id = t_c_firstyear.customer_id
AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
AND t_s_firstyear.customer_id = t_w_secyear.customer_id
AND t_s_firstyear.sale_type = 's'
AND t_c_firstyear.sale_type = 'c'
AND t_w_firstyear.sale_type = 'w'
AND t_s_secyear.sale_type = 's'
AND t_c_secyear.sale_type = 'c'
AND t_w_secyear.sale_type = 'w'
AND t_s_firstyear.dyear = 2001
AND t_s_secyear.dyear = 2001+1
AND t_c_firstyear.dyear = 2001
AND t_c_secyear.dyear = 2001+1
AND t_w_firstyear.dyear = 2001
AND t_w_secyear.dyear = 2001+1
AND t_s_firstyear.year_total > 0
AND t_c_firstyear.year_total > 0
AND t_w_firstyear.year_total > 0
AND CASE
WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
ELSE NULL
END > CASE
WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total
ELSE NULL
END
AND CASE
WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
ELSE NULL
END > CASE
WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total
ELSE NULL
END
ORDER BY t_s_secyear.customer_id NULLS FIRST,
t_s_secyear.customer_first_name NULLS FIRST,
t_s_secyear.customer_last_name NULLS FIRST,
t_s_secyear.customer_preferred_cust_flag NULLS FIRST
LIMIT 100
sql: all_sales_qry is ||
SELECT
ws_sold_date_sk as sold_date_sk,
ws_sold_time_sk as sold_time_sk,
ws_item_sk as item_sk,
ws_bill_customer_sk as customer_sk,
ws_ext_sales_price as sales_price,
ws_wholesale_cost as wholesale_cost,
ws_list_price as list_price,
ws_sales_price as sales_price,
ws_ext_discount_amt as ext_discount_amt,
ws_ext_sales_price as ext_sales_price,
ws_ext_wholesale_cost as ext_wholesale_cost,
ws_ext_list_price as ext_list_price,
ws_ext_tax as ext_tax,
ws_coupon_amt as coupon_amt,
ws_net_paid as net_paid,
ws_net_paid_inc_tax as net_paid_inc_tax,
ws_net_profit as net_profit,
'w' as sale_type
FROM read_parquet('../data/web_sales.parquet')
UNION ALL
SELECT
cs_sold_date_sk as sold_date_sk,
cs_sold_time_sk as sold_time_sk,
cs_item_sk as item_sk,
cs_bill_customer_sk as customer_sk,
cs_ext_sales_price as sales_price,
cs_wholesale_cost as wholesale_cost,
cs_list_price as list_price,
cs_sales_price as sales_price,
cs_ext_discount_amt as ext_discount_amt,
cs_ext_sales_price as ext_sales_price,
cs_ext_wholesale_cost as ext_wholesale_cost,
cs_ext_list_price as ext_list_price,
cs_ext_tax as ext_tax,
cs_coupon_amt as coupon_amt,
cs_net_paid as net_paid,
cs_net_paid_inc_tax as net_paid_inc_tax,
cs_net_profit as net_profit,
'c' as sale_type
FROM read_parquet('../data/catalog_sales.parquet')
UNION ALL
SELECT
ss_sold_date_sk as sold_date_sk,
ss_sold_time_sk as sold_time_sk,
ss_item_sk as item_sk,
ss_customer_sk as customer_sk,
ss_ext_sales_price as sales_price,
ss_wholesale_cost as wholesale_cost,
ss_list_price as list_price,
ss_sales_price as sales_price,
ss_ext_discount_amt as ext_discount_amt,
ss_ext_sales_price as ext_sales_price,
ss_ext_wholesale_cost as ext_wholesale_cost,
ss_ext_list_price as ext_list_price,
ss_ext_tax as ext_tax,
ss_coupon_amt as coupon_amt,
ss_net_paid as net_paid,
ss_net_paid_inc_tax as net_paid_inc_tax,
ss_net_profit as net_profit,
's' as sale_type
FROM read_parquet('../data/store_sales.parquet')
;;
on "duckdb"
;
source: all_sales is from_sql(all_sales_qry) {
join_one: date_dim on sold_date_sk = date_dim.d_date_sk
join_one: customer on customer_sk = customer.c_customer_sk
join_one: item on item_sk = item.i_item_sk
measure: total_sales is sum((ext_list_price - ext_wholesale_cost - ext_discount_amt + ext_sales_price) / 2)
measure: total_ext_sales_price is sum(ext_sales_price)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment