Skip to content

Instantly share code, notes, and snippets.

@carlineng
Last active December 2, 2022 01:40
Show Gist options
  • Save carlineng/2a8f04a3b12e639f490f3126ef74a785 to your computer and use it in GitHub Desktop.
Save carlineng/2a8f04a3b12e639f490f3126ef74a785 to your computer and use it in GitHub Desktop.
TPC-DS Query 2
query: yoy_weekly_sales is weekly_sales {
join_many: date_dim on d_week_seq = date_dim.d_week_seq
join_one: weekly_sales on d_week_seq = weekly_sales.d_week_seq - 53
} -> {
group_by:
d_week_seq
r_sun is sun_sales / weekly_sales.sun_sales
r_mon is mon_sales / weekly_sales.mon_sales
r_tue is tue_sales / weekly_sales.tue_sales
r_wed is wed_sales / weekly_sales.wed_sales
r_thu is thu_sales / weekly_sales.thu_sales
r_fri is fri_sales / weekly_sales.fri_sales
r_sat is sat_sales / weekly_sales.sat_sales
where: date_dim.d_year = 2001
order_by: d_week_seq
}
WITH wscs AS
(SELECT sold_date_sk,
sales_price
FROM
(SELECT ws_sold_date_sk sold_date_sk,
ws_ext_sales_price sales_price
FROM web_sales
UNION ALL SELECT cs_sold_date_sk sold_date_sk,
cs_ext_sales_price sales_price
FROM catalog_sales) sq1),
wswscs AS
(SELECT d_week_seq,
sum(CASE
WHEN (d_day_name='Sunday') THEN sales_price
ELSE NULL
END) sun_sales,
sum(CASE
WHEN (d_day_name='Monday') THEN sales_price
ELSE NULL
END) mon_sales,
sum(CASE
WHEN (d_day_name='Tuesday') THEN sales_price
ELSE NULL
END) tue_sales,
sum(CASE
WHEN (d_day_name='Wednesday') THEN sales_price
ELSE NULL
END) wed_sales,
sum(CASE
WHEN (d_day_name='Thursday') THEN sales_price
ELSE NULL
END) thu_sales,
sum(CASE
WHEN (d_day_name='Friday') THEN sales_price
ELSE NULL
END) fri_sales,
sum(CASE
WHEN (d_day_name='Saturday') THEN sales_price
ELSE NULL
END) sat_sales
FROM wscs,
date_dim
WHERE d_date_sk = sold_date_sk
GROUP BY d_week_seq)
SELECT d_week_seq1,
round(sun_sales1/sun_sales2, 2) r1,
round(mon_sales1/mon_sales2, 2) r2,
round(tue_sales1/tue_sales2, 2) r3,
round(wed_sales1/wed_sales2, 2) r4,
round(thu_sales1/thu_sales2, 2) r5,
round(fri_sales1/fri_sales2, 2) r6,
round(sat_sales1/sat_sales2, 2)
FROM
(SELECT wswscs.d_week_seq d_week_seq1,
sun_sales sun_sales1,
mon_sales mon_sales1,
tue_sales tue_sales1,
wed_sales wed_sales1,
thu_sales thu_sales1,
fri_sales fri_sales1,
sat_sales sat_sales1
FROM wswscs,
date_dim
WHERE date_dim.d_week_seq = wswscs.d_week_seq
AND d_year = 2001) y,
(SELECT wswscs.d_week_seq d_week_seq2,
sun_sales sun_sales2,
mon_sales mon_sales2,
tue_sales tue_sales2,
wed_sales wed_sales2,
thu_sales thu_sales2,
fri_sales fri_sales2,
sat_sales sat_sales2
FROM wswscs,
date_dim -- This join fans out the final dataset
WHERE date_dim.d_week_seq = wswscs.d_week_seq. -- one-to-many relationship on d_week_seq
AND d_year = 2001+1) z
WHERE d_week_seq1 = d_week_seq2-53
ORDER BY d_week_seq1 NULLS FIRST;
sql: web_catalog_sales_qry is ||
SELECT
ws_sold_date_sk as sold_date_sk,
ws_ext_sales_price as sales_price
FROM read_parquet('../data/web_sales.parquet')
UNION all
SELECT
cs_sold_date_sk as sold_date_sk,
cs_ext_sales_price as sales_price
FROM read_parquet('../data/catalog_sales.parquet')
;;
on "duckdb"
-- Create a Source from the SQL query above
source: web_catalog_sales is from_sql(web_catalog_sales_qry) {
join_one: date_dim on sold_date_sk = date_dim.d_date_sk
}
-- Create another Source that does the by-week aggregation
source: weekly_sales is from(
web_catalog_sales -> {
group_by:
date_dim.d_week_seq
aggregate:
sun_sales is sum(sales_price) { where: date_dim.d_day_name = 'Sunday' }
mon_sales is sum(sales_price) { where: date_dim.d_day_name = 'Monday' }
tue_sales is sum(sales_price) { where: date_dim.d_day_name = 'Tuesday' }
wed_sales is sum(sales_price) { where: date_dim.d_day_name = 'Wednesday' }
thu_sales is sum(sales_price) { where: date_dim.d_day_name = 'Thursday' }
fri_sales is sum(sales_price) { where: date_dim.d_day_name = 'Friday' }
sat_sales is sum(sales_price) { where: date_dim.d_day_name = 'Saturday' }
}
) {
primary_key: d_week_seq
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment