Last active
May 13, 2023 00:46
-
-
Save carlineng/82866612beeb86632f8c23d458a1a5bc to your computer and use it in GitHub Desktop.
Nested sub-totals in SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Using DuckDB SQL dialect | |
-- Data for these queries can be found here: https://github.com/malloydata/malloy-samples/tree/main/duckdb/ecommerce | |
-- Question: I want to calculate year-over-year sales for this e-commerce dataset. | |
-- Then, for each product_department, I want to calculate year-over-year sales in that department, | |
-- and the contribution of that department to the overall year-over-year sales growth. | |
-- Then, for each product_category, I want to calculate year-over-year sales for that category, | |
-- and the contribution of that category to the departmental year-over-year sales growth. | |
-- How do I do that in SQL? | |
-- Two approaches. One with window functions, one with GROUP BY ROLLUP. | |
-- One dimension, with window functions | |
WITH sales_by_department AS ( | |
SELECT | |
b.product_department | |
, sum(case when date_trunc('year', a.created_at) = '2021-01-01' then a.sale_price else 0 end) as sales_2021 | |
, sum(case when date_trunc('year', a.created_at) = '2022-01-01' then a.sale_price else 0 end) as sales_2022 | |
FROM read_parquet('order_items.parquet') a | |
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id | |
GROUP BY 1 | |
) | |
SELECT | |
sum(sales_2022) over (partition by 1) as total_sales | |
, sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1) as total_sales_growth | |
, sum(sales_2022) over (partition by 1) / sum(sales_2021) over (partition by 1) - 1 as total_sales_yoy | |
, '-' as _ | |
, product_department | |
, sales_2022 AS department_sales | |
, sales_2022 - sales_2021 as department_sales_growth | |
, sales_2022 / sales_2021 - 1 as department_sales_yoy | |
-- DuckDB and BigQuery don't support RATIO_TO_REPORT :( | |
, (sales_2022 - sales_2021) / (sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1)) as department_growth_contribution | |
FROM sales_by_department | |
ORDER BY product_department, department_sales_growth DESC | |
; | |
-- Two dimensions, with window functions | |
WITH sales_by_department_and_category AS ( | |
SELECT | |
b.product_department | |
, b.product_category | |
, sum(case when date_trunc('year', a.created_at) = '2021-01-01' then a.sale_price else 0 end) as sales_2021 | |
, sum(case when date_trunc('year', a.created_at) = '2022-01-01' then a.sale_price else 0 end) as sales_2022 | |
FROM read_parquet('order_items.parquet') a | |
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id | |
group by 1,2 | |
) | |
SELECT | |
sum(sales_2022) over (partition by 1) as total_sales_2022 | |
, sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1) as total_sales_growth | |
, sum(sales_2022) over (partition by 1) / sum(sales_2021) over (partition by 1) - 1 as total_sales_yoy | |
, '-' as _ | |
, product_department | |
, sum(sales_2022) over (partition by product_department) as department_sales_2022 | |
, sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department) as department_sales_growth | |
, sum(sales_2022) over (partition by product_department) / sum(sales_2021) over (partition by product_department) - 1 as department_sales_yoy | |
, (sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department)) / | |
(sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1)) | |
as department_sales_contribution | |
, '-' as __ | |
, product_category | |
, sales_2022 as category_sales_2022 | |
, sales_2022 - sales_2021 as category_sales_growth | |
, sales_2022 / sales_2021 - 1 as category_sales_yoy | |
, (sales_2022 - sales_2021) / | |
(sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department)) | |
as category_sales_contribution | |
FROM sales_by_department_and_category | |
ORDER BY product_department, category_sales_growth DESC | |
; | |
-- One dimension with GROUP BY ROLLUP | |
WITH rollup_totals AS ( | |
SELECT | |
b.product_department | |
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2021-01-01' THEN a.sale_price ELSE 0 END) as sales_2021 | |
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2022-01-01' THEN a.sale_price ELSE 0 END) as sales_2022 | |
FROM read_parquet('order_items.parquet') a | |
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id | |
GROUP BY ROLLUP (1) | |
) | |
SELECT | |
product_department | |
, sales_2022 | |
, sales_2022 - sales_2021 AS sales_growth | |
, sales_2022 / sales_2021 - 1 AS sales_yoy | |
, (sales_2022 - sales_2021) / SUM(CASE WHEN product_department IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY 1) AS growth_contribution | |
FROM rollup_totals | |
; | |
-- Two dimensions with GROUP BY ROLLUP | |
WITH rollup_totals AS ( | |
SELECT | |
b.product_department | |
, b.product_category | |
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2021-01-01' THEN a.sale_price ELSE 0 END) as sales_2021 | |
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2022-01-01' THEN a.sale_price ELSE 0 END) as sales_2022 | |
FROM read_parquet('order_items.parquet') a | |
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id | |
GROUP BY ROLLUP (1,2) | |
) | |
SELECT | |
product_department | |
, product_category | |
, sales_2022 | |
, sales_2022 - sales_2021 AS sales_growth | |
, sales_2022 / sales_2021 - 1 AS sales_yoy | |
, (sales_2022 - sales_2021) / | |
CASE WHEN product_department IS NULL THEN NULL | |
WHEN product_category IS NULL THEN | |
SUM(CASE WHEN product_department IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY 1) | |
WHEN product_category IS NOT NULL THEN | |
SUM(CASE WHEN product_category IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY product_department) | |
ELSE NULL END | |
AS growth_contribution | |
FROM rollup_totals | |
ORDER BY 1,4 DESC | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment