Skip to content

Instantly share code, notes, and snippets.

@carlineng
Last active May 13, 2023 00:46
Show Gist options
  • Save carlineng/82866612beeb86632f8c23d458a1a5bc to your computer and use it in GitHub Desktop.
Save carlineng/82866612beeb86632f8c23d458a1a5bc to your computer and use it in GitHub Desktop.
Nested sub-totals in SQL
-- Using DuckDB SQL dialect
-- Data for these queries can be found here: https://github.com/malloydata/malloy-samples/tree/main/duckdb/ecommerce
-- Question: I want to calculate year-over-year sales for this e-commerce dataset.
-- Then, for each product_department, I want to calculate year-over-year sales in that department,
-- and the contribution of that department to the overall year-over-year sales growth.
-- Then, for each product_category, I want to calculate year-over-year sales for that category,
-- and the contribution of that category to the departmental year-over-year sales growth.
-- How do I do that in SQL?
-- Two approaches. One with window functions, one with GROUP BY ROLLUP.
-- One dimension, with window functions
WITH sales_by_department AS (
SELECT
b.product_department
, sum(case when date_trunc('year', a.created_at) = '2021-01-01' then a.sale_price else 0 end) as sales_2021
, sum(case when date_trunc('year', a.created_at) = '2022-01-01' then a.sale_price else 0 end) as sales_2022
FROM read_parquet('order_items.parquet') a
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id
GROUP BY 1
)
SELECT
sum(sales_2022) over (partition by 1) as total_sales
, sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1) as total_sales_growth
, sum(sales_2022) over (partition by 1) / sum(sales_2021) over (partition by 1) - 1 as total_sales_yoy
, '-' as _
, product_department
, sales_2022 AS department_sales
, sales_2022 - sales_2021 as department_sales_growth
, sales_2022 / sales_2021 - 1 as department_sales_yoy
-- DuckDB and BigQuery don't support RATIO_TO_REPORT :(
, (sales_2022 - sales_2021) / (sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1)) as department_growth_contribution
FROM sales_by_department
ORDER BY product_department, department_sales_growth DESC
;
-- Two dimensions, with window functions
WITH sales_by_department_and_category AS (
SELECT
b.product_department
, b.product_category
, sum(case when date_trunc('year', a.created_at) = '2021-01-01' then a.sale_price else 0 end) as sales_2021
, sum(case when date_trunc('year', a.created_at) = '2022-01-01' then a.sale_price else 0 end) as sales_2022
FROM read_parquet('order_items.parquet') a
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id
group by 1,2
)
SELECT
sum(sales_2022) over (partition by 1) as total_sales_2022
, sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1) as total_sales_growth
, sum(sales_2022) over (partition by 1) / sum(sales_2021) over (partition by 1) - 1 as total_sales_yoy
, '-' as _
, product_department
, sum(sales_2022) over (partition by product_department) as department_sales_2022
, sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department) as department_sales_growth
, sum(sales_2022) over (partition by product_department) / sum(sales_2021) over (partition by product_department) - 1 as department_sales_yoy
, (sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department)) /
(sum(sales_2022) over (partition by 1) - sum(sales_2021) over (partition by 1))
as department_sales_contribution
, '-' as __
, product_category
, sales_2022 as category_sales_2022
, sales_2022 - sales_2021 as category_sales_growth
, sales_2022 / sales_2021 - 1 as category_sales_yoy
, (sales_2022 - sales_2021) /
(sum(sales_2022) over (partition by product_department) - sum(sales_2021) over (partition by product_department))
as category_sales_contribution
FROM sales_by_department_and_category
ORDER BY product_department, category_sales_growth DESC
;
-- One dimension with GROUP BY ROLLUP
WITH rollup_totals AS (
SELECT
b.product_department
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2021-01-01' THEN a.sale_price ELSE 0 END) as sales_2021
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2022-01-01' THEN a.sale_price ELSE 0 END) as sales_2022
FROM read_parquet('order_items.parquet') a
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id
GROUP BY ROLLUP (1)
)
SELECT
product_department
, sales_2022
, sales_2022 - sales_2021 AS sales_growth
, sales_2022 / sales_2021 - 1 AS sales_yoy
, (sales_2022 - sales_2021) / SUM(CASE WHEN product_department IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY 1) AS growth_contribution
FROM rollup_totals
;
-- Two dimensions with GROUP BY ROLLUP
WITH rollup_totals AS (
SELECT
b.product_department
, b.product_category
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2021-01-01' THEN a.sale_price ELSE 0 END) as sales_2021
, SUM(CASE WHEN date_trunc('year', a.created_at) = '2022-01-01' THEN a.sale_price ELSE 0 END) as sales_2022
FROM read_parquet('order_items.parquet') a
LEFT JOIN read_parquet('inventory_items.parquet') b on a.inventory_item_id = b.id
GROUP BY ROLLUP (1,2)
)
SELECT
product_department
, product_category
, sales_2022
, sales_2022 - sales_2021 AS sales_growth
, sales_2022 / sales_2021 - 1 AS sales_yoy
, (sales_2022 - sales_2021) /
CASE WHEN product_department IS NULL THEN NULL
WHEN product_category IS NULL THEN
SUM(CASE WHEN product_department IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY 1)
WHEN product_category IS NOT NULL THEN
SUM(CASE WHEN product_category IS NULL THEN sales_2022 - sales_2021 ELSE 0 END) OVER (PARTITION BY product_department)
ELSE NULL END
AS growth_contribution
FROM rollup_totals
ORDER BY 1,4 DESC
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment