asdf
Last active
November 13, 2019 04:28
-
-
Save kevlarr/98e0194aaa773098b48ebd8ed6f03a7e to your computer and use it in GitHub Desktop.
SQL-y things
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* *************************************************** | |
Using HAVING to filter by aggregate function results | |
means not having to select and filter from a subquery. | |
I wasn't sure if repeating the agg function would | |
re-run it, so I built a large temporary table and | |
timed queries with and without the HAVING. | |
TL;DR: | |
`SELECT agg(..) FROM .. GROUP BY .. HAVING agg(..)` | |
~~ does not~~ repeat the aggregate function call, | |
so this is a pretty awesome trick! | |
*************************************************** */ | |
/* | |
Create a temporary table and join several times | |
to create a large, uneven data set: | |
i1 | i2 | i3 | |
----+----+---- | |
1 | 1 | 1 | |
2 | 1 | 1 | |
2 | 2 | 1 | |
2 | 2 | 2 | |
3 | 1 | 1 | |
3 | 2 | 1 | |
3 | 2 | 2 | |
3 | 3 | 1 | |
3 | 3 | 2 | |
3 | 3 | 3 | |
4 | 1 | 1 | |
... | |
10 | 10 | 8 | |
10 | 10 | 9 | |
10 | 10 | 10 | |
(220 rows) | |
*/ | |
with t as (select generate_series(1, 10) as i) | |
select t1.i as i1, t2.i as i2, t3.i as i3 | |
from t t1 | |
join t as t2 on t2.i <= t1.i | |
join t as t3 on t3.i <= t2.i | |
order by t1.i, t2.i, t3.i | |
; | |
-- Up the size and do a GROUP BY w/o aggregate functions | |
-- to use as the "base" measurement | |
explain analyze | |
with t as (select generate_series(1, 500) as i) | |
select t1.i as i1 | |
from t t1 | |
join t as t2 on t2.i <= t1.i | |
join t as t3 on t3.i <= t2.i | |
group by t1.i | |
order by t1.i | |
; | |
/* | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------ | |
Sort (cost=6604170.49..6604170.99 rows=200 width=4) (actual time=30567.533..30567.574 rows=500 loops=1) | |
Sort Key: t1.i | |
Sort Method: quicksort Memory: 48kB | |
CTE t | |
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.015..0.348 rows=500 loops=1) | |
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.007..0.007 rows=1 loops=1) | |
-> HashAggregate (cost=6604155.83..6604157.83 rows=200 width=4) (actual time=30567.197..30567.320 rows=500 loops=1) | |
Group Key: t1.i | |
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=4) (actual time=0.065..23867.591 rows=20958500 loops=1) | |
Join Filter: (t3.i <= t2.i) | |
Rows Removed by Join Filter: 41666500 | |
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.029..0.567 rows=500 loops=1) | |
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..23.979 rows=125250 loops=500) | |
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.022..110.981 rows=125250 loops=1) | |
Join Filter: (t2.i <= t1.i) | |
Rows Removed by Join Filter: 124750 | |
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.007..0.157 rows=500 loops=1) | |
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.100 rows=500 loops=500) | |
Planning Time: 0.285 ms | |
Execution Time: 30568.055 ms | |
*/ | |
-- Add `sum` aggregations | |
explain analyze | |
with t as (select generate_series(1, 500) as i) | |
select t1.i as i1, sum(t2.i) as i2_sum, sum(t3.i) as i3_sum | |
from t t1 | |
join t as t2 on t2.i <= t1.i | |
join t as t3 on t3.i <= t2.i | |
group by t1.i | |
order by t1.i | |
; | |
/* | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------ | |
Sort (cost=7159725.49..7159725.99 rows=200 width=20) (actual time=31786.370..31786.411 rows=500 loops=1) | |
Sort Key: t1.i | |
Sort Method: quicksort Memory: 64kB | |
CTE t | |
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.004..0.209 rows=500 loops=1) | |
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.002 rows=1 loops=1) | |
-> HashAggregate (cost=7159710.83..7159712.83 rows=200 width=20) (actual time=31785.967..31786.132 rows=500 loops=1) | |
Group Key: t1.i | |
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=12) (actual time=0.019..23993.921 rows=20958500 loops=1) | |
Join Filter: (t3.i <= t2.i) | |
Rows Removed by Join Filter: 41666500 | |
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.008..0.526 rows=500 loops=1) | |
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..23.715 rows=125250 loops=500) | |
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.006..100.399 rows=125250 loops=1) | |
Join Filter: (t2.i <= t1.i) | |
Rows Removed by Join Filter: 124750 | |
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.001..0.115 rows=500 loops=1) | |
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.090 rows=500 loops=500) | |
Planning Time: 0.169 ms | |
Execution Time: 31786.840 ms | |
*/ | |
-- Add `HAVING` clauses for sums | |
explain analyze | |
with t as (select generate_series(1, 500) as i) | |
select t1.i as i1, sum(t2.i) as i2_sum, sum(t3.i) as i3_sum | |
from t t1 | |
join t as t2 on t2.i <= t1.i | |
join t as t3 on t3.i <= t2.i | |
group by t1.i | |
having sum(t2.i) > 40000 and sum(t3.i) > 30000 | |
order by t1.i | |
; | |
/* | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------ | |
Sort (cost=7715274.34..7715274.40 rows=22 width=20) (actual time=32677.428..32677.465 rows=445 loops=1) | |
Sort Key: t1.i | |
Sort Method: quicksort Memory: 59kB | |
CTE t | |
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.004..0.174 rows=500 loops=1) | |
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.002..0.002 rows=1 loops=1) | |
-> HashAggregate (cost=7715265.83..7715268.83 rows=22 width=20) (actual time=32677.017..32677.203 rows=445 loops=1) | |
Group Key: t1.i | |
Filter: ((sum(t2.i) > 40000) AND (sum(t3.i) > 30000)) | |
Rows Removed by Filter: 55 | |
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=12) (actual time=0.018..24711.548 rows=20958500 loops=1) | |
Join Filter: (t3.i <= t2.i) | |
Rows Removed by Join Filter: 41666500 | |
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.008..0.592 rows=500 loops=1) | |
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..24.599 rows=125250 loops=500) | |
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.005..98.609 rows=125250 loops=1) | |
Join Filter: (t2.i <= t1.i) | |
Rows Removed by Join Filter: 124750 | |
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.001..0.106 rows=500 loops=1) | |
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.087 rows=500 loops=500) | |
Planning Time: 0.220 ms | |
Execution Time: 32679.939 ms | |
*/ | |
/* | |
CONCLUSION: | |
Including a `HAVING` clause did not appear to re-run any aggregations | |
(as one would hope), as the timing differences appeared during the | |
`MATERIALIZE` phase prior to any filters. | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment