Last active
September 26, 2024 11:43
-
-
Save benoittgt/ab72dc4cfedea2a0c6a5ee809d16e04d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Carefull setup script is slow | |
-- SETUP SCRIPT :: START -- | |
DROP TABLE IF EXISTS docs; | |
CREATE TABLE docs ( | |
id SERIAL PRIMARY KEY, | |
type varchar(40) DEFAULT 'pdf' NOT NULL, | |
status varchar(40) NOT NULL, | |
sender_reference varchar(40) NOT NULL, | |
sent_at TIMESTAMPTZ, | |
created_at TIMESTAMPTZ DEFAULT now() NOT NULL | |
); | |
INSERT INTO | |
docs (type, status, sender_reference, sent_at) | |
SELECT | |
('{pdf,doc,raw}'::text[])[ceil(random()*3)], | |
('{sent,draft,suspended}'::text[])[ceil(random()*3)], | |
('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000), | |
(LOCALTIMESTAMP - interval '2 years' * random())::timestamptz | |
FROM generate_series(1, 60000000) g; | |
CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST); | |
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status); | |
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST); | |
VACUUM(ANALYZE) docs; | |
-- SETUP SCRIPT :: END -- | |
SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed | |
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) | |
SELECT * FROM docs | |
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0; | |
/* | |
Limit (cost=0.56..26727.94 rows=20 width=38) (actual time=116.926..218.789 rows=20 loops=1) | |
Output: id, type, status, sender_reference, sent_at, created_at | |
Buffers: shared hit=1421 read=45046 | |
-> Index Scan using docs_sent_at_idx_1 on public.docs (cost=0.56..39748945.58 rows=29744 width=38) (actual time=116.924..218.784 rows=20 loops=1) | |
Output: id, type, status, sender_reference, sent_at, created_at | |
Filter: (((docs.status)::text = ANY ('{draft,sent}'::text[])) AND ((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[]))) | |
Rows Removed by Filter: 46354 | |
Buffers: shared hit=1421 read=45046 | |
Query Identifier: -4773896564660341574 | |
Planning Time: 0.282 ms | |
Execution Time: 218.817 ms | |
*/ | |
DROP INDEX docs_sent_at_idx_1; | |
-- redo the query without the index used | |
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) | |
SELECT * FROM docs | |
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0; | |
-- with warm cache, very expensive read, all rows returned then sorted | |
/* | |
Limit (cost=97013.42..97013.47 rows=20 width=38) (actual time=98.236..98.237 rows=20 loops=1) | |
Output: id, type, status, sender_reference, sent_at, created_at | |
Buffers: shared hit=15 read=29030 | |
-> Sort (cost=97013.42..97087.78 rows=29744 width=38) (actual time=98.234..98.235 rows=20 loops=1) | |
Output: id, type, status, sender_reference, sent_at, created_at | |
Sort Key: docs.sent_at DESC NULLS LAST | |
Sort Method: top-N heapsort Memory: 27kB | |
Buffers: shared hit=15 read=29030 | |
-> Bitmap Heap Scan on public.docs (cost=524.25..96221.94 rows=29744 width=38) (actual time=22.013..95.987 rows=29834 loops=1) | |
Output: id, type, status, sender_reference, sent_at, created_at | |
Recheck Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[]))) | |
Heap Blocks: exact=28973 | |
Buffers: shared hit=15 read=29030 | |
-> Bitmap Index Scan on docs_sent_at_idx_3 (cost=0.00..516.81 rows=29744 width=0) (actual time=13.608..13.608 rows=29834 loops=1) | |
Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[]))) | |
Buffers: shared hit=15 read=57 | |
Query Identifier: -4773896564660341574 | |
Planning Time: 0.312 ms | |
Execution Time: 98.320 ms | |
*/ | |
-- Recreate previous deleted index | |
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status); | |
VACUUM(ANALYZE) docs; | |
-- Decompose using UNION ALL. Result will be a little bit different (I think because of sent_at insertion :thinking:) | |
-- but it is the same amount of docs than the previous query. | |
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) | |
SELECT * | |
FROM | |
( | |
( | |
( | |
SELECT * FROM docs | |
WHERE status = 'draft' | |
AND sender_reference = 'Custom/1175' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
UNION ALL | |
( | |
SELECT * FROM docs | |
WHERE status = 'sent' | |
AND sender_reference = 'Custom/1175' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
UNION ALL | |
( | |
SELECT * FROM docs | |
WHERE status = 'draft' | |
AND sender_reference = 'Client/362' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
UNION ALL | |
( | |
SELECT * FROM docs | |
WHERE status = 'sent' | |
AND sender_reference = 'Client/362' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
UNION ALL | |
( | |
SELECT * FROM docs | |
WHERE status = 'draft' | |
AND sender_reference = 'Custom/280' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
UNION ALL | |
( | |
SELECT * FROM docs | |
WHERE status = 'sent' | |
AND sender_reference = 'Custom/280' | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0 | |
) | |
) | |
) docs | |
ORDER BY | |
sent_at DESC NULLS LAST | |
LIMIT | |
20 OFFSET 0; | |
-- Fast query with very low usage of IO or disk read. Good usage of index. | |
/* | |
Limit (cost=3.47..84.20 rows=20 width=38) (actual time=4.340..4.398 rows=20 loops=1) | |
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at | |
Buffers: shared hit=36 read=13 | |
-> Merge Append (cost=3.47..487.86 rows=120 width=38) (actual time=4.338..4.393 rows=20 loops=1) | |
Sort Key: docs_1.sent_at DESC NULLS LAST | |
Buffers: shared hit=36 read=13 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=2.081..2.101 rows=9 loops=1) | |
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at | |
Buffers: shared hit=9 read=4 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_1 (cost=0.56..19763.16 rows=4930 width=38) (actual time=2.079..2.098 rows=9 loops=1) | |
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at | |
Index Cond: (((docs_1.sender_reference)::text = 'Custom/1175'::text) AND ((docs_1.status)::text = 'draft'::text)) | |
Buffers: shared hit=9 read=4 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.204..0.215 rows=6 loops=1) | |
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at | |
Buffers: shared hit=9 read=1 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_2 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.202..0.212 rows=6 loops=1) | |
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at | |
Index Cond: (((docs_2.sender_reference)::text = 'Custom/1175'::text) AND ((docs_2.status)::text = 'sent'::text)) | |
Buffers: shared hit=9 read=1 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.755..0.756 rows=1 loops=1) | |
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at | |
Buffers: shared hit=2 read=3 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_3 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.754..0.754 rows=1 loops=1) | |
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at | |
Index Cond: (((docs_3.sender_reference)::text = 'Client/362'::text) AND ((docs_3.status)::text = 'draft'::text)) | |
Buffers: shared hit=2 read=3 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.195..0.200 rows=3 loops=1) | |
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at | |
Buffers: shared hit=6 read=1 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_4 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.194..0.198 rows=3 loops=1) | |
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at | |
Index Cond: (((docs_4.sender_reference)::text = 'Client/362'::text) AND ((docs_4.status)::text = 'sent'::text)) | |
Buffers: shared hit=6 read=1 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.888..0.897 rows=5 loops=1) | |
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at | |
Buffers: shared hit=6 read=3 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_5 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.887..0.895 rows=5 loops=1) | |
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at | |
Index Cond: (((docs_5.sender_reference)::text = 'Custom/280'::text) AND ((docs_5.status)::text = 'draft'::text)) | |
Buffers: shared hit=6 read=3 | |
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.207..0.208 rows=1 loops=1) | |
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at | |
Buffers: shared hit=4 read=1 | |
-> Index Scan using docs_sent_at_idx on public.docs docs_6 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.206..0.206 rows=1 loops=1) | |
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at | |
Index Cond: (((docs_6.sender_reference)::text = 'Custom/280'::text) AND ((docs_6.status)::text = 'sent'::text)) | |
Buffers: shared hit=4 read=1 | |
Query Identifier: -998395605132531303 | |
Planning: | |
Buffers: shared hit=5 | |
Planning Time: 1.633 ms | |
Execution Time: 4.498 ms | |
*/ | |
SELECT version(); | |
-- PostgreSQL 14.8 (Homebrew) on aarch64-apple-darwin22.4.0, compiled by Apple clang version 14.0.3 (clang-1403.0.22.14.1), 64-bit |
Output on the @FranckPachot query on PostgreSQL 16.1
Limit (cost=492.25..492.30 rows=20 width=42) (actual time=0.295..0.298 rows=20 loops=1)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Buffers: shared hit=146
-> Sort (cost=492.25..492.55 rows=120 width=42) (actual time=0.294..0.296 rows=20 loops=1)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Sort Key: docs.sent_at DESC NULLS LAST
Sort Method: top-N heapsort Memory: 27kB
Buffers: shared hit=146
-> Nested Loop (cost=0.56..489.06 rows=120 width=42) (actual time=0.036..0.267 rows=120 loops=1)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Buffers: shared hit=146
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=32) (actual time=0.001..0.001 rows=2 loops=1)
Output: "*VALUES*".column1
-> Nested Loop (cost=0.56..243.92 rows=60 width=42) (actual time=0.020..0.127 rows=60 loops=2)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Buffers: shared hit=146
-> Values Scan on "*VALUES*_1" (cost=0.00..0.04 rows=3 width=32) (actual time=0.000..0.000 rows=3 loops=2)
Output: "*VALUES*_1".column1
-> Limit (cost=0.56..81.09 rows=20 width=42) (actual time=0.014..0.040 rows=20 loops=6)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Buffers: shared hit=146
-> Index Scan using docs_sent_at_idx on public.docs (cost=0.56..20124.52 rows=4998 width=42) (actual time=0.013..0.038 rows=20 loops=6)
Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
Index Cond: (((docs.sender_reference)::text = "*VALUES*_1".column1) AND ((docs.status)::text = "*VALUES*".column1))
Buffers: shared hit=146
Planning Time: 0.210 ms
Execution Time: 0.327 ms
(27 rows)
Nice one @FranckPachot and thanks for writing this up @benoittgt!
Hello
A new benchmark made on saop-dynamic-skip-v11.45. Same as initial post with this setup and query:
- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
id SERIAL PRIMARY KEY,
type varchar(40) DEFAULT 'pdf' NOT NULL,
status varchar(40) NOT NULL,
sender_reference varchar(40) NOT NULL,
sent_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);
INSERT INTO
docs (type, status, sender_reference, sent_at)
SELECT
('{pdf,doc,raw}'::text[])[ceil(random()*3)],
('{sent,draft,suspended}'::text[])[ceil(random()*3)],
('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
(LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;
CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);
VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --
SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
result.. It's still very good
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.56..1415.02 rows=20 width=41) (actual time=2.508..10.596 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=4 read=214
-> Index Scan using docs_sent_at_idx_1 on public.docs (cost=0.56..2119638.18 rows=29971 width=41) (actual time=2.506..10.590 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Buffers: shared hit=4 read=214
Planning:
Buffers: shared hit=58
Planning Time: 0.299 ms
Execution Time: 10.617 ms
(11 rows)
There are many benchmarks in the Peter's contribution now. My benchmark is probably not really usefull now but for us (my company) it's good way to track the progress of the great contributions of Peter.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Another way to do something similar to the UNION ALL, with CTE, Nested Loop and LATERAL join: