Skip to content

Instantly share code, notes, and snippets.

@benoittgt
Last active September 26, 2024 11:43
Show Gist options
  • Save benoittgt/ab72dc4cfedea2a0c6a5ee809d16e04d to your computer and use it in GitHub Desktop.
Save benoittgt/ab72dc4cfedea2a0c6a5ee809d16e04d to your computer and use it in GitHub Desktop.
-- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
id SERIAL PRIMARY KEY,
type varchar(40) DEFAULT 'pdf' NOT NULL,
status varchar(40) NOT NULL,
sender_reference varchar(40) NOT NULL,
sent_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);
INSERT INTO
docs (type, status, sender_reference, sent_at)
SELECT
('{pdf,doc,raw}'::text[])[ceil(random()*3)],
('{sent,draft,suspended}'::text[])[ceil(random()*3)],
('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
(LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;
CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);
VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --
SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
/*
Limit (cost=0.56..26727.94 rows=20 width=38) (actual time=116.926..218.789 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=1421 read=45046
-> Index Scan using docs_sent_at_idx_1 on public.docs (cost=0.56..39748945.58 rows=29744 width=38) (actual time=116.924..218.784 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Filter: (((docs.status)::text = ANY ('{draft,sent}'::text[])) AND ((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])))
Rows Removed by Filter: 46354
Buffers: shared hit=1421 read=45046
Query Identifier: -4773896564660341574
Planning Time: 0.282 ms
Execution Time: 218.817 ms
*/
DROP INDEX docs_sent_at_idx_1;
-- redo the query without the index used
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
-- with warm cache, very expensive read, all rows returned then sorted
/*
Limit (cost=97013.42..97013.47 rows=20 width=38) (actual time=98.236..98.237 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=15 read=29030
-> Sort (cost=97013.42..97087.78 rows=29744 width=38) (actual time=98.234..98.235 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Sort Key: docs.sent_at DESC NULLS LAST
Sort Method: top-N heapsort Memory: 27kB
Buffers: shared hit=15 read=29030
-> Bitmap Heap Scan on public.docs (cost=524.25..96221.94 rows=29744 width=38) (actual time=22.013..95.987 rows=29834 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Recheck Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Heap Blocks: exact=28973
Buffers: shared hit=15 read=29030
-> Bitmap Index Scan on docs_sent_at_idx_3 (cost=0.00..516.81 rows=29744 width=0) (actual time=13.608..13.608 rows=29834 loops=1)
Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Buffers: shared hit=15 read=57
Query Identifier: -4773896564660341574
Planning Time: 0.312 ms
Execution Time: 98.320 ms
*/
-- Recreate previous deleted index
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
VACUUM(ANALYZE) docs;
-- Decompose using UNION ALL. Result will be a little bit different (I think because of sent_at insertion :thinking:)
-- but it is the same amount of docs than the previous query.
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT *
FROM
(
(
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
)
) docs
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0;
-- Fast query with very low usage of IO or disk read. Good usage of index.
/*
Limit (cost=3.47..84.20 rows=20 width=38) (actual time=4.340..4.398 rows=20 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=36 read=13
-> Merge Append (cost=3.47..487.86 rows=120 width=38) (actual time=4.338..4.393 rows=20 loops=1)
Sort Key: docs_1.sent_at DESC NULLS LAST
Buffers: shared hit=36 read=13
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=2.081..2.101 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=9 read=4
-> Index Scan using docs_sent_at_idx on public.docs docs_1 (cost=0.56..19763.16 rows=4930 width=38) (actual time=2.079..2.098 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Index Cond: (((docs_1.sender_reference)::text = 'Custom/1175'::text) AND ((docs_1.status)::text = 'draft'::text))
Buffers: shared hit=9 read=4
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.204..0.215 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Buffers: shared hit=9 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_2 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.202..0.212 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Index Cond: (((docs_2.sender_reference)::text = 'Custom/1175'::text) AND ((docs_2.status)::text = 'sent'::text))
Buffers: shared hit=9 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.755..0.756 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Buffers: shared hit=2 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_3 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.754..0.754 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Index Cond: (((docs_3.sender_reference)::text = 'Client/362'::text) AND ((docs_3.status)::text = 'draft'::text))
Buffers: shared hit=2 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.195..0.200 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Buffers: shared hit=6 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_4 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.194..0.198 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Index Cond: (((docs_4.sender_reference)::text = 'Client/362'::text) AND ((docs_4.status)::text = 'sent'::text))
Buffers: shared hit=6 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.888..0.897 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Buffers: shared hit=6 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_5 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.887..0.895 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Index Cond: (((docs_5.sender_reference)::text = 'Custom/280'::text) AND ((docs_5.status)::text = 'draft'::text))
Buffers: shared hit=6 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.207..0.208 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Buffers: shared hit=4 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_6 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.206..0.206 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Index Cond: (((docs_6.sender_reference)::text = 'Custom/280'::text) AND ((docs_6.status)::text = 'sent'::text))
Buffers: shared hit=4 read=1
Query Identifier: -998395605132531303
Planning:
Buffers: shared hit=5
Planning Time: 1.633 ms
Execution Time: 4.498 ms
*/
SELECT version();
-- PostgreSQL 14.8 (Homebrew) on aarch64-apple-darwin22.4.0, compiled by Apple clang version 14.0.3 (clang-1403.0.22.14.1), 64-bit
@benoittgt
Copy link
Author

benoittgt commented Sep 13, 2023

with https://github.com/petergeoghegan/postgres/tree/saop-dynamic-skip-v1.36

 Limit  (cost=0.56..69873.57 rows=20 width=42) (actual time=205.625..2623.441 rows=20 loops=1)
   Output: id, type, status, sender_reference, sent_at, created_at
   Buffers: shared hit=879 read=32219 written=11011
   ->  Index Scan using docs_sent_at_idx_1 on public.docs  (cost=0.56..103401561.43 rows=29597 width=42) (actual time=205.625..2623.431 rows=20 loops=1)
         Output: id, type, status, sender_reference, sent_at, created_at
         Filter: (((docs.status)::text = ANY ('{draft,sent}'::text[])) AND ((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])))
         Rows Removed by Filter: 32883
         Buffers: shared hit=879 read=32219 written=11011
 Planning:
   Buffers: shared hit=58
 Planning Time: 0.339 ms
 Execution Time: 2623.458 ms

EDIT: This test needs to be re-done because the result seems to be strange.

@benoittgt
Copy link
Author

With https://github.com/petergeoghegan/postgres/tree/saop-dynamic-skip-v5.21

-- all previous script ran
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;

/*
 Limit  (cost=0.56..1418.84 rows=20 width=41) (actual time=3.684..11.547 rows=20 loops=1)
   Output: id, type, status, sender_reference, sent_at, created_at
   Buffers: shared hit=4 read=296
   ->  Index Scan using docs_sent_at_idx_1 on public.docs  (cost=0.56..2119258.36 rows=29885 width=41) (actual time=3.683..11.542 rows=20 loops=1)
         Output: id, type, status, sender_reference, sent_at, created_at
         Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
         Buffers: shared hit=4 read=296
 Planning:
   Buffers: shared hit=58
 Planning Time: 0.373 ms
 Execution Time: 11.566 ms
*/

@FranckPachot
Copy link

Another way to do something similar to the UNION ALL, with CTE, Nested Loop and LATERAL join:

WITH
 status_list(status) as ( values ('draft'),('sent') )
,sender_reference_list(sender_reference) as ( values ('Custom/1175'),('Client/362'),('Custom/280') )
,list as ( select * from status_list cross join sender_reference_list)
select docs.* from list cross join lateral (
 SELECT * FROM docs
 WHERE status=list.status AND sender_reference = list.sender_reference
 ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0
) as docs
 ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;

@benoittgt
Copy link
Author

Output on the @FranckPachot query on PostgreSQL 16.1

 Limit  (cost=492.25..492.30 rows=20 width=42) (actual time=0.295..0.298 rows=20 loops=1)
   Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
   Buffers: shared hit=146
   ->  Sort  (cost=492.25..492.55 rows=120 width=42) (actual time=0.294..0.296 rows=20 loops=1)
         Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
         Sort Key: docs.sent_at DESC NULLS LAST
         Sort Method: top-N heapsort  Memory: 27kB
         Buffers: shared hit=146
         ->  Nested Loop  (cost=0.56..489.06 rows=120 width=42) (actual time=0.036..0.267 rows=120 loops=1)
               Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
               Buffers: shared hit=146
               ->  Values Scan on "*VALUES*"  (cost=0.00..0.03 rows=2 width=32) (actual time=0.001..0.001 rows=2 loops=1)
                     Output: "*VALUES*".column1
               ->  Nested Loop  (cost=0.56..243.92 rows=60 width=42) (actual time=0.020..0.127 rows=60 loops=2)
                     Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
                     Buffers: shared hit=146
                     ->  Values Scan on "*VALUES*_1"  (cost=0.00..0.04 rows=3 width=32) (actual time=0.000..0.000 rows=3 loops=2)
                           Output: "*VALUES*_1".column1
                     ->  Limit  (cost=0.56..81.09 rows=20 width=42) (actual time=0.014..0.040 rows=20 loops=6)
                           Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
                           Buffers: shared hit=146
                           ->  Index Scan using docs_sent_at_idx on public.docs  (cost=0.56..20124.52 rows=4998 width=42) (actual time=0.013..0.038 rows=20 loops=6)
                                 Output: docs.id, docs.type, docs.status, docs.sender_reference, docs.sent_at, docs.created_at
                                 Index Cond: (((docs.sender_reference)::text = "*VALUES*_1".column1) AND ((docs.status)::text = "*VALUES*".column1))
                                 Buffers: shared hit=146
 Planning Time: 0.210 ms
 Execution Time: 0.327 ms
(27 rows)

@andyatkinson
Copy link

Nice one @FranckPachot and thanks for writing this up @benoittgt!

@benoittgt
Copy link
Author

Hello

A new benchmark made on saop-dynamic-skip-v11.45. Same as initial post with this setup and query:

- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
  id SERIAL PRIMARY KEY,
  type varchar(40) DEFAULT 'pdf' NOT NULL,
  status varchar(40) NOT NULL,
  sender_reference varchar(40) NOT NULL,
  sent_at TIMESTAMPTZ,
  created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);


INSERT INTO
  docs (type, status, sender_reference, sent_at)
SELECT
  ('{pdf,doc,raw}'::text[])[ceil(random()*3)],
  ('{sent,draft,suspended}'::text[])[ceil(random()*3)],
  ('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
  (LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;

CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status); 
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);

VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --

SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed

EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;

result.. It's still very good

                                                                              QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.56..1415.02 rows=20 width=41) (actual time=2.508..10.596 rows=20 loops=1)
   Output: id, type, status, sender_reference, sent_at, created_at
   Buffers: shared hit=4 read=214
   ->  Index Scan using docs_sent_at_idx_1 on public.docs  (cost=0.56..2119638.18 rows=29971 width=41) (actual time=2.506..10.590 rows=20 loops=1)
         Output: id, type, status, sender_reference, sent_at, created_at
         Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
         Buffers: shared hit=4 read=214
 Planning:
   Buffers: shared hit=58
 Planning Time: 0.299 ms
 Execution Time: 10.617 ms
(11 rows)

There are many benchmarks in the Peter's contribution now. My benchmark is probably not really usefull now but for us (my company) it's good way to track the progress of the great contributions of Peter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment