Skip to content

Instantly share code, notes, and snippets.

@benoittgt
Created January 6, 2023 16:29
Show Gist options
  • Select an option

  • Save benoittgt/95df5f56d4a6c2a14715060a850a7937 to your computer and use it in GitHub Desktop.

Select an option

Save benoittgt/95df5f56d4a6c2a14715060a850a7937 to your computer and use it in GitHub Desktop.
Why index "index_events_on_project_id_kind_emitted_at" is not used?
DROP TABLE IF EXISTS events;
CREATE table events (
id serial PRIMARY KEY,
project_id BIGINT NOT NULL,
kind TEXT NOT NULL,
emitted_at TIMESTAMP NOT NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
SELECT floor(random() * (30-1+1) + 1)::int; -- random number between 1 and 100
SELECT (array['u', 'v', 'w', 'x', 'y', 'z'])[floor(random() * 6 + 1)]; -- random kind
SELECT ('2022-' || floor(random() * (12-10+1) + 10)::int || '-' || floor(random() * (29-10+1) + 10)::int)::timestamp; -- random date
INSERT INTO events (project_id, kind, emitted_at)
SELECT
floor(random() * (100-1+1) + 1)::int,
(array['u', 'v', 'w', 'x', 'y', 'z'])[floor(random() * 6 + 1)],
('2022-' || floor(random() * (12-10+1) + 10)::int || '-' || floor(random() * (29-10+1) + 10)::int)::timestamp
FROM generate_series(1, 20000000) i;
-- Query similar to my app (we will use this query all the time)
EXPLAIN ANALYZE VERBOSE
SELECT COUNT(*) FROM (
SELECT 1 AS one FROM "events" WHERE events.project_id IN (SELECT GENERATE_SERIES(10,30) AS index) AND "events"."kind" IN ('x', 'y', 'w', 'v') ORDER BY emitted_at desc LIMIT 30) subquery_for_count;
-- 2s with a Parallel Seq Scan that remove 2 222 386 rows
DROP INDEX IF EXISTS index_events_on_emitted_at_project_id_kind;
-- Create index with the same query WHERE order
CREATE INDEX CONCURRENTLY index_events_on_project_id_kind_emitted_at ON events(project_id, kind, emitted_at);
EXPLAIN ANALYZE VERBOSE
SELECT COUNT(*) FROM (
SELECT 1 AS one FROM "events" WHERE events.project_id IN (SELECT GENERATE_SERIES(10,30) AS index) AND "events"."kind" IN ('x', 'y', 'w', 'v') ORDER BY emitted_at desc LIMIT 30) subquery_for_count;
-- 6s no usage of the index
-- Why the index is not used?
-- Put emitted at as first column in the index
CREATE INDEX CONCURRENTLY index_events_on_emitted_at_project_id_kind ON events(emitted_at, project_id, kind);
EXPLAIN ANALYZE VERBOSE
SELECT COUNT(*) FROM (
SELECT 1 AS one FROM "events" WHERE events.project_id IN (SELECT GENERATE_SERIES(10,30) AS index) AND "events"."kind" IN ('x', 'y', 'w', 'v') ORDER BY emitted_at desc LIMIT 30) subquery_for_count;
-- 500ms usage of the index
-- Index Only Scan Backward using index_events_on_emitted_at_project_id_kind on public.events (cost=0.56..1544080.16 rows=13430667 width=16) (actual time=6.219..406.525 rows=156003 loops=1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment