Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save rustprooflabs/aa6832a470f81817a4ab6fe0d9809de6 to your computer and use it in GitHub Desktop.
Save rustprooflabs/aa6832a470f81817a4ab6fe0d9809de6 to your computer and use it in GitHub Desktop.
Minimal test case illustrating example where partition pruning is not happening. Simplified example to better test findings for Query 2 on this post https://blog.rustprooflabs.com/2021/02/postgres-partition-openstreetmap-road-v1-review
DROP TABLE IF EXISTS p_group;
CREATE TABLE p_group
(
id BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
osm_date DATE NOT NULL,
region TEXT NOT NULL,
CONSTRAINT uq_p_group UNIQUE (osm_date, region)
);
INSERT INTO p_group (osm_date, region)
SELECT '2020-01-01'::DATE + (x || ' months')::INTERVAL AS osm_date,
'region_name' AS region
FROM generate_series(0, 1) x
;
SELECT * FROM p_group;
DROP TABLE IF EXISTS p;
CREATE TABLE p
(
p_id BIGINT NOT NULL,
id BIGINT NOT NULL,
CONSTRAINT pk_p PRIMARY KEY (p_id, id)
) PARTITION BY LIST (p_id)
;
CREATE TABLE p1 PARTITION OF p FOR VALUES IN (1);
CREATE TABLE p2 PARTITION OF p FOR VALUES IN (2);
INSERT INTO p1 (p_id, id)
SELECT 1, x
FROM generate_series(1, 50000) x
;
INSERT INTO p2 (p_id, id)
SELECT 2, x
FROM generate_series(1, 50000) x
;
SELECT * FROM p_group;
SELECT p_id, COUNT(*) FROM p GROUP BY p_id;
SELECT COUNT(*) FROM p1;
SELECT COUNT(*) FROM p2;
-- Plan prunes out p2 here
EXPLAIN (COSTS OFF)
SELECT osm_date, region, COUNT(*)
FROM p
INNER JOIN p_group ON p.p_id = p_group.id
WHERE p.p_id = 1
GROUP BY osm_date, region
;
-- Scans p1 and p2 via plan
EXPLAIN (COSTS OFF)
SELECT osm_date, region, COUNT(*)
FROM p
INNER JOIN p_group
ON p.p_id = p_group.id
AND p_group.osm_date = '2020-01-01' AND region = 'region_name'
GROUP BY osm_date, region
;
-- Both p1 and p2 are scanned - Uses Hash Join
EXPLAIN (ANALYZE, COSTS OFF)
SELECT osm_date, region, COUNT(*)
FROM p
INNER JOIN p_group
ON p.p_id = p_group.id
AND p_group.osm_date = '2020-01-01' AND region = 'region_name'
GROUP BY osm_date, region
;
-- Discourage Hash Join
-- Now uses Nested Loop but still scans both p1 and p2
-- See: https://explain.depesz.com/s/UnoF
SHOW enable_hashjoin;
SET enable_hashjoin = off;
SHOW enable_hashjoin;
EXPLAIN (ANALYZE, COSTS OFF)
SELECT osm_date, region, COUNT(*)
FROM p
INNER JOIN p_group
ON p.p_id = p_group.id
AND p_group.osm_date = '2020-01-01' AND region = 'region_name'
GROUP BY osm_date, region
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment