felixge · January 18, 2020 11:38
diff --git a/pr-7864.sql b/pr-7864.sql
 SET search_path=query_test;
 DROP SCHEMA IF EXISTS query_test CASCADE;
 CREATE SCHEMA query_test;

 CREATE TABLE campaigns (
  id int PRIMARY KEY,
  campaign_plan_id int
 );

 INSERT INTO campaigns VALUES
  (1, 1),
  (2, 2);

 CREATE TABLE changeset_jobs (
  id int PRIMARY KEY,
  campaign_id int REFERENCES campaigns,
  created_at timestamptz
 );

 INSERT INTO changeset_jobs
 SELECT g, 1, now() + (g||' hour')::interval
 FROM generate_series(1, 100000) g;

 CREATE TABLE campaign_jobs (
  id int PRIMARY KEY,
  campaign_plan_id int
 );

 INSERT INTO campaign_jobs
 SELECT g, 1
 FROM generate_series(1, 100000) g;

 -- The original query PR #7864
 EXPLAIN ANALYZE
 SELECT CASE WHEN (
  SELECT count(campaign_jobs.id)
  FROM campaigns c
  JOIN campaign_jobs ON campaign_jobs.campaign_plan_id = c.campaign_plan_id
  WHERE c.id = 1
 ) = (
  SELECT count(id)
  FROM   changeset_jobs
  WHERE  campaign_id = 1
 ) THEN (
  SELECT max(created_at)
  FROM   changeset_jobs
  WHERE  campaign_id = 1
 )
 END;

 -- A simplification of the original query that produces 0 rows (instead of
 -- NULL) when the changeset_jobs.count doesn't match the campaign_jobs.count.
 -- It also produces a slightly better/faster query plan.
 EXPLAIN ANALYZE
 SELECT max(created_at)
 FROM changeset_jobs
 WHERE campaign_id = 1
 HAVING count(id) = (
  SELECT count(campaign_jobs.id)
  FROM campaigns c
  JOIN campaign_jobs ON campaign_jobs.campaign_plan_id = c.campaign_plan_id
  WHERE c.id = 1
 );

 -- Behaves like the previous query, but is arguably more complicated than the
 -- original. Howver, this query produces an optimal query plan and is ~2x
 -- faster than the original.
 EXPLAIN ANALYZE
 SELECT changeset_jobs_agg.max
 FROM campaigns c
 JOIN LATERAL (
  SELECT count(id)
  FROM campaign_jobs
  WHERE campaign_jobs.campaign_plan_id = c.campaign_plan_id
 ) campaign_jobs_agg ON TRUE
 JOIN LATERAL (
  SELECT count(id), max(created_at)
  FROM changeset_jobs
  WHERE changeset_jobs.campaign_id = c.id
 ) changeset_jobs_agg ON TRUE
 WHERE c.id = 1 AND campaign_jobs_agg.count = changeset_jobs_agg.count;
diff --git a/run.txt b/run.txt
 SET
 DROP SCHEMA
 CREATE SCHEMA
 CREATE TABLE
 INSERT 0 2
 CREATE TABLE
 INSERT 0 100000
 CREATE TABLE
 INSERT 0 100000
                                                                      QUERY PLAN                                                                      
 ------------------------------------------------------------------------------------------------------------------------------------------------------
 Result  (cost=5302.86..5302.87 rows=1 width=8) (actual time=137.286..137.287 rows=1 loops=1)
   InitPlan 1 (returns $0)
     ->  Aggregate  (cost=1716.20..1716.21 rows=1 width=8) (actual time=64.484..64.484 rows=1 loops=1)
           ->  Hash Join  (cost=2.39..1714.95 rows=501 width=4) (actual time=0.063..52.608 rows=100000 loops=1)
                 Hash Cond: (campaign_jobs.campaign_plan_id = c.campaign_plan_id)
                 ->  Seq Scan on campaign_jobs  (cost=0.00..1444.18 rows=100118 width=8) (actual time=0.018..16.386 rows=100000 loops=1)
                 ->  Hash  (cost=2.37..2.37 rows=1 width=4) (actual time=0.012..0.012 rows=1 loops=1)
                       Buckets: 1024  Batches: 1  Memory Usage: 9kB
                       ->  Index Scan using campaigns_pkey on campaigns c  (cost=0.15..2.37 rows=1 width=4) (actual time=0.005..0.005 rows=1 loops=1)
                             Index Cond: (id = 1)
   InitPlan 2 (returns $1)
     ->  Aggregate  (cost=1793.31..1793.32 rows=1 width=8) (actual time=35.127..35.128 rows=1 loops=1)
           ->  Seq Scan on changeset_jobs  (cost=0.00..1792.06 rows=500 width=4) (actual time=0.021..23.473 rows=100000 loops=1)
                 Filter: (campaign_id = 1)
   InitPlan 3 (returns $2)
     ->  Aggregate  (cost=1793.31..1793.32 rows=1 width=8) (actual time=37.659..37.659 rows=1 loops=1)
           ->  Seq Scan on changeset_jobs changeset_jobs_1  (cost=0.00..1792.06 rows=500 width=8) (actual time=0.015..21.216 rows=100000 loops=1)
                 Filter: (campaign_id = 1)
 Planning Time: 0.482 ms
 Execution Time: 137.408 ms
 (20 rows)

                                                                      QUERY PLAN                                                                      
 ------------------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=3510.77..3510.79 rows=1 width=8) (actual time=98.486..98.486 rows=1 loops=1)
   Filter: (count(changeset_jobs.id) = $0)
   InitPlan 1 (returns $0)
     ->  Aggregate  (cost=1716.20..1716.21 rows=1 width=8) (actual time=62.262..62.263 rows=1 loops=1)
           ->  Hash Join  (cost=2.39..1714.95 rows=501 width=4) (actual time=0.052..49.942 rows=100000 loops=1)
                 Hash Cond: (campaign_jobs.campaign_plan_id = c.campaign_plan_id)
                 ->  Seq Scan on campaign_jobs  (cost=0.00..1444.18 rows=100118 width=8) (actual time=0.022..13.392 rows=100000 loops=1)
                 ->  Hash  (cost=2.37..2.37 rows=1 width=4) (actual time=0.014..0.014 rows=1 loops=1)
                       Buckets: 1024  Batches: 1  Memory Usage: 9kB
                       ->  Index Scan using campaigns_pkey on campaigns c  (cost=0.15..2.37 rows=1 width=4) (actual time=0.010..0.010 rows=1 loops=1)
                             Index Cond: (id = 1)
   ->  Seq Scan on changeset_jobs  (cost=0.00..1792.06 rows=500 width=12) (actual time=0.010..19.192 rows=100000 loops=1)
         Filter: (campaign_id = 1)
 Planning Time: 0.163 ms
 Execution Time: 98.559 ms
 (15 rows)

                                                               QUERY PLAN                                                               
 ----------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=3490.45..3492.72 rows=1 width=8) (actual time=66.227..66.230 rows=1 loops=1)
   Join Filter: ((count(campaign_jobs.id)) = (count(changeset_jobs.id)))
   ->  Nested Loop  (cost=1695.88..1698.13 rows=1 width=12) (actual time=29.977..29.980 rows=1 loops=1)
         ->  Index Scan using campaigns_pkey on campaigns c  (cost=0.15..2.37 rows=1 width=8) (actual time=0.005..0.007 rows=1 loops=1)
               Index Cond: (id = 1)
         ->  Aggregate  (cost=1695.73..1695.74 rows=1 width=8) (actual time=29.969..29.969 rows=1 loops=1)
               ->  Seq Scan on campaign_jobs  (cost=0.00..1694.48 rows=501 width=4) (actual time=0.010..19.097 rows=100000 loops=1)
                     Filter: (campaign_plan_id = c.campaign_plan_id)
   ->  Aggregate  (cost=1794.56..1794.57 rows=1 width=16) (actual time=36.243..36.243 rows=1 loops=1)
         ->  Seq Scan on changeset_jobs  (cost=0.00..1792.06 rows=500 width=12) (actual time=0.015..19.704 rows=100000 loops=1)
               Filter: (campaign_id = c.id)
 Planning Time: 0.241 ms
 Execution Time: 66.291 ms
 (13 rows)
	SET search_path=query_test;
	DROP SCHEMA IF EXISTS query_test CASCADE;
	CREATE SCHEMA query_test;

	CREATE TABLE campaigns (
	id int PRIMARY KEY,
	campaign_plan_id int
	);

	INSERT INTO campaigns VALUES
	(1, 1),
	(2, 2);

	CREATE TABLE changeset_jobs (
	id int PRIMARY KEY,
	campaign_id int REFERENCES campaigns,
	created_at timestamptz
	);

	INSERT INTO changeset_jobs
	SELECT g, 1, now() + (g\|\|' hour')::interval
	FROM generate_series(1, 100000) g;

	CREATE TABLE campaign_jobs (
	id int PRIMARY KEY,
	campaign_plan_id int
	);

	INSERT INTO campaign_jobs
	SELECT g, 1
	FROM generate_series(1, 100000) g;

	-- The original query PR #7864
	EXPLAIN ANALYZE
	SELECT CASE WHEN (
	SELECT count(campaign_jobs.id)
	FROM campaigns c
	JOIN campaign_jobs ON campaign_jobs.campaign_plan_id = c.campaign_plan_id
	WHERE c.id = 1
	) = (
	SELECT count(id)
	FROM changeset_jobs
	WHERE campaign_id = 1
	) THEN (
	SELECT max(created_at)
	FROM changeset_jobs
	WHERE campaign_id = 1
	)
	END;

	-- A simplification of the original query that produces 0 rows (instead of
	-- NULL) when the changeset_jobs.count doesn't match the campaign_jobs.count.
	-- It also produces a slightly better/faster query plan.
	EXPLAIN ANALYZE
	SELECT max(created_at)
	FROM changeset_jobs
	WHERE campaign_id = 1
	HAVING count(id) = (
	SELECT count(campaign_jobs.id)
	FROM campaigns c
	JOIN campaign_jobs ON campaign_jobs.campaign_plan_id = c.campaign_plan_id
	WHERE c.id = 1
	);

	-- Behaves like the previous query, but is arguably more complicated than the
	-- original. Howver, this query produces an optimal query plan and is ~2x
	-- faster than the original.
	EXPLAIN ANALYZE
	SELECT changeset_jobs_agg.max
	FROM campaigns c
	JOIN LATERAL (
	SELECT count(id)
	FROM campaign_jobs
	WHERE campaign_jobs.campaign_plan_id = c.campaign_plan_id
	) campaign_jobs_agg ON TRUE
	JOIN LATERAL (
	SELECT count(id), max(created_at)
	FROM changeset_jobs
	WHERE changeset_jobs.campaign_id = c.id
	) changeset_jobs_agg ON TRUE
	WHERE c.id = 1 AND campaign_jobs_agg.count = changeset_jobs_agg.count;
	SET
	DROP SCHEMA
	CREATE SCHEMA
	CREATE TABLE
	INSERT 0 2
	CREATE TABLE
	INSERT 0 100000
	CREATE TABLE
	INSERT 0 100000
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------------------------------------
	Result (cost=5302.86..5302.87 rows=1 width=8) (actual time=137.286..137.287 rows=1 loops=1)
	InitPlan 1 (returns $0)
	-> Aggregate (cost=1716.20..1716.21 rows=1 width=8) (actual time=64.484..64.484 rows=1 loops=1)
	-> Hash Join (cost=2.39..1714.95 rows=501 width=4) (actual time=0.063..52.608 rows=100000 loops=1)
	Hash Cond: (campaign_jobs.campaign_plan_id = c.campaign_plan_id)
	-> Seq Scan on campaign_jobs (cost=0.00..1444.18 rows=100118 width=8) (actual time=0.018..16.386 rows=100000 loops=1)
	-> Hash (cost=2.37..2.37 rows=1 width=4) (actual time=0.012..0.012 rows=1 loops=1)
	Buckets: 1024 Batches: 1 Memory Usage: 9kB
	-> Index Scan using campaigns_pkey on campaigns c (cost=0.15..2.37 rows=1 width=4) (actual time=0.005..0.005 rows=1 loops=1)
	Index Cond: (id = 1)
	InitPlan 2 (returns $1)
	-> Aggregate (cost=1793.31..1793.32 rows=1 width=8) (actual time=35.127..35.128 rows=1 loops=1)
	-> Seq Scan on changeset_jobs (cost=0.00..1792.06 rows=500 width=4) (actual time=0.021..23.473 rows=100000 loops=1)
	Filter: (campaign_id = 1)
	InitPlan 3 (returns $2)
	-> Aggregate (cost=1793.31..1793.32 rows=1 width=8) (actual time=37.659..37.659 rows=1 loops=1)
	-> Seq Scan on changeset_jobs changeset_jobs_1 (cost=0.00..1792.06 rows=500 width=8) (actual time=0.015..21.216 rows=100000 loops=1)
	Filter: (campaign_id = 1)
	Planning Time: 0.482 ms
	Execution Time: 137.408 ms
	(20 rows)

	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------------------------------------
	Aggregate (cost=3510.77..3510.79 rows=1 width=8) (actual time=98.486..98.486 rows=1 loops=1)
	Filter: (count(changeset_jobs.id) = $0)
	InitPlan 1 (returns $0)
	-> Aggregate (cost=1716.20..1716.21 rows=1 width=8) (actual time=62.262..62.263 rows=1 loops=1)
	-> Hash Join (cost=2.39..1714.95 rows=501 width=4) (actual time=0.052..49.942 rows=100000 loops=1)
	Hash Cond: (campaign_jobs.campaign_plan_id = c.campaign_plan_id)
	-> Seq Scan on campaign_jobs (cost=0.00..1444.18 rows=100118 width=8) (actual time=0.022..13.392 rows=100000 loops=1)
	-> Hash (cost=2.37..2.37 rows=1 width=4) (actual time=0.014..0.014 rows=1 loops=1)
	Buckets: 1024 Batches: 1 Memory Usage: 9kB
	-> Index Scan using campaigns_pkey on campaigns c (cost=0.15..2.37 rows=1 width=4) (actual time=0.010..0.010 rows=1 loops=1)
	Index Cond: (id = 1)
	-> Seq Scan on changeset_jobs (cost=0.00..1792.06 rows=500 width=12) (actual time=0.010..19.192 rows=100000 loops=1)
	Filter: (campaign_id = 1)
	Planning Time: 0.163 ms
	Execution Time: 98.559 ms
	(15 rows)

	QUERY PLAN
	----------------------------------------------------------------------------------------------------------------------------------------
	Nested Loop (cost=3490.45..3492.72 rows=1 width=8) (actual time=66.227..66.230 rows=1 loops=1)
	Join Filter: ((count(campaign_jobs.id)) = (count(changeset_jobs.id)))
	-> Nested Loop (cost=1695.88..1698.13 rows=1 width=12) (actual time=29.977..29.980 rows=1 loops=1)
	-> Index Scan using campaigns_pkey on campaigns c (cost=0.15..2.37 rows=1 width=8) (actual time=0.005..0.007 rows=1 loops=1)
	Index Cond: (id = 1)
	-> Aggregate (cost=1695.73..1695.74 rows=1 width=8) (actual time=29.969..29.969 rows=1 loops=1)
	-> Seq Scan on campaign_jobs (cost=0.00..1694.48 rows=501 width=4) (actual time=0.010..19.097 rows=100000 loops=1)
	Filter: (campaign_plan_id = c.campaign_plan_id)
	-> Aggregate (cost=1794.56..1794.57 rows=1 width=16) (actual time=36.243..36.243 rows=1 loops=1)
	-> Seq Scan on changeset_jobs (cost=0.00..1792.06 rows=500 width=12) (actual time=0.015..19.704 rows=100000 loops=1)
	Filter: (campaign_id = c.id)
	Planning Time: 0.241 ms
	Execution Time: 66.291 ms
	(13 rows)