@rgarner
Last active August 29, 2015 14:06
Query plans for update on hits import
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Update on hits  (cost=111.65..925161.69 rows=13 width=130) (actual time=3022770.690..3022770.690 rows=0 loops=1)
   ->  Nested Loop  (cost=111.65..925161.69 rows=13 width=130) (actual time=3022770.687..3022770.687 rows=0 loops=1)
         Join Filter: ((hits.count IS DISTINCT FROM st.count) AND (st.path = (hits.path)::text))
         ->  Hash Join  (cost=111.65..1885.26 rows=47071 width=60) (actual time=5.025..312.918 rows=47071 loops=1)
               Hash Cond: ((st.hostname)::text = (hosts.hostname)::text)
               ->  Seq Scan on hits_staging st  (cost=0.00..1008.71 rows=47071 width=65) (actual time=0.020..112.589 rows=47071 loops=1)
               ->  Hash  (cost=79.62..79.62 rows=2562 width=35) (actual time=4.978..4.978 rows=2562 loops=1)
                     Buckets: 1024  Batches: 1  Memory Usage: 174kB
                     ->  Seq Scan on hosts  (cost=0.00..79.62 rows=2562 width=35) (actual time=0.012..2.099 rows=2562 loops=1)
         ->  Index Scan using index_hits_on_host_id_and_hit_on_and_http_status on hits  (cost=0.00..19.52 rows=4 width=118) (actual time=0.031..54.440 rows=41886 loops=47071)
               Index Cond: ((host_id = hosts.id) AND (hit_on = st.hit_on) AND ((http_status)::text = (st.http_status)::text))
 Total runtime: 3022770.954 ms
(12 rows)
transition_production=# select count(*) from hits;
count
----------
11781100
(1 row)
transition_production=# select count(*) from hits_staging;
count
-------
33981
(1 row)
transition_production=# EXPLAIN ANALYZE UPDATE hits
SET count = st.count
FROM hits_staging st
INNER JOIN hosts ON hosts.hostname = st.hostname
WHERE
hits.path = st.path AND
hits.http_status = st.http_status AND
hits.hit_on = st.hit_on AND
hits.host_id = hosts.id AND
hits.count IS DISTINCT FROM st.count;
                                                                            QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Update on hits  (cost=102.29..408357.07 rows=1 width=138) (actual time=54652.800..54652.800 rows=0 loops=1)
   ->  Nested Loop  (cost=102.29..408357.07 rows=1 width=138) (actual time=54652.798..54652.798 rows=0 loops=1)
         Join Filter: ((hits.count IS DISTINCT FROM st.count) AND (st.path = (hits.path)::text))
         ->  Hash Join  (cost=102.29..1339.81 rows=33981 width=60) (actual time=4.700..94.822 rows=33981 loops=1)
               Hash Cond: ((st.hostname)::text = (hosts.hostname)::text)
               ->  Seq Scan on hits_staging st  (cost=0.00..727.81 rows=33981 width=65) (actual time=0.014..34.757 rows=33981 loops=1)
               ->  Hash  (cost=70.46..70.46 rows=2546 width=35) (actual time=4.607..4.607 rows=2546 loops=1)
                     Buckets: 1024  Batches: 1  Memory Usage: 173kB
                     ->  Seq Scan on hosts  (cost=0.00..70.46 rows=2546 width=35) (actual time=0.016..2.051 rows=2546 loops=1)
         ->  Index Scan using index_hits_on_host_id_and_hit_on_and_http_status on hits  (cost=0.00..11.93 rows=2 width=126) (actual time=1.604..1.604 rows=0 loops=33981)
               Index Cond: ((host_id = hosts.id) AND (hit_on = st.hit_on) AND ((http_status)::text = (st.http_status)::text))
 Total runtime: 54652.909 ms
(12 rows)
psql transition_development on [local] as russ
=> select count(*) from hits;
count
──────────
16775876
(1 row)
psql transition_development on [local] as russ
=> select count(*) from hits_staging;
count
───────
33981
(1 row)
psql transition_development on [local] as russ
=> EXPLAIN ANALYZE UPDATE hits
SET count = st.count
FROM hits_staging st
INNER JOIN hosts ON hosts.hostname = st.hostname
WHERE
hits.path = st.path AND
hits.http_status = st.http_status AND
hits.hit_on = st.hit_on AND
hits.host_id = hosts.id AND
hits.count IS DISTINCT FROM st.count;
QUERY PLAN
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 Update on hits  (cost=76.17..292999.91 rows=2 width=128) (actual time=2305.906..2305.906 rows=0 loops=1)
   ->  Nested Loop  (cost=76.17..292999.91 rows=2 width=128) (actual time=2305.899..2305.899 rows=0 loops=1)
         ->  Hash Join  (cost=75.61..1313.13 rows=33981 width=60) (actual time=1.208..44.188 rows=33981 loops=1)
               Hash Cond: ((st.hostname)::text = (hosts.hostname)::text)
               ->  Seq Scan on hits_staging st  (cost=0.00..727.81 rows=33981 width=65) (actual time=0.008..18.709 rows=33981 loops=1)
               ->  Hash  (cost=50.27..50.27 rows=2027 width=36) (actual time=1.186..1.186 rows=2027 loops=1)
                     Buckets: 1024  Batches: 1  Memory Usage: 139kB
                     ->  Seq Scan on hosts  (cost=0.00..50.27 rows=2027 width=36) (actual time=0.019..0.583 rows=2027 loops=1)
         ->  Index Scan using index_hits_on_host_id_and_path_and_hit_on_and_http_status on hits  (cost=0.56..8.57 rows=1 width=116) (actual time=0.064..0.064 rows=0 loops=33981)
               Index Cond: ((host_id = hosts.id) AND (path = st.path) AND (hit_on = st.hit_on) AND ((http_status)::text = (st.http_status)::text))
               Filter: (count IS DISTINCT FROM st.count)
               Rows Removed by Filter: 1
 Total runtime: 2306.050 ms
(13 rows)
Time: 2310.341 ms
rgarner commented Sep 17, 2014

Method:

  • Disabled hot standby (slave) features to rule out any differences introduced by streaming WAL replication
  • Loaded hits_staging via COPY with the contents of pre-transition-stats/hits/www.hmrc.gov.uk_2014-06-24.tsv
  • Ran ANALYZE hits; and ANALYZE hits_staging;
  • Differences other than version: the 9.1 test instance (preview) has 11M hits rows, while the 9.3.4 instance (dev) has 16M, so on data size alone the 9.3.4 instance should theoretically be the slower of the two
  • Ran EXPLAIN ANALYZE on both servers against a single UPDATE, the largest current bottleneck in the hits import
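The load-and-analyze steps above correspond roughly to the following psql session. This is a sketch: the column list and the use of client-side \copy are assumptions, not taken from the actual import script; only the table names, the TSV path, and the ANALYZE statements come from the notes above.

```sql
-- Load the staging table from the pre-transition stats TSV.
-- Column list is an assumption; \copy's default text format
-- already uses tab as the delimiter.
\copy hits_staging (hostname, path, http_status, hit_on, count) from 'pre-transition-stats/hits/www.hmrc.gov.uk_2014-06-24.tsv'

-- Refresh planner statistics so both servers plan against
-- accurate row counts before the EXPLAIN ANALYZE runs.
ANALYZE hits;
ANALYZE hits_staging;
```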

9.1 - 54.65s with 11M hits rows and 33K staged
9.3.4 - 2.3s with 16M hits rows and 33K staged

9.3.4 can use a much more helpful index (one that includes path), resolving every join condition from the index alone and leaving only the count comparison as a filter.
9.1 picks the index without path and filters path values in the outer loop of the nested-loop join, meaning it has to visit actual random heap pages (an order of magnitude slower) to perform the filter.
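The index names in the two plans suggest definitions along these lines. The column lists are inferred from the names alone, so treat this as a sketch of the assumed schema rather than the actual migration:

```sql
-- Index chosen by 9.1: no path column, so path must be checked
-- against heap tuples in the outer loop of the nested-loop join.
CREATE INDEX index_hits_on_host_id_and_hit_on_and_http_status
  ON hits (host_id, hit_on, http_status);

-- Index chosen by 9.3.4: includes path, so all four equality
-- conditions appear as Index Conds and only the
-- "count IS DISTINCT FROM st.count" check remains as a filter.
CREATE INDEX index_hits_on_host_id_and_path_and_hit_on_and_http_status
  ON hits (host_id, path, hit_on, http_status);
```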

There are a number of performance improvements in 9.2 and up that help explain the discrepancy here. See also what's new in 9.3.
