Created
April 6, 2023 15:56
-
-
Save jiaaro/3be34274b7b69bf8fdcfc430297c1f4b to your computer and use it in GitHub Desktop.
Bloat lightning talk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Setup | |
drop table bloattable; | |
create table bloattable ( | |
a int primary key, | |
b int not null, | |
x uuid not null, | |
y uuid not null | |
); | |
create index bloatindex_b_idx on bloattable (x uuid_ops); | |
-- Create initial data | |
insert into bloattable (a, b, x, y) | |
select | |
i, | |
100, | |
gen_random_uuid(), | |
gen_random_uuid() | |
from generate_series(1, 1000) i; | |
-- Run this to generate bloat. Run it more times for more bloat; | |
update bloattable | |
set b = floor(random() * 100 + 1)::int; | |
-- Delete all data to get 100% bloat | |
delete from bloattable where true; | |
-- Hanging transaction, don't commit/end and autovacuum | |
-- will not be able to clean up the | |
begin; | |
select * | |
from bloattable; | |
-- keep this open in a | |
end; | |
-- doesn't fix bloat: | |
vacuum bloattable; | |
-- fixed the bloat but locks the whole table in the meanwhile | |
vacuum full bloattable; | |
-- fixes bloat online, without locking table: https://reorg.github.io/pg_repack/ | |
create extension pg_repack; | |
/* | |
* Actual fixing of bloat is done using the pg_repack binary, | |
* running outside the DB: | |
* - https://hub.packtpub.com/rob-sullivan-using-pg_repack-in-aws-rds-from-planet-postgresql/ | |
* | |
* /usr/local/bin/pg_repack-1.4.3/pg_repack -U postgres -h 127.0.0.1 -k -d mydb -t bloattable -j 4 | |
*/ | |
-- you have to analyze to get up-to-date bloat estimates | |
analyze bloattable; | |
-- Calculate table bloat | |
SELECT current_database(), schemaname, tblname, bs*tblpages AS real_size, | |
(tblpages-est_tblpages)*bs AS extra_size, | |
CASE WHEN tblpages > 0 AND tblpages - est_tblpages > 0 | |
THEN 100 * (tblpages - est_tblpages)/tblpages::float | |
ELSE 0 | |
END AS extra_pct, fillfactor, | |
CASE WHEN tblpages - est_tblpages_ff > 0 | |
THEN (tblpages-est_tblpages_ff)*bs | |
ELSE 0 | |
END AS bloat_size, | |
CASE WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0 | |
THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float | |
ELSE 0 | |
END AS bloat_pct, is_na | |
-- , tpl_hdr_size, tpl_data_size, (pst).free_percent + (pst).dead_tuple_percent AS real_frag -- (DEBUG INFO) | |
FROM ( | |
SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages, | |
ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff, | |
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na | |
-- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) AS pst -- (DEBUG INFO) | |
FROM ( | |
SELECT | |
( 4 + tpl_hdr_size + tpl_data_size + (2*ma) | |
- CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END | |
- CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END | |
) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages, | |
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na | |
-- , tpl_hdr_size, tpl_data_size | |
FROM ( | |
SELECT | |
tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples, | |
tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages, | |
coalesce(toast.reltuples, 0) AS toasttuples, | |
coalesce(substring( | |
array_to_string(tbl.reloptions, ' ') | |
FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor, | |
current_setting('block_size')::numeric AS bs, | |
CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma, | |
24 AS page_hdr, | |
23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END | |
+ CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size, | |
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size, | |
bool_or(att.atttypid = 'pg_catalog.name'::regtype) | |
OR sum(CASE WHEN att.attnum > 0 THEN 1 ELSE 0 END) <> count(s.attname) AS is_na | |
FROM pg_attribute AS att | |
JOIN pg_class AS tbl ON att.attrelid = tbl.oid | |
JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace | |
LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname | |
AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname | |
LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid | |
WHERE NOT att.attisdropped | |
AND tbl.relkind in ('r','m') | |
GROUP BY 1,2,3,4,5,6,7,8,9,10 | |
ORDER BY 2,3 | |
) AS s | |
) AS s2 | |
) AS s3 | |
WHERE NOT is_na | |
ORDER BY schemaname, tblname; | |
-- Get index bloat | |
SELECT current_database(), nspname AS schemaname, tblname, idxname, bs*(relpages)::bigint AS real_size, | |
bs*(relpages-est_pages)::bigint AS extra_size, | |
100 * (relpages-est_pages)::float / relpages AS extra_pct, | |
fillfactor, | |
CASE WHEN relpages > est_pages_ff | |
THEN bs*(relpages-est_pages_ff) | |
ELSE 0 | |
END AS bloat_size, | |
100 * (relpages-est_pages_ff)::float / relpages AS bloat_pct, | |
is_na | |
-- , 100-(pst).avg_leaf_density AS pst_avg_bloat, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples, relpages -- (DEBUG INFO) | |
FROM ( | |
SELECT coalesce(1 + | |
ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth) | |
) AS est_pages, | |
coalesce(1 + | |
ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0 | |
) AS est_pages_ff, | |
bs, nspname, tblname, idxname, relpages, fillfactor, is_na | |
-- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO) | |
FROM ( | |
SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor, | |
( index_tuple_hdr_bm + | |
maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN | |
WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign | |
ELSE index_tuple_hdr_bm%maxalign | |
END | |
+ nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN | |
WHEN nulldatawidth = 0 THEN 0 | |
WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign | |
ELSE nulldatawidth::integer%maxalign | |
END | |
)::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na | |
-- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO) | |
FROM ( | |
SELECT n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, | |
i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs, | |
CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) | |
WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8 | |
ELSE 4 | |
END AS maxalign, | |
/* per page header, fixed size: 20 for 7.X, 24 for others */ | |
24 AS pagehdr, | |
/* per page btree opaque data */ | |
16 AS pageopqdata, | |
/* per tuple header: add IndexAttributeBitMapData if some cols are null-able */ | |
CASE WHEN max(coalesce(s.null_frac,0)) = 0 | |
THEN 8 -- IndexTupleData size | |
ELSE 8 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8) | |
END AS index_tuple_hdr_bm, | |
/* data len: we remove null values save space using it fractionnal part from stats */ | |
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth, | |
max( CASE WHEN i.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na | |
FROM ( | |
SELECT ct.relname AS tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor, | |
coalesce(a1.attnum, a2.attnum) AS attnum, coalesce(a1.attname, a2.attname) AS attname, coalesce(a1.atttypid, a2.atttypid) AS atttypid, | |
CASE WHEN a1.attnum IS NULL | |
THEN ic.idxname | |
ELSE ct.relname | |
END AS attrelname | |
FROM ( | |
SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, | |
pg_catalog.generate_series(1,indnatts) AS attpos | |
FROM ( | |
SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid, | |
i.indexrelid AS idxoid, | |
coalesce(substring( | |
array_to_string(ci.reloptions, ' ') | |
from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor, | |
i.indnatts, | |
pg_catalog.string_to_array(pg_catalog.textin( | |
pg_catalog.int2vectorout(i.indkey)),' ')::int[] AS indkey | |
FROM pg_catalog.pg_index i | |
JOIN pg_catalog.pg_class ci ON ci.oid = i.indexrelid | |
WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree') | |
AND ci.relpages > 0 | |
) AS idx_data | |
) AS ic | |
JOIN pg_catalog.pg_class ct ON ct.oid = ic.tbloid | |
LEFT JOIN pg_catalog.pg_attribute a1 ON | |
ic.indkey[ic.attpos] <> 0 | |
AND a1.attrelid = ic.tbloid | |
AND a1.attnum = ic.indkey[ic.attpos] | |
LEFT JOIN pg_catalog.pg_attribute a2 ON | |
ic.indkey[ic.attpos] = 0 | |
AND a2.attrelid = ic.idxoid | |
AND a2.attnum = ic.attpos | |
) i | |
JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace | |
JOIN pg_catalog.pg_stats s ON s.schemaname = n.nspname | |
AND s.tablename = i.attrelname | |
AND s.attname = i.attname | |
GROUP BY 1,2,3,4,5,6,7,8,9,10,11 | |
) AS rows_data_stats | |
) AS rows_hdr_pdg_stats | |
) AS relation_stats | |
where tblname = 'bloattable' | |
ORDER BY nspname, tblname, idxname; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment