-- new table bloat query
-- still needs work; is often off by +/- 20%
WITH constants AS (
    -- define some constants for sizes of things
    -- for reference down the query and easy maintenance
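    -- (hdr = 23 matches the heap tuple header size; ma = 8 is MAXALIGN on typical 64-bit builds)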
    SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 8 AS ma
),
no_stats AS (
    -- screen out tables that have attributes
    -- without stats, such as JSON columns
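    -- (left anti-join below: a table lands here if any of its columns has no row in pg_stats)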
    SELECT table_schema, table_name,
        n_live_tup::numeric as est_rows,
        pg_table_size(relid)::numeric as table_size
    FROM information_schema.columns
        JOIN pg_stat_user_tables as psut
            ON table_schema = psut.schemaname
            AND table_name = psut.relname
        LEFT OUTER JOIN pg_stats
            ON table_schema = pg_stats.schemaname
            AND table_name = pg_stats.tablename
            AND column_name = attname
    WHERE attname IS NULL
        AND table_schema NOT IN ('pg_catalog', 'information_schema')
    GROUP BY table_schema, table_name, relid, n_live_tup
),
null_headers AS (
    -- calculate null header sizes
    -- omitting tables which don't have complete stats
    -- and attributes which aren't visible
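    -- nullhdr approximates the tuple header plus null bitmap: hdr + 1 byte, plus one more byte for every 8 columns that are ever null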
    SELECT
        hdr+1+(sum(case when null_frac <> 0 THEN 1 else 0 END)/8) as nullhdr,
        SUM((1-null_frac)*avg_width) as datawidth,
        MAX(null_frac) as maxfracsum,
        schemaname,
        tablename,
        hdr, ma, bs
    FROM pg_stats CROSS JOIN constants
        LEFT OUTER JOIN no_stats
            ON schemaname = no_stats.table_schema
            AND tablename = no_stats.table_name
    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
        AND no_stats.table_name IS NULL
        AND EXISTS ( SELECT 1
            FROM information_schema.columns
            WHERE schemaname = columns.table_schema
                AND tablename = columns.table_name )
    GROUP BY schemaname, tablename, hdr, ma, bs
),
data_headers AS (
    -- estimate header and row size
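    -- datahdr pads the tuple header out to the next MAXALIGN (ma) boundary before adding the data width;
    -- nullhdr2 pads the null-bitmap estimate the same way, scaled by the largest per-column null fraction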
    SELECT
        ma, bs, hdr, schemaname, tablename,
        (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
        (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM null_headers
),
table_estimates AS (
    -- make estimates of how large the table should be
    -- based on row and page size
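    -- expected pages = ceil( rows * (aligned row width + null overhead + 4-byte item pointer) / (bs - 20) ),
    -- where bs - 20 roughly allows for the page header; expected_bytes is that page count times bs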
    SELECT schemaname, tablename, bs,
        reltuples::numeric as est_rows, relpages * bs as table_bytes,
        CEIL((reltuples*
            (datahdr + nullhdr2 + 4 + ma -
                (CASE WHEN datahdr%ma=0
                    THEN ma ELSE datahdr%ma END)
            )/(bs-20))) * bs AS expected_bytes,
        reltoastrelid
    FROM data_headers
        JOIN pg_class ON tablename = relname
        JOIN pg_namespace ON relnamespace = pg_namespace.oid
            AND schemaname = nspname
    WHERE pg_class.relkind = 'r'
),
estimates_with_toast AS (
    -- add in estimated TOAST table sizes
    -- estimate based on 4 toast tuples per page because we don't have
    -- anything better. the no_stats tables are appended later
    SELECT schemaname, tablename,
        TRUE as can_estimate,
        est_rows,
        table_bytes + ( coalesce(toast.relpages, 0) * bs ) as table_bytes,
        expected_bytes + ( ceil( coalesce(toast.reltuples, 0) / 4 ) * bs ) as expected_bytes
    FROM table_estimates LEFT OUTER JOIN pg_class as toast
        ON table_estimates.reltoastrelid = toast.oid
        AND toast.relkind = 't'
),
table_estimates_plus AS (
    -- add some extra metadata to the table data
    -- and calculations to be reused
    -- including whether we can't estimate it
    -- or whether we think it might be compressed
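    -- bloat_bytes = actual size minus expected size, floored at zero when the estimate exceeds reality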
    SELECT current_database() as databasename,
        schemaname, tablename, can_estimate,
        est_rows,
        CASE WHEN table_bytes > 0
            THEN table_bytes::NUMERIC
            ELSE NULL::NUMERIC END
            AS table_bytes,
        CASE WHEN expected_bytes > 0
            THEN expected_bytes::NUMERIC
            ELSE NULL::NUMERIC END
            AS expected_bytes,
        CASE WHEN expected_bytes > 0 AND table_bytes > 0
            AND expected_bytes <= table_bytes
            THEN (table_bytes - expected_bytes)::NUMERIC
            ELSE 0::NUMERIC END AS bloat_bytes
    FROM estimates_with_toast
    UNION ALL
    SELECT current_database() as databasename,
        table_schema, table_name, FALSE,
        est_rows, table_size,
        NULL::NUMERIC, NULL::NUMERIC
    FROM no_stats
),
bloat_data AS (
    -- do final math calculations and formatting
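    -- (dividing by 1024^2 converts bytes to megabytes; pct_bloat is bloat as a percentage of actual table size)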
    select current_database() as databasename,
        schemaname, tablename, can_estimate,
        table_bytes, round(table_bytes/(1024^2)::NUMERIC,3) as table_mb,
        expected_bytes, round(expected_bytes/(1024^2)::NUMERIC,3) as expected_mb,
        round(bloat_bytes*100/table_bytes) as pct_bloat,
        round(bloat_bytes/(1024::NUMERIC^2),2) as mb_bloat,
        table_bytes as actual_table_bytes, expected_bytes as actual_expected_bytes, est_rows
    FROM table_estimates_plus
)
-- filter output for bloated tables
SELECT databasename, schemaname, tablename,
    can_estimate,
    est_rows,
    pct_bloat, mb_bloat,
    table_mb,
    expected_mb,
    actual_table_bytes, actual_expected_bytes
FROM bloat_data
-- this where clause defines which tables actually appear
-- in the bloat chart
-- the commented example below filters for tables which are either 50%
-- bloated and more than 10 MB in size, or more than 25%
-- bloated and more than 1 GB in size
WHERE pct_bloat is not null
-- AND ( pct_bloat >= 50 AND mb_bloat >= 10 )
-- OR ( pct_bloat >= 25 AND mb_bloat >= 1000 )
ORDER BY pct_bloat desc, mb_bloat DESC;
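
-- Example usage (a sketch; the file name and database name are illustrative):
--   psql -d your_database -f table_bloat_check.sql
-- or paste the whole query into an interactive psql session.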