Skip to content

Instantly share code, notes, and snippets.

@robertrossmann
Last active January 26, 2024 03:00
Show Gist options
  • Save robertrossmann/fa08cf8e612c6a6cd0298a48b047e946 to your computer and use it in GitHub Desktop.
Postgres diagnostics
-- List all existing indexes and include some useful info about them
-- (incl. the index's definition).
-- The LEFT JOIN + `schemaname IS NOT NULL` filter restricts the output to
-- indexes tracked by pg_stat_user_indexes, i.e. user tables only.
SELECT
    ui.schemaname AS schemaname,
    t.relname AS tablename,
    ix.relname AS indexname,
    -- Indexed columns/expressions, extracted from the reconstructed definition
    regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\(]*\((.*)\)$', '\1') AS columns,
    -- Access method (btree, hash, gin, gist, brin, ...)
    regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \(.*', '\1') AS algorithm,
    -- Quoted labels: UNIQUE/PRIMARY are reserved keywords; the quoted lowercase
    -- form yields the same output column names the unquoted keywords folded to
    i.indisunique AS "unique",
    i.indisprimary AS "primary",
    -- An invalid index (e.g. left behind by a failed CREATE INDEX CONCURRENTLY)
    -- is never used by the planner and should be rebuilt or dropped
    i.indisvalid AS valid,
    pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
    ui.idx_scan AS indexscans,
    ui.idx_tup_read AS tuplereads,
    ui.idx_tup_fetch AS tuplefetches,
    pg_get_indexdef(i.indexrelid) AS definition
FROM pg_index i
INNER JOIN pg_class t ON t.oid = i.indrelid
INNER JOIN pg_class ix ON ix.oid = i.indexrelid
LEFT JOIN pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
WHERE
    ui.schemaname IS NOT NULL
ORDER BY
    ui.schemaname ASC,
    tablename ASC,
    indexname ASC;
-- Enable the auto_explain plugin. Useful for doing `explain analyze` on PG
-- functions which consist of many nested statements: it reveals which of those
-- statements are taking long and why.
-- Sadly, this plugin is not available on Heroku Postgres. You should migrate to RDS. :)
-- On AWS RDS the library lives under the plugins directory:
LOAD '$libdir/plugins/auto_explain';
-- On a general PG installation the bare module name is enough:
LOAD 'auto_explain';
SET auto_explain.log_format = text;
SET auto_explain.log_analyze = true;
SET auto_explain.log_nested_statements = true;
SET auto_explain.log_timing = true;
-- Only log statements slower than this many milliseconds.
-- Set this to 0 to enable analysis on all statements.
SET auto_explain.log_min_duration = 500;
-- Optionally send all the auto_explain messages to your preferred GUI client;
-- otherwise the messages might only appear in the db logs.
SET client_min_messages = log;
-- See which queries are being blocked by which other query.
-- Note that seeing SOMETHING here does not necessarily mean a problem, but the results
-- should change quickly. Only when a statement hangs for too long it is a cause for concern.
--
-- How it works: start from ungranted lock requests (blocked_locks), then
-- self-join pg_locks to find a lock on the same object held by a DIFFERENT
-- backend (blocking_locks), and attach the corresponding sessions from
-- pg_stat_activity on both sides.
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS current_statement_in_blocking_process,
blocked_activity.xact_start AS query_start
FROM
pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
-- Match on every lock-target column. IS NOT DISTINCT FROM is required because
-- most pg_locks columns are NULL except the ones relevant to the given
-- locktype (e.g. relation locks have NULL transactionid, and vice versa);
-- plain `=` would yield UNKNOWN on those NULLs and drop the row.
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
-- A backend never blocks itself
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE
-- Only lock requests that are still waiting
NOT blocked_locks.GRANTED
ORDER BY
blocking_pid;
-- See how much of the table and index data is current, live rows and how much of it is dead rows.
-- If you see big numbers in the ratios, it might be time to run `VACUUM` or, if you can afford downtime, `VACUUM FULL`.
-- Do not forget to also do `ANALYZE`, either after you have vacuumed or as part of the vacuuming itself with
-- `VACUUM [FULL] ANALYZE;`.
--
-- NOTE(review): this is the well-known bloat *estimation* query (check_postgres
-- lineage). It derives an expected page count ("otta") from pg_stats row-width
-- statistics and compares it to the actual relpages; results are rough
-- estimates, not exact measurements.
SELECT
current_database(), schemaname, tablename, /*reltuples::bigint, relpages::bigint, otta,*/
-- tbloat: actual pages / expected pages for the table (1.0 = no bloat)
ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::FLOAT/otta END)::NUMERIC,1) AS tbloat,
-- wastedbytes: pages above the expected count, converted to bytes
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END AS wastedbytes,
iname, /*ituples::bigint, ipages::bigint, iotta,*/
-- ibloat / wastedibytes: same computation for the table's index
ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::FLOAT/iotta END)::NUMERIC,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes
FROM (
-- sml: per-table actuals joined with the expected page counts (otta/iotta)
SELECT
schemaname, tablename, cc.reltuples, cc.relpages, bs,
-- otta: expected table pages = tuples * (data width + padded header) / usable page size
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::FLOAT)) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::FLOAT)),0) AS iotta -- very rough approximation, assumes all cols
FROM (
-- rs: per-table row-header sizes, alignment-padded
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::NUMERIC AS datahdr,
(maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
-- foo: aggregate pg_stats into average row width and null-bitmap size per table
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
-- null bitmap: one bit per column, rounded up to whole bytes
SELECT 1+COUNT(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, (
-- constants: block size, tuple-header size (hdr) and max alignment (ma)
-- inferred from the server version string
SELECT
(SELECT current_setting('block_size')::NUMERIC) AS bs,
CASE WHEN SUBSTRING(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
JOIN pg_class cc ON cc.relname = rs.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
WHERE schemaname = 'public'
ORDER BY wastedbytes DESC;
-- See on which tables PG is doing a lot of sequential scans. On small tables
-- a seq scan is more efficient than an index scan so always think if adding
-- an index would really make sense.
-- Sadly, it does not show which fields are "hot", only the table names.
SELECT
    relname AS "table",
    pg_size_pretty(pg_relation_size(relid::regclass)) AS size,
    seq_scan AS sequential_scans,
    COALESCE(idx_scan, 0) AS index_scans,
    -- idx_scan is NULL for tables with no indexes at all; without COALESCE the
    -- difference would be NULL and the CASE below would misreport exactly those
    -- tables (the most likely candidates for a missing index) as 'OK'
    seq_scan - COALESCE(idx_scan, 0) AS difference,
    CASE WHEN seq_scan - COALESCE(idx_scan, 0) > 0 THEN
        'Missing Index?'
    ELSE
        'OK'
    END AS status
FROM
    pg_stat_all_tables
WHERE
    schemaname = 'public'
    -- Ignore tiny tables (< ~80 kB) where a seq scan is the right plan anyway
    AND pg_relation_size(relid::regclass) > 80000
ORDER BY
    difference DESC;
-- See which indexes are not being used or are being used seldomly.
-- Change the `idx_scan` query filter to your liking - for heavy workloads,
-- even an index with several thousand hits could be potentially "useless"
-- if other indexes have hit rates in millions.
SELECT
    ui.schemaname AS schemaname,
    ui.relname AS tablename,
    ui.indexrelname AS indexname,
    pg_size_pretty(pg_relation_size(idx.indexrelid)) AS size,
    ui.idx_scan AS indexscans
FROM
    pg_stat_user_indexes ui
    -- pg_index supplies indisunique so constraint-backing indexes can be skipped
    INNER JOIN pg_index idx ON ui.indexrelid = idx.indexrelid
WHERE
    -- Unique indexes (incl. primary keys) enforce constraints even when never scanned
    NOT idx.indisunique
    AND ui.idx_scan <= 50
ORDER BY
    pg_relation_size(idx.indexrelid) DESC,
    ui.relname ASC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment