Skip to content

Instantly share code, notes, and snippets.

@bensheldon
Created March 6, 2026 17:48
Show Gist options
  • Select an option

  • Save bensheldon/1785f83e09312419e1ed777c6064915e to your computer and use it in GitHub Desktop.

Select an option

Save bensheldon/1785f83e09312419e1ed777c6064915e to your computer and use it in GitHub Desktop.
# frozen_string_literal: true
# Periodic maintenance job that applies a set of PostgreSQL tuning heuristics:
# drops leftover invalid indexes, tightens per-table autovacuum settings,
# vacuums/analyzes stale tables, and rebuilds bloated indexes.
#
# All statements are issued through a raw connection; VACUUM and
# REINDEX CONCURRENTLY cannot run inside a transaction block, so those steps
# bail out early if a transaction is open on the checked-out connection.
class TunePostgresJob < ApplicationJob
  # Global autovacuum setting thresholds. If any global setting exceeds its threshold,
  # apply the threshold value as a per-table override. If the global setting is already
  # at or below the threshold, remove any per-table override so the global value applies.
  AUTOVACUUM_SETTINGS = {
    autovacuum_vacuum_scale_factor: 0.01,
    autovacuum_analyze_scale_factor: 0.01,
    autovacuum_vacuum_insert_scale_factor: 0.01,
    autovacuum_vacuum_threshold: 100,
    autovacuum_analyze_threshold: 100,
    autovacuum_vacuum_insert_threshold: 100,
  }.freeze

  # Heuristic thresholds for index bloat detection.
  # A table needs its indexes rebuilt when a significant portion of all tuples are dead,
  # indicating accumulated index bloat from high-churn write patterns.
  REINDEX_DEAD_TUPLE_RATIO = 0.2
  REINDEX_DEAD_TUPLE_MIN = 1_000

  # Runs all four maintenance passes against a single checked-out connection.
  def perform
    ApplicationRecord.with_connection do |conn|
      drop_invalid_indexes(conn)
      tune_autovacuum_settings(conn)
      vacuum_stale_tables(conn)
      reindex_bloated_tables(conn)
    end
  end

  private

  # Failed concurrent index builds (CREATE INDEX CONCURRENTLY or REINDEX TABLE CONCURRENTLY)
  # leave behind invalid indexes marked with indisvalid = false. These indexes are still
  # maintained on writes but never used for queries, so they must be dropped.
  #
  # IMPORTANT: an index is also marked invalid *while* a concurrent build is still
  # running, so we must exclude indexes that appear in pg_stat_progress_create_index —
  # otherwise this job would drop (or block against) an in-flight build.
  def drop_invalid_indexes(conn)
    sql = <<~SQL
      SELECT quote_ident(n.nspname) || '.' || quote_ident(i.relname) AS full_name
      FROM pg_index ix
      JOIN pg_class i ON i.oid = ix.indexrelid
      JOIN pg_class t ON t.oid = ix.indrelid
      JOIN pg_namespace n ON n.oid = t.relnamespace
      WHERE NOT ix.indisvalid
        AND n.nspname = ANY(current_schemas(false))
        AND ix.indexrelid NOT IN (SELECT index_relid FROM pg_stat_progress_create_index)
    SQL
    conn.execute(sql).each do |row|
      # full_name is already quote_ident-quoted by the query above.
      conn.execute("DROP INDEX IF EXISTS #{row["full_name"]}")
    end
  end

  # Compares each global autovacuum setting against its threshold and applies
  # per-table storage-parameter overrides (ALTER TABLE ... SET) where the global
  # value is too loose, or removes overrides (ALTER TABLE ... RESET) where the
  # global value is already tight enough.
  def tune_autovacuum_settings(conn)
    settings = global_settings(conn)
    to_set = AUTOVACUUM_SETTINGS.select { |name, threshold| settings[name].to_f > threshold }
    to_reset = AUTOVACUUM_SETTINGS.keys - to_set.keys

    conn.tables.each do |table|
      quoted = conn.quote_table_name(table)
      if to_set.any?
        set_params = to_set.map { |k, v| "#{k} = #{v}" }.join(", ")
        conn.execute("ALTER TABLE #{quoted} SET (#{set_params})")
      end
      if to_reset.any?
        # RESET on a table without the override is a harmless no-op.
        reset_params = to_reset.join(", ")
        conn.execute("ALTER TABLE #{quoted} RESET (#{reset_params})")
      end
    end
  end

  # Reads the current global values for AUTOVACUUM_SETTINGS from pg_settings.
  # Returns a Hash of Symbol setting name => Float value. Names are interpolated
  # from the frozen constant, never from user input.
  def global_settings(conn)
    names = AUTOVACUUM_SETTINGS.keys.map { |k| "'#{k}'" }.join(", ")
    sql = <<~SQL
      SELECT name, setting::float AS value
      FROM pg_settings
      WHERE name IN (#{names})
    SQL
    conn.execute(sql).each_with_object({}) do |row, hash|
      hash[row["name"].to_sym] = row["value"].to_f
    end
  end

  # VACUUM ANALYZEs any user table that has gone over a week without a (auto)vacuum
  # or (auto)analyze, or that has never had one. GREATEST ignores NULL arguments in
  # Postgres, so the explicit both-NULL checks cover never-maintained tables.
  # VACUUM cannot run inside a transaction block, hence the early return.
  def vacuum_stale_tables(conn)
    return if conn.open_transactions > 0

    sql = <<~SQL
      SELECT quote_ident(schemaname) || '.' || quote_ident(relname) AS full_name
      FROM pg_stat_user_tables
      WHERE schemaname = ANY(current_schemas(false))
        AND (
          GREATEST(last_vacuum, last_autovacuum) < NOW() - INTERVAL '1 week'
          OR (last_vacuum IS NULL AND last_autovacuum IS NULL)
          OR GREATEST(last_analyze, last_autoanalyze) < NOW() - INTERVAL '1 week'
          OR (last_analyze IS NULL AND last_autoanalyze IS NULL)
        )
    SQL
    conn.execute(sql).each do |row|
      conn.execute("VACUUM ANALYZE #{row["full_name"]}")
    end
  end

  # Dead tuples in the table heap correspond to dead entries in all its indexes.
  # Once autovacuum reclaims heap space, those index pages remain bloated until
  # VACUUM cleans them up or the index is rebuilt. Tables with >= 20% dead tuple
  # ratio and at least 1,000 dead tuples are candidates for concurrent reindexing.
  # Tables with a concurrent index build already in progress are skipped.
  # REINDEX ... CONCURRENTLY cannot run inside a transaction block.
  def reindex_bloated_tables(conn)
    return if conn.open_transactions > 0

    sql = <<~SQL
      SELECT quote_ident(schemaname) || '.' || quote_ident(relname) AS full_name
      FROM pg_stat_user_tables
      WHERE schemaname = ANY(current_schemas(false))
        AND n_dead_tup >= #{REINDEX_DEAD_TUPLE_MIN}
        AND n_dead_tup::float / GREATEST(n_live_tup + n_dead_tup, 1) >= #{REINDEX_DEAD_TUPLE_RATIO}
        AND relid NOT IN (SELECT relid FROM pg_stat_progress_create_index)
    SQL
    conn.execute(sql).each do |row|
      conn.execute("REINDEX TABLE CONCURRENTLY #{row["full_name"]}")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment