Created
March 6, 2026 17:48
-
-
Save bensheldon/1785f83e09312419e1ed777c6064915e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # frozen_string_literal: true | |
| class TunePostgresJob < ApplicationJob | |
| # Global autovacuum setting thresholds. If any global setting exceeds its threshold, | |
| # apply the threshold value as a per-table override. If the global setting is already | |
| # at or below the threshold, remove any per-table override so the global value applies. | |
| AUTOVACUUM_SETTINGS = { | |
| autovacuum_vacuum_scale_factor: 0.01, | |
| autovacuum_analyze_scale_factor: 0.01, | |
| autovacuum_vacuum_insert_scale_factor: 0.01, | |
| autovacuum_vacuum_threshold: 100, | |
| autovacuum_analyze_threshold: 100, | |
| autovacuum_vacuum_insert_threshold: 100, | |
| }.freeze | |
| # Heuristic thresholds for index bloat detection. | |
| # A table needs its indexes rebuilt when a significant portion of all tuples are dead, | |
| # indicating accumulated index bloat from high-churn write patterns. | |
| REINDEX_DEAD_TUPLE_RATIO = 0.2 | |
| REINDEX_DEAD_TUPLE_MIN = 1_000 | |
| def perform | |
| ApplicationRecord.with_connection do |conn| | |
| drop_invalid_indexes(conn) | |
| tune_autovacuum_settings(conn) | |
| vacuum_stale_tables(conn) | |
| reindex_bloated_tables(conn) | |
| end | |
| end | |
| private | |
| def drop_invalid_indexes(conn) | |
| # Failed concurrent index builds (CREATE INDEX CONCURRENTLY or REINDEX TABLE CONCURRENTLY) | |
| # leave behind invalid indexes marked with indisvalid = false. These indexes are still | |
| # maintained on writes but never used for queries, so they must be dropped. | |
| sql = <<~SQL | |
| SELECT quote_ident(n.nspname) || '.' || quote_ident(i.relname) AS full_name | |
| FROM pg_index ix | |
| JOIN pg_class i ON i.oid = ix.indexrelid | |
| JOIN pg_class t ON t.oid = ix.indrelid | |
| JOIN pg_namespace n ON n.oid = t.relnamespace | |
| WHERE NOT ix.indisvalid | |
| AND n.nspname = ANY(current_schemas(false)) | |
| SQL | |
| conn.execute(sql).each do |row| | |
| conn.execute("DROP INDEX IF EXISTS #{row["full_name"]}") | |
| end | |
| end | |
| def tune_autovacuum_settings(conn) | |
| settings = global_settings(conn) | |
| to_set = AUTOVACUUM_SETTINGS.select { |name, threshold| settings[name].to_f > threshold } | |
| to_reset = AUTOVACUUM_SETTINGS.keys - to_set.keys | |
| conn.tables.each do |table| | |
| quoted = conn.quote_table_name(table) | |
| if to_set.any? | |
| set_params = to_set.map { |k, v| "#{k} = #{v}" }.join(", ") | |
| conn.execute("ALTER TABLE #{quoted} SET (#{set_params})") | |
| end | |
| if to_reset.any? | |
| reset_params = to_reset.join(", ") | |
| conn.execute("ALTER TABLE #{quoted} RESET (#{reset_params})") | |
| end | |
| end | |
| end | |
| def global_settings(conn) | |
| names = AUTOVACUUM_SETTINGS.keys.map { |k| "'#{k}'" }.join(", ") | |
| sql = <<~SQL | |
| SELECT name, setting::float AS value | |
| FROM pg_settings | |
| WHERE name IN (#{names}) | |
| SQL | |
| conn.execute(sql).each_with_object({}) do |row, hash| | |
| hash[row["name"].to_sym] = row["value"].to_f | |
| end | |
| end | |
| def vacuum_stale_tables(conn) | |
| return if conn.open_transactions > 0 | |
| sql = <<~SQL | |
| SELECT quote_ident(schemaname) || '.' || quote_ident(relname) AS full_name | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = ANY(current_schemas(false)) | |
| AND ( | |
| GREATEST(last_vacuum, last_autovacuum) < NOW() - INTERVAL '1 week' | |
| OR (last_vacuum IS NULL AND last_autovacuum IS NULL) | |
| OR GREATEST(last_analyze, last_autoanalyze) < NOW() - INTERVAL '1 week' | |
| OR (last_analyze IS NULL AND last_autoanalyze IS NULL) | |
| ) | |
| SQL | |
| conn.execute(sql).each do |row| | |
| conn.execute("VACUUM ANALYZE #{row["full_name"]}") | |
| end | |
| end | |
| def reindex_bloated_tables(conn) | |
| return if conn.open_transactions > 0 | |
| # Dead tuples in the table heap correspond to dead entries in all its indexes. | |
| # Once autovacuum reclaims heap space, those index pages remain bloated until | |
| # VACUUM cleans them up or the index is rebuilt. Tables with >= 20% dead tuple | |
| # ratio and at least 1,000 dead tuples are candidates for concurrent reindexing. | |
| sql = <<~SQL | |
| SELECT quote_ident(schemaname) || '.' || quote_ident(relname) AS full_name | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = ANY(current_schemas(false)) | |
| AND n_dead_tup >= #{REINDEX_DEAD_TUPLE_MIN} | |
| AND n_dead_tup::float / GREATEST(n_live_tup + n_dead_tup, 1) >= #{REINDEX_DEAD_TUPLE_RATIO} | |
| AND relid NOT IN (SELECT relid FROM pg_stat_progress_create_index) | |
| SQL | |
| conn.execute(sql).each do |row| | |
| conn.execute("REINDEX TABLE CONCURRENTLY #{row["full_name"]}") | |
| end | |
| end | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment