Skip to content

Instantly share code, notes, and snippets.

@kmoppel
Created July 9, 2025 08:17
Show Gist options
  • Save kmoppel/0935766e8a68d952be7b362d2234185d to your computer and use it in GitHub Desktop.
Save kmoppel/0935766e8a68d952be7b362d2234185d to your computer and use it in GitHub Desktop.
Disables sync repl in case no streaming replicas for 1m
#!/usr/bin/env python3
import time
import psycopg
from psycopg.rows import dict_row
from datetime import datetime
CONNSTR = "dbname=postgres user=postgres port=5434"
MAX_ALLOWED_REPLICA_OFFLINE_SECONDS = 65
LOOP_SLEEP_SECONDS = 30
MAX_SCRIPT_RUNTIME_SECONDS = 3600*24*3
SQL_GET_SYNC_REPL_SETTINGS = "SELECT current_setting('synchronous_standby_names') synchronous_standby_names, current_setting('synchronous_commit') synchronous_commit;"
SQL_GET_STREAMING_REPLICAS_COUNT = "SELECT count(*) FROM pg_stat_replication WHERE state = 'streaming'"
SQLS_DISABLE_SYNC_REPL = ["alter system reset synchronous_standby_names", "alter system reset synchronous_commit", "select pg_reload_conf()"] # ALTER SYSTEM cannot run inside transactions
no_replica_state_start_time = 0
def execute_sql(sql, params=None) -> list:
with psycopg.connect(CONNSTR, autocommit=True) as conn:
with conn.cursor(row_factory=dict_row) as cur:
print("Running SQL:", sql, params)
cur.execute(sql, params)
if (cur.statusmessage and cur.statusmessage.startswith('SELECT')) or cur.description:
return cur.fetchall()
else:
return [{'rows_affected': str(cur.rowcount)}]
if __name__ == '__main__':
start_time = time.time()
print("Sync repl monitor / guard starting ... ")
print(f"MAX_ALLOWED_REPLICA_OFFLINE_SECONDS={MAX_ALLOWED_REPLICA_OFFLINE_SECONDS} ")
execute_sql("select 1;")
print("Connection to DB OK")
while True:
if time.time() > (start_time + MAX_SCRIPT_RUNTIME_SECONDS):
print("Exiting due to MAX_SCRIPT_RUNTIME_SECONDS", MAX_SCRIPT_RUNTIME_SECONDS)
exit()
try:
ret_sr_settings = execute_sql(SQL_GET_SYNC_REPL_SETTINGS)
print("ret_sr_settings", ret_sr_settings)
if ret_sr_settings[0]['synchronous_standby_names'] == '' or not ret_sr_settings[0]['synchronous_commit'] in ('on', 'remote_apply', 'remote_write'):
print("Sync repl not enabled, nothing to do")
no_replica_state_start_time = 0
continue
ret_replica_count = execute_sql(SQL_GET_STREAMING_REPLICAS_COUNT)
print("ret_replica_count", ret_replica_count)
if not ret_replica_count[0]["count"]:
if not no_replica_state_start_time:
print("WARNING - sync repl with no streaming replicas detected! Time:", datetime.now())
no_replica_state_start_time = time.time()
else:
if no_replica_state_start_time: # Replica came back before MAX_ALLOWED_REPLICA_OFFLINE_SECONDS
print("Replica ONLINE again")
no_replica_state_start_time = 0
if no_replica_state_start_time and time.time() - no_replica_state_start_time > MAX_ALLOWED_REPLICA_OFFLINE_SECONDS:
for sql in SQLS_DISABLE_SYNC_REPL:
execute_sql(sql)
print("Sync repl disabled - exiting")
exit()
except Exception as e:
print('Caught', e)
finally:
print(f"Loop finished - sleeping for {LOOP_SLEEP_SECONDS}s ...")
time.sleep(LOOP_SLEEP_SECONDS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment