Created
July 9, 2025 08:17
-
-
Save kmoppel/0935766e8a68d952be7b362d2234185d to your computer and use it in GitHub Desktop.
Disables sync repl in case no streaming replicas for 1m
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import time | |
import psycopg | |
from psycopg.rows import dict_row | |
from datetime import datetime | |
CONNSTR = "dbname=postgres user=postgres port=5434" | |
MAX_ALLOWED_REPLICA_OFFLINE_SECONDS = 65 | |
LOOP_SLEEP_SECONDS = 30 | |
MAX_SCRIPT_RUNTIME_SECONDS = 3600*24*3 | |
SQL_GET_SYNC_REPL_SETTINGS = "SELECT current_setting('synchronous_standby_names') synchronous_standby_names, current_setting('synchronous_commit') synchronous_commit;" | |
SQL_GET_STREAMING_REPLICAS_COUNT = "SELECT count(*) FROM pg_stat_replication WHERE state = 'streaming'" | |
SQLS_DISABLE_SYNC_REPL = ["alter system reset synchronous_standby_names", "alter system reset synchronous_commit", "select pg_reload_conf()"] # ALTER SYSTEM cannot run inside transactions | |
no_replica_state_start_time = 0 | |
def execute_sql(sql, params=None) -> list: | |
with psycopg.connect(CONNSTR, autocommit=True) as conn: | |
with conn.cursor(row_factory=dict_row) as cur: | |
print("Running SQL:", sql, params) | |
cur.execute(sql, params) | |
if (cur.statusmessage and cur.statusmessage.startswith('SELECT')) or cur.description: | |
return cur.fetchall() | |
else: | |
return [{'rows_affected': str(cur.rowcount)}] | |
if __name__ == '__main__': | |
start_time = time.time() | |
print("Sync repl monitor / guard starting ... ") | |
print(f"MAX_ALLOWED_REPLICA_OFFLINE_SECONDS={MAX_ALLOWED_REPLICA_OFFLINE_SECONDS} ") | |
execute_sql("select 1;") | |
print("Connection to DB OK") | |
while True: | |
if time.time() > (start_time + MAX_SCRIPT_RUNTIME_SECONDS): | |
print("Exiting due to MAX_SCRIPT_RUNTIME_SECONDS", MAX_SCRIPT_RUNTIME_SECONDS) | |
exit() | |
try: | |
ret_sr_settings = execute_sql(SQL_GET_SYNC_REPL_SETTINGS) | |
print("ret_sr_settings", ret_sr_settings) | |
if ret_sr_settings[0]['synchronous_standby_names'] == '' or not ret_sr_settings[0]['synchronous_commit'] in ('on', 'remote_apply', 'remote_write'): | |
print("Sync repl not enabled, nothing to do") | |
no_replica_state_start_time = 0 | |
continue | |
ret_replica_count = execute_sql(SQL_GET_STREAMING_REPLICAS_COUNT) | |
print("ret_replica_count", ret_replica_count) | |
if not ret_replica_count[0]["count"]: | |
if not no_replica_state_start_time: | |
print("WARNING - sync repl with no streaming replicas detected! Time:", datetime.now()) | |
no_replica_state_start_time = time.time() | |
else: | |
if no_replica_state_start_time: # Replica came back before MAX_ALLOWED_REPLICA_OFFLINE_SECONDS | |
print("Replica ONLINE again") | |
no_replica_state_start_time = 0 | |
if no_replica_state_start_time and time.time() - no_replica_state_start_time > MAX_ALLOWED_REPLICA_OFFLINE_SECONDS: | |
for sql in SQLS_DISABLE_SYNC_REPL: | |
execute_sql(sql) | |
print("Sync repl disabled - exiting") | |
exit() | |
except Exception as e: | |
print('Caught', e) | |
finally: | |
print(f"Loop finished - sleeping for {LOOP_SLEEP_SECONDS}s ...") | |
time.sleep(LOOP_SLEEP_SECONDS) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment