Skip to content

Instantly share code, notes, and snippets.

@davidblewett
Last active July 10, 2018 10:54
Show Gist options
  • Save davidblewett/8282108 to your computer and use it in GitHub Desktop.
Save davidblewett/8282108 to your computer and use it in GitHub Desktop.
# The following are suggested values
# They should be set in all configs,
# so that if a secondary needs to be promoted
# it will be generating the correct WAL level
wal_level = hot_standby
archive_mode = on # allows archiving to be done
# (change requires restart)
archive_command = 'cp -f %p /data/pgsql/archive/%f </dev/null' # command to use to archive a logfile segment
max_wal_senders = 5 # max number of walsender processes
# (change requires restart)
wal_keep_segments = 128 # in logfile segments, 16MB each; 0 disables
hot_standby = on # "on" allows queries during recovery
# (change requires restart)
hot_standby_feedback = on
#
# The following sets up Postgres' streaming replication,
# so that the secondary server will build on top of the
# last ZFS snapshot and stay current.
#---------------------------------------------------------------------------
# ARCHIVE RECOVERY PARAMETERS
#---------------------------------------------------------------------------
archive_cleanup_command = 'pg_archivecleanup /data/pgsql/archive %r'
#
#---------------------------------------------------------------------------
# STANDBY SERVER PARAMETERS
#---------------------------------------------------------------------------
#
# When standby_mode is enabled, the PostgreSQL server will work as
# a standby. It tries to connect to the primary according to the
# connection settings primary_conninfo, and receives XLOG records
# continuously.
#
standby_mode = 'on'
#
primary_conninfo = 'host=<IP of primary> user=replicator' # e.g. 'host=localhost port=5432'
#
#
# By default, a standby server keeps streaming XLOG records from the
# primary indefinitely. If you want to stop streaming and finish recovery,
# opening up the system in read/write mode, specify path to a trigger file.
# Server will poll the trigger file path periodically and stop streaming
# when it's found.
#
#trigger_file = ''
trigger_file = '/data/pgsql/data/FAILOVER'
#!/bin/sh
DB_IP=$1
if [ $# -gt 1 ]; then
ZFS_IP=$2
else
ZFS_IP=$1
fi
SSH_KEY=/home/zfssync/.ssh/id_rsa
LOCKFILE=/tmp/pgsql.lock
LOGFILE=/root/reset-secondary.log
ZFS_FS=data/pgsql
PGDATA=/$ZFS_FS/data
HOST=`hostname`
TSTAMP=`date +%Y-%m-%dT%H:%M:%SZ`
LAST_SNAP=`zfs list -r -t snapshot $ZFS_FS | grep $HOST | tail -n 1 | awk '{print $1}'`
SNAP="$ZFS_FS"@"$HOST"_"$TSTAMP"
if [ -e $LOCKFILE ]; then
echo "Another instance of this script is running."
echo "Exiting."
exit 2
else
touch $LOCKFILE
fi
# verify Postgresql is stopped locally.
echo Stopping PostgreSQL on Localhost
service postgresql stop
echo `date +%Y%m%d-%H%M%S` Stopped Postgresql >> $LOGFILE
# tell the master we're starting a backup
echo `date +%Y%m%d-%H%M%S` Starting Backup on $DB_IP
psql -U replicator -h $DB_IP -c "SELECT pg_start_backup('$TSTAMP',true);" postgres
echo `date +%Y%m%d-%H%M%S` Told $DB_IP about us >> $LOGFILE
# Create ZFS snapshot on remote master
echo `date +%Y%m%d-%H%M%S` Creating ZFS snapshot: $SNAP on $ZFS_IP >> $LOGFILE
ssh -i $SSH_KEY zfssync@$ZFS_IP zfs snapshot $SNAP
# Create ZFS snapshot on remote master
echo `date +%Y%m%d-%H%M%S` Replicating from $LAST_SNAP to $SNAP >> $LOGFILE
ssh -i $SSH_KEY zfssync@$ZFS_IP zfs send -R -i $LAST_SNAP $SNAP | zfs recv -F $ZFS_FS
rm $PGDATA/FAILOVER
rm $PGDATA/postmaster.pid
rm $PGDATA/recovery.conf
# Update the postgres conf to match the host
rm $PGDATA/postgresql.conf
ln -s $PGDATA/postgresql-$HOST.conf $PGDATA/postgresql.conf
echo `date +%Y%m%d-%H%M%S` Completed sync from $ZFS_IP >> $LOGFILE
# stop backup on the master
echo `date +%Y%m%d-%H%M%S` Stopping Backup on $DB_IP
psql -U replicator -h $DB_IP -c "SELECT pg_stop_backup();" postgres
echo `date +%Y%m%d-%H%M%S` told $DB_IP we are done >> $LOGFILE
# reset the local instance for SLAVE status
echo `date +%Y%m%d-%H%M%S` Adding Recovery.conf to $PGDATA
ln -s /$ZFS_FS/recovery.conf $PGDATA
echo `date +%Y%m%d-%H%M%S` Replaced Recovery.conf >> $LOGFILE
# restart Postgresql
service postgresql start
echo `date +%Y%m%d-%H%M%S` restarted posgresql >> $LOGFILE
# Clean up after ourselves.
rm $LOCKFILE;
echo "`date +%Y%m%d-%H%M%S` Removed Lockfile"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment