Created
May 19, 2009 22:52
-
-
Save lfittl/114479 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/usr/bin/walmgr /etc/walmgr/master.ini xarchive %p %f >> /var/log/postgresql/archivecmd.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[wal-master] | |
job_name = master | |
logfile = /var/log/postgresql/walmgr-master.log | |
master_db = dbname=soup_production | |
master_data = /var/lib/postgresql/8.3/main | |
master_config = /etc/postgresql/8.3/main/postgresql.conf | |
slave_config = /etc/walmgr/slave.ini | |
slave = core.in.soup.io:/srv/dbstandby-walmgr | |
completed_wals = %(slave)s/logs.complete | |
partial_wals = %(slave)s/logs.partial | |
full_backup = %(slave)s/data.master | |
file_target = %(slave)s/files.master | |
# syncdaemon update frequency | |
loop_delay = 10.0 | |
# use record based shipping available in 8.2 | |
use_xlog_functions = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
restore_command = '/usr/bin/walmgr /etc/walmgr/slave.ini xrestore %f "%p"' | |
log_restartpoints = 'true' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[wal-slave] | |
job_name = standby | |
logfile = /var/log/postgresql/walmgr-standby.log | |
slave_volume = /dev/vg-core/dbstandby | |
slave_data = /var/lib/postgresql/8.3/standby | |
slave_stop_cmd = /usr/bin/pg_ctlcluster --force 8.3 standby stop | |
slave_start_cmd = /usr/bin/pg_ctlcluster 8.3 standby start | |
slave_bin = /usr/lib/postgresql/8.3/bin | |
snapshot_volume = /dev/vg-core/dbindexer | |
snapshot_size = 20G | |
snapshot_data = /var/lib/postgresql/8.3/indexer | |
snapshot_stop_cmd = /usr/bin/pg_ctlcluster --force 8.3 indexer stop | |
snapshot_start_cmd = /usr/bin/pg_ctlcluster 8.3 indexer start | |
slave = /srv/dbstandby-walmgr | |
completed_wals = %(slave)s/logs.complete | |
partial_wals = %(slave)s/logs.partial | |
full_backup = %(slave)s/data.master | |
keep_backups = 0 | |
archive_command = |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- walmgr.orig 2009-04-08 09:04:55.000000000 +0000 | |
+++ walmgr 2009-04-08 10:16:13.000000000 +0000 | |
@@ -52,7 +52,7 @@ | |
""" | |
import os, sys, skytools, re, signal, time, traceback | |
-import errno, glob, ConfigParser, shutil | |
+import errno, glob, ConfigParser, shutil, subprocess, math | |
MASTER = 1 | |
SLAVE = 0 | |
@@ -142,6 +142,30 @@ | |
label.label_string = m.group(1) | |
return label | |
def get_wal_filename(timeline, wal_location, wal_segment_size):
    """Return the name of the WAL segment file that contains a WAL location.

    timeline         -- timeline ID as an integer
    wal_location     -- location written as two hexadecimal numbers separated
                        by '/', e.g. "2/5A000020"
    wal_segment_size -- number of bytes per WAL segment file

    The result is three zero-padded, 8-digit upper-case hex fields:
    timeline, upper half of the location, and the segment number within
    that upper half.
    """
    upper_wal_location, lower_wal_location = [int(x, 16) for x in wal_location.split('/')]

    # The lower half of the location is a byte offset; the segment file that
    # holds it is that offset divided by the segment size.  Plain integer
    # division is exact, unlike deriving a shift count from a truncated
    # floating-point log2.
    lower_wal_location_file = lower_wal_location // wal_segment_size

    return "%08X%08X%08X" % (timeline, upper_wal_location, lower_wal_location_file)
+ | |
def get_redo_filename(data_dir, pg_controldata="/usr/lib/postgresql/8.3/bin/pg_controldata"):
    """Return the WAL file name holding the latest checkpoint's REDO location.

    data_dir       -- data directory passed to pg_controldata
    pg_controldata -- path of the pg_controldata binary to run; defaults to
                      the location previously hard-coded here

    Runs pg_controldata on data_dir and parses the timeline ID, the REDO
    location and the WAL segment size from its output.

    Returns None when pg_controldata prints nothing on stdout.
    Raises ValueError when output is present but an expected field is missing.
    """
    timeline_regexp = re.compile(r"^Latest checkpoint's TimeLineID:\s*(.*)$", re.MULTILINE)
    redo_location_regexp = re.compile(r"^Latest checkpoint's REDO location:\s*(.*)$", re.MULTILINE)
    wal_segment_size_regexp = re.compile(r"^Bytes per WAL segment:\s*(.*)$", re.MULTILINE)

    pgcontrol = subprocess.Popen([pg_controldata, data_dir], stdout=subprocess.PIPE).communicate()[0]

    if not pgcontrol:
        return None

    timeline_match = timeline_regexp.search(pgcontrol)
    redo_location_match = redo_location_regexp.search(pgcontrol)
    wal_segment_size_match = wal_segment_size_regexp.search(pgcontrol)

    # Fail loudly with a clear message instead of an AttributeError on None.
    # Returning None here would be unsafe: the result is used to decide which
    # WAL files may be deleted.
    if not (timeline_match and redo_location_match and wal_segment_size_match):
        raise ValueError("Unable to parse pg_controldata output for %s" % data_dir)

    timeline = int(timeline_match.group(1))
    redo_location = redo_location_match.group(1)
    wal_segment_size = int(wal_segment_size_match.group(1))

    return get_wal_filename(timeline, redo_location, wal_segment_size)
+ | |
+ | |
class WalMgr(skytools.DBScript): | |
def init_optparse(self, parser=None): | |
@@ -193,6 +217,8 @@ | |
'pause': self.slave_pause, | |
'continue': self.slave_continue, | |
'boot': self.slave_boot, | |
+ 'boot-snapshot': self.slave_boot_snapshot, | |
+ 'destroy-snapshot': self.slave_destroy_snapshot, | |
'xlock': self.slave_lock_backups_exit, | |
'xrelease': self.slave_resume_backups, | |
'xrotate': self.slave_rotate_backups, | |
@@ -953,7 +979,12 @@ | |
# cleanup only if we don't keep backup history. | |
# historic WAL files are removed during backup rotation | |
self.log.debug("%s: copy done, cleanup" % srcname) | |
- self.slave_cleanup(srcname) | |
+ | |
+ redo_filename_standby = get_redo_filename(self.cf.get("slave_data")) | |
+ redo_filename_snapshot = get_redo_filename(self.cf.get("snapshot_data")) | |
+ redo_filename = (redo_filename_snapshot and redo_filename_snapshot < redo_filename_standby) and redo_filename_snapshot or redo_filename_standby | |
+ self.log.info("Deleting old WAL logs, last required file for REDO is %s" % redo_filename) | |
+ self.slave_cleanup(redo_filename) | |
if os.path.isfile(partfile) and not srcfile == partfile: | |
# Remove any partial files after restore. Only leave the partial if | |
@@ -1151,7 +1182,52 @@ | |
self.exec_cmd(["cp", src_authfile, dst_authfile]) | |
except Exception, e: | |
self.log.warning("Unable to restore pg_auth file: %s" % e) | |
+ | |
def slave_boot_snapshot(self):
    """Create an LVM snapshot of the standby volume and start PostgreSQL on it.

    Steps:
      1. Pause WAL apply on the standby, waiting for the current file.
      2. Create the snapshot volume and mount it on "snapshot_data".
      3. Resume WAL apply (always, even if step 2 failed).
      4. Remove a stale postmaster.pid copied over from the standby.
      5. Write a recovery.conf that restores from "completed_wals".
      6. Run "snapshot_start_cmd" and block until pg_controldata reports
         the cluster state as "in production".

    Does nothing when self.not_really is set.
    """
    if self.not_really:
        return

    slave_volume = self.cf.get("slave_volume")
    snapshot_volume = self.cf.get("snapshot_volume")
    snapshot_size = self.cf.get("snapshot_size")
    data_dir = self.cf.get("snapshot_data")
    self.slave_pause(waitcomplete=1)

    try:
        self.exec_cmd(["/sbin/lvcreate", "--size", snapshot_size, "--snapshot", "--name", snapshot_volume, slave_volume])
        self.exec_cmd(["/bin/mount", snapshot_volume, data_dir])
    finally:
        # Important: LVM snapshot has to be mounted before we continue,
        # so standby won't delete WAL logs we still need
        self.slave_continue()

    # Remove the PID file of the slave PostgreSQL instance, if it exists
    old_pidfile = os.path.join(data_dir, "postmaster.pid")
    if os.path.isfile(old_pidfile):
        os.remove(old_pidfile)

    # Write new recovery.conf, to restore all WAL logs from last checkpoint to "crash"
    recovery_conf = os.path.join(data_dir, "recovery.conf")
    f = open(recovery_conf, "w")
    try:
        f.write("restore_command = 'cp %s/%%f %%p'\n" % self.cf.get("completed_wals"))
    finally:
        # Close the handle even if the write fails
        f.close()

    start_cmd = self.cf.get("snapshot_start_cmd")
    status = os.system(start_cmd)
    if status != 0:
        # Only report it: readiness is decided by the pg_controldata poll
        # below, not by the start command's exit status.
        self.log.warning("Snapshot start command returned status %s: %s" % (status, start_cmd))

    # Poll once per second until the snapshot cluster has left recovery
    started_regexp = re.compile(r"^Database cluster state:\s*in production$", re.MULTILINE)
    while True:
        pgcontrol = subprocess.Popen(["/usr/lib/postgresql/8.3/bin/pg_controldata", data_dir], stdout=subprocess.PIPE).communicate()[0]
        if started_regexp.search(pgcontrol):
            break
        time.sleep(1)
+ | |
def slave_destroy_snapshot(self):
    """Stop the snapshot PostgreSQL instance and remove its LVM snapshot.

    Runs "snapshot_stop_cmd", unmounts "snapshot_data" and force-removes
    the "snapshot_volume" logical volume.

    Does nothing when self.not_really is set.
    """
    if self.not_really:
        return

    stop_cmd = self.cf.get("snapshot_stop_cmd")
    status = os.system(stop_cmd)
    if status != 0:
        # Do not drop the status silently; the umount below is still attempted
        self.log.warning("Snapshot stop command returned status %s: %s" % (status, stop_cmd))

    self.exec_cmd(["/bin/umount", self.cf.get("snapshot_data")])
    self.exec_cmd(["/sbin/lvremove", "--force", self.cf.get("snapshot_volume")])
+ | |
def slave_pause(self, waitcomplete=0): | |
"""Pause the WAL apply, wait until last file applied if needed""" | |
self.assert_valid_role(SLAVE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment