I'm trying to configure a hot-standby system. I have two machines:
- db0 :: continuously archiving master
- db1 :: continuously replicating slave
I have my system laid out somewhat differently than usual, mostly to accomadate the limitations of puppet. You'll find my postgresql.conf
s and pg_hba.conf
here. The mapping of my disks to mount points are like so:
/dev/md0
-->/srv/pg/data
/dev/md1
-->/srv/pg/xlog
/dev/xvdn
-->/srv/pg/archive
You'll note that archive_commmand
is set to dump WAL files into /srv/pg/archive
. Here's how db1
is prepared to get a hot-copy of db0
's database:
root@db1:~# /etc/init.d/postgresql stop
Stopping PostgreSQL 9.0 database server: main.
root@db1:~# umount /dev/md0 && mkfs.xfs -f /dev/md0 && mount /dev/md0
log stripe unit (524288 bytes) is too large (maximum is 256KiB)
log stripe unit adjusted to 32KiB
meta-data=/dev/md0 isize=256 agcount=16, agsize=1638272 blks
= sectsz=512 attr=2, projid32bit=0
data = bsize=4096 blocks=26212352, imaxpct=25
= sunit=128 swidth=256 blks
naming =version 2 bsize=4096 ascii-ci=0
log =internal log bsize=4096 blocks=12800, version=2
= sectsz=512 sunit=8 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
root@db1:~# umount /dev/md1 && mkfs.xfs -f /dev/md1 && mount /dev/md1
log stripe unit (524288 bytes) is too large (maximum is 256KiB)
log stripe unit adjusted to 32KiB
meta-data=/dev/md1 isize=256 agcount=16, agsize=163712 blks
= sectsz=512 attr=2, projid32bit=0
data = bsize=4096 blocks=2619392, imaxpct=25
= sunit=128 swidth=256 blks
naming =version 2 bsize=4096 ascii-ci=0
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=8 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
root@db1:~# umount /dev/xvdn && mkfs.xfs -f /dev/xvdn && mount /dev/xvdn
meta-data=/dev/xvdn isize=256 agcount=4, agsize=655360 blks
= sectsz=512 attr=2, projid32bit=0
data = bsize=4096 blocks=2621440, imaxpct=25
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
root@db1:~# chown -R postgres:postgres /srv/pg/ && chmod -R 0700 /srv/pg/
root@db1:~# ls -al /srv/pg/
total 8
drwx------ 5 postgres postgres 4096 Sep 1 13:22 .
drwxr-xr-x 3 root root 4096 Sep 1 05:12 ..
drwx------ 2 postgres postgres 6 Sep 1 19:27 archive
drwx------ 2 postgres postgres 6 Sep 1 19:26 data
drwx------ 2 postgres postgres 6 Sep 1 19:26 xlog
We move to db0
and
root@db0:~# su - postgres
postgres@db0:~$ psql
psql (9.0.4)
Type "help" for help.
postgres=# SELECT pg_start_backup('memory');
pg_start_backup
-----------------
0/12000020
(1 row)
postgres=# ^D\q
postgres@db0:~$ logout
root@db0:~# rsync -azq --exclude /srv/pg/data/pg_xlog /srv/pg/data/ db1.carepilot.com:/srv/pg/data
[email protected]'s password:
root@db0:~# su - postgres
postgres@db0:~$ psql
psql (9.0.4)
Type "help" for help.
postgres=# SELECT pg_stop_backup();
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
pg_stop_backup
----------------
0/120000D8
(1 row)
postgres=# ^D\q
postgres@db0:~$ logout
root@db0:~# ls /srv/pg/xlog/
000000010000000000000012 000000010000000000000012.00000020.backup 000000010000000000000013
000000010000000000000014 archive_status
back on db1
root@db1:~# ls /srv/pg/archive/
000000010000000000000011 000000010000000000000012 000000010000000000000012.00000020.backup
root@db1:~# ... put recovery.conf to /srv/pg/data/recovery.conf ...
root@db1:~# /etc/init.d/postgresql start
Starting PostgreSQL 9.0 database server: main.
root@db1:~# tail -f -n0 /var/log/messages
Sep 1 20:19:32 db1 postgres[16455]: [1-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.788 UTC>
LOG: database system was interrupted; last known up at 2011-09-01 20:17:35 UTC
Sep 1 20:19:32 db1 postgres[16455]: [2-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.788 UTC>
LOG: creating missing WAL directory "pg_xlog/archive_status"
Sep 1 20:19:32 db1 postgres[16455]: [3-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.789 UTC>
LOG: entering standby mode
Sep 1 20:19:32 db1 postgres[16455]: [4-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.813 UTC>
LOG: restored log file "000000010000000000000014" from archive
Sep 1 20:19:32 db1 postgres[16455]: [5-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.817 UTC>
LOG: redo starts at 0/14000020
Sep 1 20:19:32 db1 postgres[16455]: [6-1] r (4e5fe8d4.4047) -- 2011-09-01 20:19:32.831 UTC>
LOG: consistent recovery state reached at 0/15000000
Sep 1 20:19:32 db1 postgres[16454]: [1-1] r (4e5fe8d3.4046) -- 2011-09-01 20:19:32.832 UTC>
LOG: database system is ready to accept read only connections
Sep 1 20:19:32 db1 postgres[16461]: [2-1] r (4e5fe8d4.404d) -- 2011-09-01 20:19:32.838 UTC>
LOG: incomplete startup packet
What's the deal? These are logs sent directly by the postgresql daemon.