Skip to content

Instantly share code, notes, and snippets.

@revmischa
Created July 8, 2011 21:44
Show Gist options
  • Save revmischa/1072901 to your computer and use it in GitHub Desktop.
Save revmischa/1072901 to your computer and use it in GitHub Desktop.
Nagios/icinga replication lag check
# Reports replication lag in seconds
# For use with PostgreSQL and repmgr
# Usage: -c lag_seconds_threshold -n standby_node_id
# Sample output: REPMGR OK - Replication apply time lag: 1 seconds, 11 kB
package MyApp::Monitor::ReplicationLag;
use Moose;
use MooseX::NonMoose;
extends 'Nagios::Plugin';
use namespace::autoclean;
use Time::Duration::Parse;
# Entry point for the check.
#
# Registers the command-line options, looks up the replication status row
# for the requested standby node via node_replication_info(), converts the
# row's time_lag into whole seconds and exits through nagios_exit() with the
# status returned by check_threshold().
#
# Options registered here:
#   -c / --critical   critical threshold in seconds (default 120)
#   -n / --node       standby node ID
#
# Exits UNKNOWN when no usable status row is found for the node, or when the
# time_lag value cannot be parsed into a duration.
sub run {
    my ($self) = @_;

    $self->add_arg(
        spec     => 'critical|c=i',
        help     => 'Critical threshold in seconds for replication lag',
        required => 1,
        default  => 120,
    );

    # node ID
    $self->add_arg(
        spec     => 'node|n=i',
        help     => 'Standby node ID',
        required => 1,
    );

    $self->getopts;

    my $node_id = $self->opts->node;
    my $info    = $self->node_replication_info($node_id);

    # No row at all, or a row without a time lag: nothing to evaluate.
    if (!$info || !$info->{time_lag}) {
        return $self->nagios_exit($self->UNKNOWN, "Unable to select replication manager info for node $node_id");
    }

    # Drop a trailing fractional-seconds part before parsing.
    (my $lag = $info->{time_lag}) =~ s/\.\d+$//;

    # parse_duration() dies on a string it cannot interpret. Trap that so an
    # unexpected value is reported as UNKNOWN instead of aborting the plugin
    # with an uncaught exception.
    my $lag_seconds = eval { parse_duration($lag) };
    if (!defined $lag_seconds) {
        return $self->nagios_exit($self->UNKNOWN, "Unable to parse replication time lag '$info->{time_lag}' for node $node_id");
    }

    # The size lag is informational only; a missing value must not trigger an
    # "uninitialized value" warning or leave a dangling ", " in the message.
    my $lag_bytes = defined $info->{replication_lag}
        ? $info->{replication_lag}
        : 'unknown size';

    my $ret_code = $self->check_threshold(
        check    => $lag_seconds,
        critical => $self->opts->critical,
    );

    $self->nagios_exit($ret_code, "Replication apply time lag: $lag_seconds seconds, $lag_bytes");
}
# Looks up the replication status row for a single standby node.
#
# Arguments:
#   $node_id - value bound to the standby_node placeholder of the query.
#
# Returns whatever fetchrow_hashref yields for the first matching row: a
# hashref keyed by column name, or undef when no row is returned.
sub node_replication_info {
    my ($self, $node_id) = @_;

    # get your application's DB schema or DBH here
    my $schema = MYAPP->get_schema;

    my $info;
    $schema->storage->dbh_do(sub {
        # dbh_do passes the storage object first and the database handle second.
        my (undef, $dbh) = @_;

        # change to point at your repmgr table
        my $sth = $dbh->prepare_cached("SELECT * FROM repmgr_myapp.repl_status WHERE standby_node=?");
        $sth->execute($node_id);
        $info = $sth->fetchrow_hashref;

        # Only the first row is read, so release the cached handle explicitly.
        # Otherwise it stays active and the next prepare_cached() call with the
        # same SQL warns before force-finishing it.
        $sth->finish;
    });

    return $info;
}
__PACKAGE__->meta->make_immutable;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment