Created
October 14, 2011 16:31
-
-
Save melo/1287604 to your computer and use it in GitHub Desktop.
My simple watchdog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use AnyEvent; | |
use AnyEvent::WebService::Notifo; | |
use Sys::Hostname; | |
### Use your Notifo.com info here
# Both values are handed to _init_notifo_agent() below; that helper only
# forwards them when both are non-empty.
my $api_key = '';
my $user    = '';

## Define interval for checks, and 1min, 5min, and 15min limits on loadavg
my $loadavg_timer = _init_loadavg_watcher(
    5,    # check interval, passed to AE::timer as both delay and repeat
    4,    # limit for the 1 minute load average
    2,    # limit for the 5 minute load average
    2,    # limit for the 15 minute load average
);

### Nothing more to edit

# The logger must be initialised before the first _notify() so that the
# hostname prefix is available.
_init_logger();
my $notifo = _init_notifo_agent($api_key, $user);
_notify('Watchdog ready');

# Run the event loop. NOTE(review): nothing visible in this file ever calls
# ->send on $watchdog, so recv() is not expected to return on its own and the
# final notification below is only reached if something else ends the wait.
my $watchdog = AE::cv();
$watchdog->recv();
_notify('Watchdog exited');
### Loadavg watcher
# Create the periodic load average check.
#
# Parameters:
#   $interval   - passed to AE::timer as both the initial delay and the
#                 repeat interval
#   $lim_avg1   - limit for the 1 minute load average
#   $lim_avg5   - limit for the 5 minute load average
#   $lim_avg15  - limit for the 15 minute load average
#   A false limit (0, '' or undef) disables the corresponding check.
#
# Returns whatever AE::timer returns; the caller stores it for as long as
# the check should keep running.
#
# On every tick the callback reads the current averages and either raises
# the 'loadavg' alarm (read error, or first limit exceeded) or clears it.
sub _init_loadavg_watcher {
    my ($interval, $lim_avg1, $lim_avg5, $lim_avg15) = @_;
    my $alarm = 'loadavg';

    my $on_tick = sub {
        my ($now1, $now5, $now15);
        eval { ($now1, $now5, $now15) = _read_loadavg(); };
        if ($@) {
            # Could not sample the load: report that as an alarm as well.
            set_alarm($alarm, "error: $@");
            return;
        }

        my $info = {
            avg1      => $now1,
            avg5      => $now5,
            avg15     => $now15,
            lim_avg1  => $lim_avg1,
            lim_avg5  => $lim_avg5,
            lim_avg15 => $lim_avg15,
        };
        my $status = "($now1/$lim_avg1, $now5/$lim_avg5, $now15/$lim_avg15)";

        # Checked in the order 1, 5, 15 minutes; only the first window that
        # is over its limit is reported.
        my @windows = (
            [1,  $now1,  $lim_avg1],
            [5,  $now5,  $lim_avg5],
            [15, $now15, $lim_avg15],
        );
        for my $window (@windows) {
            my ($minutes, $load, $limit) = @{$window};
            next unless $load && $limit && $load > $limit;
            set_alarm($alarm, "avg $minutes $load bad $status", $info);
            return;
        }

        clear_alarm($alarm, "all ok $status", $info);
    };

    return AE::timer($interval, $interval, $on_tick);
}
# Read the 1, 5 and 15 minute load averages from a loadavg-style file.
#
# Parameters:
#   $path - optional file to read; defaults to '/proc/loadavg'
#
# Returns the list ($c1, $c5, $c15): the first three whitespace-separated
# numeric fields of the file's first line, as matched text.
#
# Dies if the file cannot be opened (the message includes the system error)
# or if the first line does not start with three numeric fields.
sub _read_loadavg {
    my ($path) = @_;
    $path = '/proc/loadavg' unless defined $path;

    open(my $fh, '<', $path)
        or die "Could not open file '$path': $!\n";
    my $line = <$fh>;
    close($fh);

    # An empty file yields undef; normalise it so the parse failure below is
    # reported cleanly instead of through "uninitialized value" warnings.
    $line = '' unless defined $line;
    chomp($line);

    my ($c1, $c5, $c15) = $line =~ m/^\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)/;
    die "Could not parse line '$line'"
        unless defined($c1) && defined($c5) && defined($c15);

    return ($c1, $c5, $c15);
}
### Notifo Agent
# Build the AnyEvent::WebService::Notifo client used by _notify().
#
# Parameters:
#   $api_key - Notifo API key
#   $user    - Notifo user name
#
# The credentials are forwarded to the constructor only when BOTH are true
# values; otherwise new() is called with no arguments (presumably the module
# then falls back to its own configuration - confirm against its docs).
#
# Returns the object produced by AnyEvent::WebService::Notifo->new.
sub _init_notifo_agent {
    my ($api_key, $user) = @_;

    my %credentials
        = ($api_key && $user)
        ? (api_key => $api_key, user => $user)
        : ();

    return AnyEvent::WebService::Notifo->new(%credentials);
}
### Status DB
{
    # State per alarm name. Each entry is a hash with the keys:
    #   alarm        - the alarm name
    #   alarmed      - time() of the last alarm notification; absent once the
    #                  alarm has been cleared
    #   info         - caller-supplied details of the latest check
    #   last_message - latest message seen for this alarm
    #
    # Keep this declaration free of an initialiser: in this script the bare
    # block itself is only executed after the main recv() returns, while the
    # subs below are already called from timer callbacks before that.
    my %status;

    # set_alarm($alarm, $mesg, $info)
    # Put $alarm into alarm state and notify, unless a notification for the
    # same alarm went out less than an hour ago; in that case only the stored
    # message/info are refreshed and the skip is logged.
    sub set_alarm {
        my ($alarm, $mesg, $info) = @_;

        my $entry       = $status{$alarm};
        my $notified_at = $entry && $entry->{alarmed};

        ## Already in alarm mode: only one notif per hour
        if ($notified_at && time() - $notified_at < 3600) {
            $entry->{info}         = $info;
            $entry->{last_message} = $mesg;
            _log("Skip alarm $alarm, inside 1 hour: $mesg");
            return;
        }

        ## Alarm and notify
        $status{$alarm} = {
            alarm        => $alarm,
            alarmed      => time(),
            info         => $info,
            last_message => $mesg,
        };
        _notify("[$alarm] $mesg");
    }

    # clear_alarm($alarm, $mesg, $info)
    # Mark $alarm as healthy and notify. Notifies when the alarm is unknown
    # so far or currently in alarm state; does nothing when it is already
    # known and clear.
    sub clear_alarm {
        my ($alarm, $mesg, $info) = @_;

        my $entry = $status{$alarm};
        return if $entry && !$entry->{alarmed};

        $status{$alarm} = {
            alarm        => $alarm,
            info         => $info,
            last_message => $mesg,
        };
        _notify("[$alarm] $mesg");
    }
}
### Logger/Notify
{
    # Provides pp() for the send callback in _notify(). 'use' takes effect at
    # compile time wherever it is written, so it lives here rather than
    # inside the callback.
    use Data::Dump qw(pp);

    # Host name used as prefix of every notification. Set by _init_logger().
    # Keep this declaration free of an initialiser: in this script the bare
    # block itself is only executed after the main recv() returns, while the
    # subs below are called long before that.
    my $hostname;

    # _init_logger()
    # Look up and remember the local host name.
    sub _init_logger {
        $hostname = hostname();
    }

    # _notify(@message_parts)
    # Send "<hostname>: <parts joined>" through the Notifo agent held in the
    # file-level $notifo and log the service response once it arrives. When
    # no agent is available the failure is logged instead.
    sub _notify {
        # Do not depend on the caller having run _init_logger() first.
        _init_logger() unless defined $hostname;

        my $mesg = join('', $hostname, ': ', @_);
        if ($notifo) {
            $notifo->send_notification(
                msg => $mesg,
                cb  => sub {
                    my ($res) = @_;
                    _log("Sent message '$mesg' => ", pp($res));
                }
            );
        }
        else {
            _log("Send of '$mesg' failed, no Notifo agent");
        }
    }

    # _log(@message_parts)
    # Print "[pid] [local time] <parts joined by spaces>" plus a single
    # newline to STDERR. Trailing newlines of the last part are dropped so
    # the line is never followed by blank lines.
    sub _log {
        # Work on a copy: @_ aliases the caller's variables, so editing it in
        # place would modify the caller's data and die on an empty list.
        my @parts = @_;
        my $time  = localtime();
        $parts[-1] =~ s/\n+$// if @parts;
        print STDERR "[$$] [$time] @parts\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment