Created
November 22, 2011 16:04
-
-
Save oogali/1386011 to your computer and use it in GitHub Desktop.
nagios ack bot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # --- | |
| # quick and dirty nagios acknowledgement bot | |
| # [email protected] | |
| # @oogali | |
| # --- | |
| # | |
| # what do you need to run? | |
| # - ruby 1.8+ | |
| # - $ gem install isaac eventmachine em-hiredis em-http-request | |
| # - my nagios mail to redis bridge | |
| # - a working irc server | |
| # - a working redis installation | |
| # - a working nagios installation (daemon, web+cgi, mail) | |
| # | |
| # how does it work? | |
| # a) nagios generates an alert, and e-mails it to [email protected] | |
| # e.g. oogali+notifications@localhost | |
| # | |
| # b) in ~some/.forward, you have my nagios-to-redis publisher: | |
| # $ cat ~oogali/.forward+notifications | |
| # | /home/oogali/nagios-mail-to-redis.rb | |
| # | |
| # c) you're running the irc bot, and it's connected to both irc and redis | |
| # 09:20:35 -!- nagbot [[email protected]] has joined #systems | |
| # 09:23:36 < nagbot> [001] Service PROBLEM: Layer 7: UDP/12345 Goofy Healthcheck @ oogali.lab.blip.tv | |
| # | |
| # d) you acknowledge the alert, and wait for the update | |
| # 09:36:31 <@oo> !ack 1 | |
| # 09:36:54 < nagbot> [***] Service ACKNOWLEDGEMENT: Layer 7: UDP/12345 Goofy Healthcheck @ oogali.lab.blip.tv | |
| # | |
| # you can optionally acknowledge with a message, that will be recorded in nagios ack history: | |
| # 10:13:22 <@niels> !ack 13 the rent is too damn high | |
| # 10:13:27 < nagbot> [***] Service ACKNOWLEDGEMENT: Layer 7: UDP/5678 Stealth Server Healthcheck @ niels.lab.blip.tv | |
| # | |
| require 'rubygems' | |
| require 'isaac' | |
| require 'em-hiredis' | |
| require 'em-http-request' | |
# irc connection settings: bot nickname, server host, and ssl port
configure do |config|
  config.nick   = 'nagbot'
  config.server = 'not.your.favorite.efnet.server'
  config.port   = 6697
  config.ssl    = true
end
# define our program settings as constants
# (string settings are frozen so they cannot be mutated by accident at runtime)
REDIS_URI = 'redis://localhost:6379'.freeze
# lifetime, in seconds, of a cached alert; it is passed to redis as the key expiry
ALERT_TTL = 300
NAGIOS_INSTANCE = 'http://localhost/nagios/'.freeze
NAGIOS_USER = 'ircuser'.freeze
NAGIOS_PASSWORD = 'too many secrets'.freeze
CHANNEL = '#systems'.freeze
# channel key (password); uncomment if the channel requires one
#KEY = 'key to your channel'
| ## You shouldn't need to change anything below here ## | |
# shared state, captured by the event handler blocks further down
msgnum = 0            # running counter used to number relayed alerts
redis = pubsub = nil  # redis handles; both start out unset
# Once connected to the irc network: join the channel, open two redis
# connections (one for regular commands, one dedicated to pubsub), and relay
# every message published on the 'nagios::alerts' redis channel into irc.
on :connect do
  # KEY is optional (its definition is commented out by default), so only
  # append it to the join when it has actually been defined
  join(defined?(KEY) ? "#{CHANNEL} #{KEY}" : CHANNEL)

  # connect regular redis handle
  redis = EM::Hiredis.connect(REDIS_URI)
  redis.errback do |code|
    msg CHANNEL, "could not connect to redis: #{code}"
  end

  # connect redis pubsub handle
  pubsub = EM::Hiredis.connect(REDIS_URI)
  pubsub.errback do |code|
    msg CHANNEL, "could not connect to redis (pubsub): #{code}"
  end

  # subscribe to nagios alerts, and set up callback
  pubsub.subscribe('nagios::alerts')

  # the block parameter is called payload so that it does not shadow the
  # msg method used to speak in the channel
  pubsub.on(:message) do |_channel, payload|
    # captures: 1 = notification type, 2 = alert kind, 4 = host,
    # 5 = service, 6 = state
    m = payload.match(/\*\*\s+(\S+)\s+(\S+)\s+(\S+):\s+(\S+)\/(.+)\s+is\s+(.+)\s+\*\*/)

    unless m
      # not in the expected format: relay it verbatim to the configured channel
      msg CHANNEL, payload
      next
    end

    type, kind, host, service, state = m[1], m[2], m[4], m[5], m[6]

    case type
    when 'ACKNOWLEDGEMENT'
      msg CHANNEL, "[***] #{kind} #{type}: #{service} @ #{host}"
    when 'RECOVERY'
      msg CHANNEL, "[***] #{kind} #{type}: #{service} @ #{host} is #{state}"
    else
      # msgnum is a continually incrementing counter
      msgnum += 1
      tag = '%03d' % msgnum
      msg CHANNEL, "[#{tag}] #{kind} #{type}: #{service} @ #{host}"

      # cache the host/service pair under the alert number so it can be
      # acknowledged later; the entry expires after ALERT_TTL seconds.
      # SETEX stores the value and its expiry in a single command, so the
      # key can never be left behind without a ttl.
      redis.setex("nagios::alerts::#{tag}", ALERT_TTL, "#{host};#{service}")
    end
  end
end
# Handle "!ack <number> [message]" said in a channel: look up the cached
# host/service pair for that alert number in redis and post an
# acknowledgement for it to the nagios cmd.cgi endpoint.
on :channel, /^!ack\s+(\d+)\s*(.*)/ do
  # create key name (match[0] is the alert number, match[1] the optional text)
  key = "nagios::alerts::#{'%03d' % match[0].to_i}"

  # (.*) captures an empty string, never nil, when no message is given, so
  # the default has to be chosen by testing for emptiness
  comment = match[1].to_s.strip
  comment = 'No message given' if comment.empty?

  # read the sender now: the redis reply arrives asynchronously, and by then
  # nick/match may describe a later irc message
  author = nick

  # get host/service pair from redis
  redis.get(key) do |alert|
    if alert
      # get the host and service, then send a ack request to http instance
      host, service = alert.split(/;/, 2)

      # drop a trailing slash from the base url so the path has no '//'
      url = "#{NAGIOS_INSTANCE.chomp('/')}/cgi-bin/cmd.cgi"

      request = EventMachine::HttpRequest.new(url).post({
        :head => {
          'Authorization' => [ NAGIOS_USER, NAGIOS_PASSWORD ]
        },
        :body => {
          # NOTE(review): 34 is presumably nagios' "acknowledge service
          # problem" command id -- confirm against your nagios version
          :cmd_typ => 34,
          :cmd_mod => 2,
          :host => host,
          :service => service,
          :send_notification => 1,
          :com_author => NAGIOS_USER,
          :com_data => "By #{author}: #{comment}"
        }
      })

      # only forget the alert once nagios has accepted the request, so a
      # failed acknowledgement can be retried while the key is still cached
      request.callback do
        status = request.response_header.status
        if status == 200
          # DEL is the redis command name for removing a key
          redis.del(key)
        else
          msg CHANNEL, "could not acknowledge #{host}/#{service}: nagios returned http #{status}"
        end
      end

      request.errback do
        msg CHANNEL, "could not acknowledge #{host}/#{service}: request to nagios failed"
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On what versions of Nagios have you tested this?