Created
July 7, 2012 00:00
-
-
Save jtushman/3063475 to your computer and use it in GitHub Desktop.
Watches Heroku router logs, summarizes them, and sends notifications
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Execute with the following:
# bundle exec rake GMAIL_USERNAME=you@gmail.com GMAIL_PASSWORD=password watch_heroku
# Rake task: tails the Heroku router log and prints a summary line every 60 seconds.
#
# Two threads are started and joined:
#   * 'heroku_logs' feeds every line of `heroku logs --ps router --tail` to parse_heroku_line.
#   * 'summarize' prints per-window averages, calls check_and_notify, and resets the counters.
#
# Environment:
#   APP_NAME                       - interpolated into the `heroku logs --app` command.
#   GMAIL_USERNAME, GMAIL_PASSWORD - only checked for presence here; a reminder is
#                                    printed when either is missing.
task :watch_heroku do
  unless ENV['GMAIL_USERNAME'] && ENV['GMAIL_PASSWORD']
    puts "need to set GMAIL_USERNAME and GMAIL_PASSWORD environment variables"
  end

  @critical_notified = false
  @warning_notified = false
  @total_lines = 0
  @total_service = 0
  @total_wait = 0
  @total_queue = 0
  @total_errors = 0
  @max_service = 0
  @mutex = Mutex.new

  # print headers
  puts
  puts
  printf "%7s %7s %7s %7s %7s %7s %40s\n",
         'ops', 'service', 'wait', 'queue', 'max_serv', 'errors', 'time'

  threads = []

  threads << Thread.new('heroku_logs') do
    IO.popen("heroku logs --ps router --tail --app #{ENV['APP_NAME']}") do |f|
      while (line = f.gets)
        parse_heroku_line(line)
      end
    end
  end

  # every 60 seconds print out averages and other stats, then start a new window
  threads << Thread.new('summarize') do
    loop do
      sleep(60)

      # Snapshot and reset inside one critical section so lines parsed while the
      # summary is being printed land in the next window instead of being lost.
      lines = service = wait = queue = errors = max_service = nil
      @mutex.synchronize do
        lines = @total_lines
        service = @total_service
        wait = @total_wait
        queue = @total_queue
        errors = @total_errors
        max_service = @max_service
        @total_service = @total_lines = @total_wait = @total_queue = @total_errors = @max_service = 0
      end

      average_service = lines > 0 ? service / lines : 'N/A'
      average_wait = lines > 0 ? wait / lines : 'N/A'
      average_queue = lines > 0 ? queue / lines : 'N/A'

      color_print lines
      color_print average_service, warning: 1000, critical: 10_000
      color_print average_wait, warning: 10, critical: 100
      color_print average_queue, warning: 10, critical: 100
      color_print max_service, warning: 10_000, critical: 20_000
      color_print errors, warning: 1, critical: 10
      color_print Time.now, length: 40
      printf "\n"

      # If errors or average service time exceed a threshold, notify. The 'N/A'
      # placeholder is display-only: pass 0 for an empty window so the numeric
      # comparison in check_and_notify cannot raise and kill this thread.
      check_and_notify(lines > 0 ? average_service : 0, errors)
    end
  end

  threads.each { |t| t.join }
end
# Prints +field+ right-aligned in a fixed-width column, colored by threshold.
#
# field   - value to print; formatted with "%<length>s".
# options - Hash of display settings:
#           :length   - column width (defaults to 7)
#           :warning  - print in yellow when the integer value of field is greater
#           :critical - print in bold red and ring the terminal bell when greater
#
# Fields that is_number? rejects are printed without color. The critical check
# takes precedence over the warning check. The caller's options hash is left
# unmodified.
def color_print(field, options = {})
  width = options[:length] || 7
  # Convert once; nil means "not a number", so no threshold can apply.
  value = is_number?(field) ? Integer(field) : nil

  if value && options[:critical] && value > options[:critical]
    print "\a" # beep
    print Term::ANSIColor.red
    print Term::ANSIColor.bold
  elsif value && options[:warning] && value > options[:warning]
    print Term::ANSIColor.yellow
  end

  printf "%#{width}s", field
  print Term::ANSIColor.clear
end
# Folds one Heroku router log line into the running counters.
#
# Expected shapes:
#   2012-07-05T20:24:10+00:00 heroku[router]: GET my-app.com/pxl/4fdbc97dc6b36c0030001160?value=1 dyno=web.14 queue=0 wait=0ms service=8ms status=200 bytes=35
# or, for an error:
#   2012-07-05T20:17:12+00:00 heroku[router]: Error H12 (Request timeout) -> GET my-app.com/crossdomain.xml dyno=web.4 queue= wait= service=30000ms status=503 bytes=0
#
# A line containing "Error" only increments @total_errors. Any other line is
# counted only when its queue, wait and service values are all integers; it
# then updates @total_lines, @total_service, @total_wait, @total_queue and
# @max_service. Every counter update happens under @mutex, matching the reset
# performed by the summarize thread.
def parse_heroku_line(line)
  if line =~ /Error/
    @mutex.synchronize { @total_errors += 1 }
    return
  end

  # Look fields up by key rather than by position so an extra or missing token
  # cannot shift one metric into another's slot.
  fields = {}
  line.split.each do |token|
    key, value = token.split('=', 2)
    fields[key] = value.sub('ms', '') if value
  end
  queue, wait, service = fields.values_at('queue', 'wait', 'service')
  return unless is_number?(service) && is_number?(wait) && is_number?(queue)

  service_ms = Integer(service)
  @mutex.synchronize do
    @total_lines += 1
    @total_service += service_ms
    @total_wait += Integer(wait)
    @total_queue += Integer(queue)
    @max_service = service_ms if service_ms > @max_service
  end
end
# Reports whether Kernel#Integer accepts +string+.
#
# string - the value to test; nil and non-numeric text yield false.
#
# Returns true when Integer(string) succeeds, false when it raises.
def is_number?(string)
  Integer(string)
  true
rescue StandardError
  false
end
# Escalates to notify_critical or notify_warning based on the last window.
#
# average_service - average service time for the window; the summarize loop
#                   passes the string 'N/A' when no request lines were seen.
# errors          - number of error lines seen in the window.
#
# Critical: average_service > 10_000 or errors > 10.
# Warning:  average_service > 600 or errors > 5.
def check_and_notify(average_service, errors)
  # A non-numeric placeholder must not reach the comparison (String > Integer
  # raises ArgumentError); treat it as 0 so the error count is still evaluated.
  service_ms = average_service.is_a?(Numeric) ? average_service : 0

  if service_ms > 10_000 || errors > 10
    notify_critical
  elsif service_ms > 600 || errors > 5
    notify_warning
  end
end
# Starts a background thread intended for alerting admins and returns it.
# The thread body is still a placeholder: no e-mail is sent yet.
def notify_kaboom
  Thread.new('notify_admins') do |_label|
    # send emails
  end
end
# Fires the critical notification at most once: the first call starts the
# (placeholder) notifier thread and sets @critical_notified; later calls do
# nothing.
#
# Returns true on the first call, nil once the flag is set.
def notify_critical
  return if @critical_notified

  Thread.new('notify_admins') do |_label|
    # send emails
  end
  @critical_notified = true
end
# Fires the warning notification at most once: the first call starts the
# (placeholder) notifier thread and sets @warning_notified; later calls do
# nothing.
#
# Returns true on the first call, nil once the flag is set.
def notify_warning
  return if @warning_notified

  Thread.new('notify_admins') do |_label|
    # send emails
  end
  @warning_notified = true
end
# Sends +msg+ to +to+ through Gmail's SMTP server (smtp.gmail.com:587, STARTTLS).
#
# to  - recipient address.
# msg - text used as both the subject and the body of the mail.
#
# Credentials and the sender address come from ENV['GMAIL_USERNAME'] and
# ENV['GMAIL_PASSWORD'].
def send_email(to, msg)
  sender = ENV['GMAIL_USERNAME']
  # Collapse line breaks so a multi-line message cannot spill extra headers
  # out of the Subject line; the body keeps the message verbatim.
  subject = msg.to_s.gsub(/[\r\n]+/, ' ')

  content = [
    "From: Heroku Dyno Watcher <#{sender}>",
    "To: #{to}",
    "Subject: #{subject}",
    "",
    "#{msg}"
  ].join("\r\n")

  # Verify the server certificate instead of disabling verification.
  tls = OpenSSL::SSL::SSLContext.new
  tls.set_params(verify_mode: OpenSSL::SSL::VERIFY_PEER)

  smtp = Net::SMTP.new('smtp.gmail.com', 587)
  smtp.enable_starttls(tls)
  smtp.start('gmail.com', sender, ENV['GMAIL_PASSWORD'], :login) do |session|
    session.send_message(content, sender, to)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
adding mutex