Created
September 30, 2014 13:16
-
-
Save RaphaelAudet/4c5bd9e6b3d00021d266 to your computer and use it in GitHub Desktop.
Resque Stalled Alert: this script checks whether a Resque queue is overflowing or a job is "stuck".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'date' | |
require 'redis' | |
require 'net/smtp' | |
require 'resque' | |
require 'resque/version' | |
# Polls Resque and raises an alert when a queue grows past a length threshold
# or when a job has been running for longer than a time threshold.
#
# Alerts are printed to stdout (depending on the verbosity flags) and, when
# @send_email is true, also sent by e-mail through Net::SMTP.
class App
  VERSION = '0.0.1'

  # Sets the monitoring parameters and points Resque at the configured Redis
  # server and namespace.
  def initialize
    # Development parameters
    redis_host = '127.0.0.1'
    redis_password = 'password'
    redis_port = 6379
    @redis_namespace = 'resque:app_development'
    # Environment label interpolated into alert subjects and the log file name.
    @rails_env = ENV['RAILS_ENV'] || 'development'
    @verbose = true # set to false in production
    @veryverbose = true # set to false in production and in staging
    @cycle = 3 # check resque every @cycle seconds
    @alert_queue_length_threshold = 300 # over this threshold an alert will be sent
    @alert_job_stuck_threshold = 300 # over this threshold an alert will be sent, in seconds

    # Email settings
    # Senders and recipients
    @send_email = false # set to true if you want to send alert emails
    @from_mail = '[email protected]'
    @to_mail = '[email protected]'
    @from_name = 'Resque Queue Monitoring Robot'
    @to_name = 'Monitoring team'
    # Servers and authentication
    @smtp_host = '127.0.0.1'
    @smtp_port = 25
    @smtp_domain = 'email.com'
    @smtp_user = '[email protected]'
    @smtp_pwd = 'password'

    @redis = Redis.new(:host => redis_host, :port => redis_port, :password => redis_password, :thread_safe => true)
    Resque.redis = @redis
    Resque.redis.namespace = @redis_namespace

    # Last observed size of each queue, keyed by queue name, so that a change
    # in the number or order of queues cannot mix up the comparison.
    @prev_stat = {}
    # Titles of the running jobs for which a "stuck" alert was already raised.
    @sent_job_alert = []
    @sent_queue_alert = false
  end

  # Performs one monitoring pass (queue lengths, then stuck jobs) and then
  # sleeps for @cycle seconds.
  def run
    puts "Start at #{DateTime.now}\n\n" if @verbose
    check_queue_length
    check_job_stuck
    puts "\nFinished at #{DateTime.now}" if @verbose
    sleep @cycle
  end

  protected

  # Reads the size of every Resque queue, alerts for each queue that has just
  # crossed @alert_queue_length_threshold, and appends the sizes to the log.
  def check_queue_length
    queues = Resque.queues
    sizes = {}
    queues.each do |queue|
      size = Resque.size(queue)
      sizes[queue] = size
      puts "#{queue} ; #{size}" if @verbose
      # A queue seen for the first time is treated as previously empty.
      previous = @prev_stat.fetch(queue, 0)
      # Alert only on the transition, so a queue that stays long does not
      # trigger a new alert on every cycle.
      if previous <= @alert_queue_length_threshold && size > @alert_queue_length_threshold
        alert_queue_length(queue, size)
      end
    end
    puts queues.to_s if @veryverbose
    @prev_stat = sizes
    write_stat(queues, queues.map { |queue| sizes[queue] })
  end

  # Appends one comma-separated line (timestamp, queue names, queue sizes) to
  # the daily log file under /tmp.
  #
  # @param queues [Array<String>] queue names
  # @param stat [Array<Integer>] queue sizes, in the same order as +queues+
  def write_stat(queues, stat)
    puts stat.join(', ') if @veryverbose
    log_file = "/tmp/log_file_#{@rails_env}_#{Date.today}"
    File.open(log_file, 'a') do |f|
      f.puts "#{DateTime.now}, #{queues.join(', ')}, #{stat.join(', ')}"
    end
  end

  # Reports that +queue+ holds more jobs than the alert threshold.
  #
  # @param queue [String] queue name
  # @param nb_jobs [Integer] current number of jobs in the queue
  def alert_queue_length(queue, nb_jobs)
    subj = "#{@rails_env} Warning Resque:#{queue} has #{nb_jobs} jobs"
    msg_body = "the resque #{@redis_namespace} queue #{queue} is #{nb_jobs} jobs long. " \
               "(alerting over #{@alert_queue_length_threshold} jobs)"
    puts subj if @verbose
    puts msg_body if @veryverbose
    email(subj, msg_body) if @send_email
    @sent_queue_alert = true
  end

  # Inspects what every worker is processing and alerts, once per job, for each
  # job that has been running longer than @alert_job_stuck_threshold seconds.
  def check_job_stuck
    running = []
    Resque.workers.sort_by(&:to_s).each do |worker|
      data = worker.processing || {}
      # Reset for every worker so an idle worker is not printed with the job
      # of the worker examined just before it.
      job = nil
      if data['queue']
        job = job_title(data)
        running << job
        alert_job_stuck(worker, job) if job_stuck?(data) && !@sent_job_alert.include?(job)
      end
      puts "#{worker} ; #{job}" if @verbose
    end
    # Forget alerts for jobs that are no longer running; otherwise the list
    # would grow for as long as the monitor runs.
    @sent_job_alert &= running
  end

  # Builds the identifier of a running job: "<class>:<run_at>:<args>".
  #
  # @param data [Hash] the hash returned by the worker's +processing+ call
  # @return [String]
  def job_title(data)
    payload = data['payload'] || {}
    "#{payload['class']}:#{data['run_at']}:#{payload['args']}"
  end

  # Tells whether the job described by +data+ started more than
  # @alert_job_stuck_threshold seconds before +now+.
  #
  # @param data [Hash] the hash returned by the worker's +processing+ call
  # @param now [Time] reference time
  # @return [Boolean]
  def job_stuck?(data, now = Time.now)
    # DateTime.parse is used rather than strptime with its default format so
    # that timestamps other than strict ISO 8601 are accepted as well.
    started_at = DateTime.parse(data['run_at']).to_time
    started_at + @alert_job_stuck_threshold < now
  end

  # Reports that a job has been running for too long and remembers it so the
  # same job is not reported again.
  #
  # @param worker [#to_s] the worker processing the job
  # @param job_title [String] identifier built by #job_title
  def alert_job_stuck(worker, job_title)
    subj = "#{@rails_env} Warning: #{job_title} stuck on #{worker}"
    msg_body = "the job #{job_title} is running for more than " \
               "#{@alert_job_stuck_threshold / 60} minutes on #{worker}"
    puts subj if @verbose
    puts msg_body if @veryverbose
    email(subj, msg_body) if @send_email
    @sent_job_alert << job_title
  end

  # Composes the raw mail text: headers, an empty line, then the body.
  #
  # @param subj [String] subject line
  # @param msg_body [String] plain-text body
  # @param time [Time] value for the Date header
  # @return [String]
  def build_message(subj, msg_body, time = Time.now)
    [
      # The date/time should look something like: Thu, 03 Jan 2006 12:33:22 -0700
      "Date: #{time.strftime('%a, %d %b %Y %H:%M:%S %z')}",
      "From: #{@from_name} <#{@from_mail}>",
      "To: #{@to_name} <#{@to_mail}>",
      "Subject: #{subj}",
      '', # the empty line separates the headers from the body
      msg_body
    ].join("\n") + "\n"
  end

  # Sends an alert e-mail to @to_mail through the configured SMTP server.
  #
  # @param subj [String] subject line
  # @param msg_body [String] plain-text body
  def email(subj, msg_body)
    msg = build_message(subj, msg_body)
    # The third argument is the HELO domain. No credentials are passed to the
    # server here; @smtp_user only serves as the envelope sender.
    Net::SMTP.start(@smtp_host, @smtp_port, @smtp_domain) do |smtp|
      smtp.send_message msg, @smtp_user, @to_mail
    end
  end
end
# Build the monitor once, then keep running monitoring passes forever
# (each pass sleeps before returning).
monitor = App.new
loop { monitor.run }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment