-
-
Save smrchy/a1c0e6f05babaa55dea40d1f91c33191 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Host running out of memory.
# The expression is 100 minus the percentage of (MemFree + Buffers + Cached)
# relative to MemTotal, i.e. the percentage of memory in use. The alert fires
# when that value stays above 80, which means less than 20% is still available.
ALERT HighMem
  IF 100 - (node_memory_MemFree + node_memory_Buffers + node_memory_Cached) / node_memory_MemTotal * 100 > 80
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "Instance {{$labels.host}} has high memory consumption"
  DESCRIPTION "{{$labels.host}} of job {{$labels.job}} has less than 20% of memory available for more than 1 minutes."
# High load alert for the "prom" host.
# Fires when node_load1 on series whose `host` label matches the regex 'prom'
# stays above 2 for one minute.
ALERT HighLoad
  IF node_load1{host=~'prom'} > 2
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "Instance {{$labels.host}} {{$labels.instance}} has high load"
  DESCRIPTION "{{$labels.host}} of job {{$labels.job}} has high load for more than 1 minutes."
# High CPU alert.
# Sums the instantaneous per-second rate of user-mode CPU time per (host, mode),
# scales it to a percentage, and divides by the number of distinct `cpu` label
# values found across all node_cpu series. Fires when the result stays above 50.
# NOTE(review): `sum by (host,mode)` keeps only the `host` and `mode` labels, so
# {{$labels.instance}} and {{$labels.job}} in the messages below will render
# empty. Confirm whether `instance`/`job` should be added to the `by` clause.
# NOTE(review): the core count is taken over every host's node_cpu series, so it
# assumes all hosts expose the same set of `cpu` labels. Verify.
ALERT HighCPU
  IF sum by (host,mode)(irate(node_cpu{mode='user'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu))) > 50
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "Instance {{$labels.host}} {{$labels.instance}} has high cpu"
  DESCRIPTION "{{$labels.host}} of job {{$labels.job}} has been consuming > 50% cpu for more than 1 minutes."
# SBM (seconds behind master) alert.
# Fires when mysql_slave_status_seconds_behind_master stays above 1 for one minute.
ALERT MysqlSlaveLag
  IF mysql_slave_status_seconds_behind_master > 1
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "Instance {{$labels.host}} {{$labels.instance}} more than 1 seconds behind master"
  DESCRIPTION "{{$labels.host}} of job {{$labels.job}} is currently experiencing replication lag"
# ELB latency alert for the 'production-mobile-lb' load balancer.
# Averages aws_elb_latency_average across all matching series, multiplies by
# 1000 and fires when the result stays above 120 for one minute.
# NOTE(review): the *1000 presumably converts seconds to milliseconds, in line
# with the "120ms" wording below. Confirm the unit of the exported metric.
ALERT ELBHighLatency
  IF avg(aws_elb_latency_average{load_balancer_name='production-mobile-lb'}) * 1000 > 120
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "latency > 120ms on ELB"
  DESCRIPTION "Latency is higher than 120ms for more than 1 minutes"
# Disk space alert.
# For node_exporter filesystems whose fstype is not 'xfs', computes
# 100 - free/size*100 (the percentage used) and fires when it stays above 70,
# i.e. when less than 30% of the filesystem is free.
ALERT LowDiskSpace
  IF 100 - node_filesystem_free{job="node_exporter", fstype!='xfs'} / node_filesystem_size{job="node_exporter", fstype!='xfs'} * 100 > 70
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "disk space on {{$labels.host}} {{$labels.instance}} lower than 30%"
  DESCRIPTION "disk space on {{$labels.host}} {{$labels.instance}} lower than 30%"
# ELB unhealthy hosts alert for the "production-lb" load balancer.
# Sums aws_elb_un_healthy_host_count_average over all matching series and fires
# when the total stays above 0 for one minute.
ALERT ELBUnHealthyHosts
  IF sum(aws_elb_un_healthy_host_count_average{load_balancer_name="production-lb"}) > 0
  FOR 1m
  WITH {
    severity="page"
  }
  SUMMARY "elb has >0 unhealthy hosts"
  DESCRIPTION "elb has >0 unhealthy hosts"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment