Last active
March 10, 2017 06:29
-
-
Save kenzo0107/6bca3225abd763ed4ec614dbaaec2c00 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# instance_down: fires once a series of the `up` metric has been 0 for a
# continuous 5 minutes. Tagged severity=critical.
ALERT instance_down
  IF up == 0
  FOR 5m
  LABELS {
    severity = "critical"
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
  }
# cpu_threshold_exceeded: per-instance CPU busy percentage, computed as
# 100 * (1 - average idle-mode irate of node_cpu over a 5m range), compared
# against THRESHOLD_CPU. No FOR clause, so it fires on the first evaluation
# that crosses the threshold.
# NOTE(review): THRESHOLD_CPU looks like a placeholder that has to be replaced
# with a number before this file is loaded -- confirm with the deploy tooling.
ALERT cpu_threshold_exceeded
  IF (
       100 * (
         1 - avg by(instance)(
               irate(node_cpu{job='node',mode='idle'}[5m])
             )
       )
     ) > THRESHOLD_CPU
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} CPU usage is dangerously high",
    description = "This device's cpu usage has exceeded the threshold with a value of {{ $value }}.",
  }
# mem_threshold_exceeded: sums MemFree + Cached + Buffers for job 'node',
# divides by 1,000,000 and fires when the result drops below THRESHOLD_MEM.
# {{ $value }} is therefore the remaining (free + cached + buffers) amount,
# not the amount in use.
# NOTE(review): presumably the node_memory_* metrics are in bytes, making the
# compared value megabytes -- confirm. THRESHOLD_MEM looks like a placeholder
# to be substituted before loading -- confirm.
ALERT mem_threshold_exceeded
  IF (
       node_memory_MemFree{job='node'}
       + node_memory_Cached{job='node'}
       + node_memory_Buffers{job='node'}
     ) / 1000000 < THRESHOLD_MEM
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} memory usage is dangerously high",
    description = "This device's memory usage has exceeded the threshold with a value of {{ $value }}.",
  }
# filesystem_threshold_exceeded: percentage of the root mountpoint ('/') that
# is still available (avail / size * 100); fires when it falls below
# THRESHOLD_FS. {{ $value }} is the percentage available, not the percentage
# used.
# NOTE(review): THRESHOLD_FS looks like a placeholder to be substituted before
# loading -- confirm.
ALERT filesystem_threshold_exceeded
  IF node_filesystem_avail{job='node',mountpoint='/'}
       / node_filesystem_size{job='node'}
       * 100
     < THRESHOLD_FS
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} filesystem usage is dangerously high",
    description = "This device's filesystem usage has exceeded the threshold with a value of {{ $value }}.",
  }
# node_high_loadaverage: fires when the 1-minute load average stays above 2
# for 10 seconds. Tagged severity=warning.
#
# node_load1 is a gauge (an instantaneous value), so it is compared directly.
# Wrapping it in rate() would evaluate the per-second *change* of the load
# average instead of the load itself, and rate() is only meaningful for
# counters.
ALERT node_high_loadaverage
  IF node_load1 > 2
  FOR 10s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "High load average on {{$labels.instance}}",
    description = "{{$labels.instance}} has a high load average above 10s (current value: {{$value}})"
  }
# httpd_down: fires when node_httpd_count has been exactly 0 for a continuous
# 5 minutes. Tagged severity=fatal.
# NOTE(review): node_httpd_count appears to be a custom metric (httpd process
# count?) -- confirm its source and that it reports 0 rather than going
# absent when httpd is stopped.
ALERT httpd_down
  IF node_httpd_count == 0
  FOR 5m
  LABELS {
    severity = "fatal"
  }
  ANNOTATIONS {
    summary = "Httpd down on {{$labels.instance}} for {{$labels.service}}",
    description = "Httpd on {{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
  }
# httpd_high: fires when, for a (group, service) pair, the average
# node_httpd_count has exceeded the average node_httpd_max_clients for a
# continuous 5 minutes. Tagged severity=warning.
#
# Both sides are aggregated `by (group, service)`, so the resulting alert
# carries only those two labels. The annotations therefore reference
# $labels.group; $labels.instance is dropped by the aggregation and would
# render as an empty string.
ALERT httpd_high
  IF avg(node_httpd_count) by (group, service) > avg(node_httpd_max_clients) by (group, service)
  FOR 5m
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "High httpd count on {{$labels.group}} for {{$labels.service}}",
    description = "{{$labels.group}} has a high httpd process count above 5m (current value: {{$value}})",
  }
# mysqld_down: fires when node_mysqld_Threads_running has been exactly 0 for
# a continuous 5 minutes. Tagged severity=fatal.
# NOTE(review): node_mysqld_Threads_running appears to be a custom metric --
# confirm it reports 0 (rather than disappearing) when mysqld is stopped.
ALERT mysqld_down
  IF node_mysqld_Threads_running == 0
  FOR 5m
  LABELS {
    severity = "fatal"
  }
  ANNOTATIONS {
    summary = "mysqld down on {{$labels.instance}}",
    description = "mysqld on {{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
  }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment