Created
February 15, 2022 19:26
-
-
Save chilledornaments/36b21254e810b6d63fe3afbaf540a103 to your computer and use it in GitHub Desktop.
Alarm when ElastiCache nodes fail over
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```hcl
# CloudWatch alarm that trips whenever a Redis node's IsMaster value changes
# between consecutive datapoints, i.e. the node switched between primary and
# replica.
resource "aws_cloudwatch_metric_alarm" "realtime_redis_primary_failover" {
  # One alarm instance per entry in the module's member_clusters[0] output;
  # each.value is used below as both the alarm name prefix and the
  # CacheClusterId dimension.
  for_each = toset(module.my_cloudposse_redis_module.member_clusters[0])

  alarm_name        = "${each.value}-failover"
  alarm_description = "Monitor for Redis failover events"

  # Alarm as soon as a single evaluated datapoint is strictly above 0.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # No actions are wired up for either state transition.
  ok_actions    = []
  alarm_actions = []

  # Detection logic:
  #   * IsMaster reports 1 when the node is a primary and 0 when it is a replica.
  #   * DIFF "Returns the difference between each value in the time series and
  #     the preceding value from that time series."
  #     https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/using-metric-math.html
  #   * If one datapoint is 1 and the next is 0 (or the reverse), the role
  #     changed and DIFF is non-zero.
  #   * Rather than caring about the sign, the expression only checks whether
  #     DIFF(is_master) is anything other than 0.
  metric_query {
    id          = "e1"
    label       = "Failover Occurred"
    expression  = "DIFF(is_master) != 0"
    return_data = true # this expression is the series the alarm evaluates
  }

  # Raw IsMaster series consumed by the expression above (referenced by its id).
  metric_query {
    id = "is_master"

    metric {
      namespace   = "AWS/ElastiCache"
      metric_name = "IsMaster"
      stat        = "Maximum"
      period      = 60

      dimensions = {
        CacheNodeId    = "0001"
        CacheClusterId = each.value
      }
    }
  }
}
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment