Created
April 19, 2021 18:25
-
-
Save csereno/4837e90c71b62c8c706e8a4619ef24b7 to your computer and use it in GitHub Desktop.
Cloudformation Templates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CW Alarm Template
#
# Author: Chris Sereno
# Description: This template creates CloudWatch (CW) alarms for an existing EC2 instance; the status-check alarms can reboot or recover the instance.
# Instance recovery has requirements. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-recover.html
# This is a personal test template and is NOT a vetted or approved template. Use at your own risk.
#
---
# Template format version and the stack description shown by CloudFormation.
AWSTemplateFormatVersion: '2010-09-09'
Description: 'CloudWatch Alarms to recover EC2 Template'
# Stack inputs. Each alarm has a 'Yes'/'No' toggle (quoted so YAML keeps them
# as strings rather than booleans) plus its own tuning parameters.
Parameters:
  InstanceId:
    Type: AWS::EC2::Instance::Id
    Description: Instance ID of your EC2
    ConstraintDescription: Instance Id of an existing EC2

  # Status check alarm
  StatusCheckFailed:
    Type: String
    Default: 'No'
    Description: 'This check verifies that your instance is reachable'
    AllowedValues:
      - 'Yes'
      - 'No'
  SCAlarmName:
    Description: Name of the Alarm
    Type: String
    Default: "EC2 Status Check Failed"
  SCAlarmDescription:
    Description: Description of the Alarm
    Type: String
    Default: "An EC2 Status Check Failed"
  SCAlarmPeriod:
    Description: The length of a period (in seconds)
    Type: Number
    Default: 60
  SCAlarmEvalPeriods:
    Description: The number of specified periods to evaluate
    Type: Number
    Default: 2
  SCAlarmThreshold:
    Description: The number of failures that have to occur
    Type: Number
    Default: 1

  # System status check alarm
  StatusCheckSystemFailed:
    Type: String
    Default: 'No'
    Description: "This check verifies that your instance's operating system is accepting traffic."
    AllowedValues:
      - 'Yes'
      - 'No'
  SCSAlarmName:
    Description: Name of the alarm
    Type: String
    Default: "EC2 System Status Check Failed"
  SCSAlarmDescription:
    Description: "Description of the Alarm"
    Type: String
    Default: "An EC2 System Status Check Failed"
  SCSAlarmPeriod:
    Description: The length of a period (in seconds)
    Type: Number
    Default: 60
  SCSAlarmEvalPeriods:
    Description: The number of specified periods to evaluate
    Type: Number
    Default: 2
  SCSAlarmThreshold:
    Description: The number of failures that have to occur
    Type: Number
    Default: 1

  # High CPU alarm
  HighCPU:
    Type: String
    Default: 'No'
    Description: "This will alarm if the CPU is above the given threshold."
    AllowedValues:
      - 'Yes'
      - 'No'
  HighCPUAlarmPeriod:
    Description: The length of a period (in seconds)
    Type: Number
    Default: 300
  HighCPUAlarmEvalPeriods:
    Description: The number of specified periods to evaluate
    Type: Number
    Default: 2
  HighCPUAlarmThreshold:
    # The value is a percentage, not a failure count.
    Description: The CPU utilization (in percent) at or above which the alarm triggers
    Type: Number
    Default: 90

  # High memory alarm
  HighMemory:
    Type: String
    Default: 'No'
    Description: "This will alarm if the memory usage is above the given threshold."
    AllowedValues:
      - 'Yes'
      - 'No'
  HighMemAlarmPeriod:
    Description: The length of a period (in seconds)
    Type: Number
    Default: 300
  HighMemAlarmEvalPeriods:
    Description: The number of specified periods to evaluate
    Type: Number
    Default: 2
  HighMemAlarmThreshold:
    # The value is a percentage, not a failure count.
    Description: The memory usage (in percent) at or above which the alarm triggers
    Type: Number
    Default: 90

  # Alarm Actions
  # No Default is declared, so a value must be supplied at stack creation.
  AlarmAction:
    Description: 'This allows you to take an action such as sending an email or text alert. (Must be in ARN format)'
    Type: String
# Console presentation only: groups the parameters by alarm and gives the
# Yes/No toggles friendlier labels.
Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      - Label:
          default: "Instance ID"
        Parameters:
          - InstanceId
      - Label:
          default: "Status Check Failed Alarm"
        Parameters:
          - StatusCheckFailed
          - SCAlarmName
          - SCAlarmDescription
          - SCAlarmPeriod
          - SCAlarmEvalPeriods
          - SCAlarmThreshold
      - Label:
          default: "Status Check Failed System Alarm"
        Parameters:
          - StatusCheckSystemFailed
          - SCSAlarmName
          - SCSAlarmDescription
          - SCSAlarmPeriod
          - SCSAlarmEvalPeriods
          - SCSAlarmThreshold
      - Label:
          default: "High CPU Alarm"
        Parameters:
          - HighCPU
          - HighCPUAlarmPeriod
          - HighCPUAlarmEvalPeriods
          - HighCPUAlarmThreshold
      - Label:
          default: "High Memory Alarm"
        Parameters:
          - HighMemory
          - HighMemAlarmPeriod
          - HighMemAlarmEvalPeriods
          - HighMemAlarmThreshold
      - Label:
          default: "Alarm Actions"
        Parameters:
          - AlarmAction
    ParameterLabels:
      StatusCheckFailed:
        default: "Create Status Check Failed Alarm?"
      StatusCheckSystemFailed:
        default: "Create System Status Check Failed Alarm?"
      HighCPU:
        default: "Create High CPU Alarm?"
      # Added so the memory toggle is labelled like the other three toggles.
      HighMemory:
        default: "Create High Memory Alarm?"
# Each condition is true when its 'Yes'/'No' toggle parameter is 'Yes';
# the alarm resource that names the condition is created only in that case.
Conditions:
  StatusAlarm: !Equals [!Ref StatusCheckFailed, 'Yes']
  SystemStatusAlarm: !Equals [!Ref StatusCheckSystemFailed, 'Yes']
  HighCPUAlarm: !Equals [!Ref HighCPU, 'Yes']
  HighMemAlarm: !Equals [!Ref HighMemory, 'Yes']
# One CloudWatch alarm per toggle. Every alarm is scoped to the InstanceId
# parameter and carries that ID in its name, because alarm names must be
# unique and a fixed name would collide when the template is deployed for a
# second instance.
Resources:
  # Reboots the instance (and notifies AlarmAction) when the StatusCheckFailed
  # metric reaches the threshold.
  StatusCheckFailedAlarm:
    Type: AWS::CloudWatch::Alarm
    Condition: StatusAlarm
    Properties:
      AlarmActions:
        # AWS::Partition keeps the ARN valid outside the standard 'aws' partition.
        - !Sub 'arn:${AWS::Partition}:automate:${AWS::Region}:ec2:reboot'
        - !Ref AlarmAction
      AlarmDescription: !Ref SCAlarmDescription
      AlarmName: !Join ['-', [!Ref SCAlarmName, !Ref InstanceId]]
      ComparisonOperator: GreaterThanOrEqualToThreshold
      Dimensions:
        - Name: InstanceId
          Value: !Ref InstanceId
      # Tuning values come from the parameters instead of hard-coded literals.
      EvaluationPeriods: !Ref SCAlarmEvalPeriods
      MetricName: StatusCheckFailed
      Namespace: AWS/EC2
      Period: !Ref SCAlarmPeriod
      Statistic: Maximum
      Threshold: !Ref SCAlarmThreshold
      TreatMissingData: missing
      Unit: Count

  # Recovers the instance (and notifies AlarmAction) when the
  # StatusCheckFailed_System metric reaches the threshold.
  StatusCheckSystemFailedAlarm:
    Type: AWS::CloudWatch::Alarm
    Condition: SystemStatusAlarm
    Properties:
      AlarmActions:
        - !Sub 'arn:${AWS::Partition}:automate:${AWS::Region}:ec2:recover'
        - !Ref AlarmAction
      AlarmDescription: !Ref SCSAlarmDescription
      # Suffixed with the instance ID, matching StatusCheckFailedAlarm.
      AlarmName: !Join ['-', [!Ref SCSAlarmName, !Ref InstanceId]]
      ComparisonOperator: GreaterThanOrEqualToThreshold
      Dimensions:
        - Name: InstanceId
          Value: !Ref InstanceId
      EvaluationPeriods: !Ref SCSAlarmEvalPeriods
      MetricName: StatusCheckFailed_System
      Namespace: AWS/EC2
      Period: !Ref SCSAlarmPeriod
      Statistic: Maximum
      Threshold: !Ref SCSAlarmThreshold
      TreatMissingData: missing
      Unit: Count

  # Notifies AlarmAction when CPU utilization reaches the threshold.
  HighCPUAlarm:
    Type: AWS::CloudWatch::Alarm
    Condition: HighCPUAlarm
    Properties:
      AlarmActions:
        - !Ref AlarmAction
      AlarmDescription: "High CPU alert on instance"
      AlarmName: !Sub 'High CPU alert on instance ${InstanceId}'
      ComparisonOperator: GreaterThanOrEqualToThreshold
      Dimensions:
        - Name: InstanceId
          Value: !Ref InstanceId
      EvaluationPeriods: !Ref HighCPUAlarmEvalPeriods
      MetricName: CPUUtilization
      Namespace: AWS/EC2
      Period: !Ref HighCPUAlarmPeriod
      Statistic: Maximum
      Threshold: !Ref HighCPUAlarmThreshold
      TreatMissingData: missing
      # CPUUtilization is published in Percent; a mismatched Unit matches no
      # data points and leaves the alarm in INSUFFICIENT_DATA.
      Unit: Percent

  # For memory alarms install the Cloudwatch agent on the EC2
  # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Install-CloudWatch-Agent.html
  # Notifies AlarmAction when memory usage reaches the threshold.
  HighMemAlarm:
    Type: AWS::CloudWatch::Alarm
    Condition: HighMemAlarm
    Properties:
      AlarmActions:
        - !Ref AlarmAction
      AlarmDescription: "High memory alert on instance"
      AlarmName: !Sub 'High memory alert on instance ${InstanceId}'
      ComparisonOperator: GreaterThanOrEqualToThreshold
      # NOTE(review): assumes the agent publishes mem_used_percent with
      # InstanceId as its only dimension - confirm against the agent config.
      Dimensions:
        - Name: InstanceId
          Value: !Ref InstanceId
      EvaluationPeriods: !Ref HighMemAlarmEvalPeriods
      MetricName: mem_used_percent
      # The CloudWatch agent publishes to the CWAgent namespace by default,
      # not AWS/EC2. Adjust if the agent config sets a custom namespace.
      Namespace: CWAgent
      Period: !Ref HighMemAlarmPeriod
      Statistic: Maximum
      Threshold: !Ref HighMemAlarmThreshold
      TreatMissingData: missing
      Unit: Percent
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment