Skip to content

Instantly share code, notes, and snippets.

@gswallow
Last active December 20, 2022 21:45
Show Gist options
  • Save gswallow/e299edc7ec2e6b8498b813f871edb776 to your computer and use it in GitHub Desktop.
Save gswallow/e299edc7ec2e6b8498b813f871edb776 to your computer and use it in GitHub Desktop.
"metrics": {
...
"disk": {
"measurement": [
"used_percent",
"inodes_free"
],
"metrics_collection_interval": 60,
"resources": [
"*"
],
"ignore_file_system_types": [
"sysfs",
"devtmpfs",
"tmpfs",
"debugfs",
"rpc_pipefs",
"hugetlbfs"
],
"drop_device": true,
"append_dimensions": {
"Organization": "${organization}",
"Environment": "${environment}",
"Project": "${project}",
"ClusterId": "${cluster_id}"
}
},
...
}
"metrics": {
"namespace": "${organization}/${environment}/${project}/CWAgent",
"aggregation_dimensions": [
[ "Organization", "Environment", "Project", "ClusterId", "AutoScalingGroupName" ],
[ "AutoScalingGroupName" ],
[ "InstanceId" ]
],
"append_dimensions": {
"AutoScalingGroupName": "$${aws:AutoScalingGroupName}"
},
"metrics_collected": {
"cpu": {
...
},
"disk": {
...
},
"diskio": {
...
},
"mem": {
...
},
"statsd": {
... optional ...
},
"swap": {
...
},
"procstat": [
{
...
},
{
...
}
]
}
}
"metrics": {
"procstat": [
{
"pattern": "/usr/bin/vault",
"measurement": [
"cpu_time",
"pid_count",
...
],
"append_dimensions": {
"Organization": "${organization}",
"Environment": "${environment}",
"Project": "${project}",
"ClusterId": "${cluster_id}"
}
},
...
],
...
}
{
"agent": {
"metrics_collection_interval": 60,
"run_as_user": "cwagent"
},
"metrics": {
...
},
"logs": {
...
}
}
# Alarm on the Vault process count published under the CWAgent namespace.
# Created only when var.monitor_vault_processes is true; in that case one alarm
# is created per index in var.vault_cluster_node_count, each bound to the
# Auto Scaling group at the same index.
resource "aws_cloudwatch_metric_alarm" "vault_process" {
  count = var.monitor_vault_processes ? var.vault_cluster_node_count : 0

  alarm_name        = "${local.prefix}-${var.vault_cluster_id}-${count.index}-vault-process-not-running"
  alarm_description = "Checks that the vault process is running"

  # Metric identity. CloudWatch only matches a metric whose namespace, name and
  # full dimension set are identical to what was published, so every dimension
  # attached to the metric has to be listed here.
  namespace   = "${var.org}/${var.env}/${var.project}/CWAgent"
  metric_name = "procstat_lookup_pid_count"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.vault_node[count.index].name
    Organization         = var.org
    Environment          = var.env
    Project              = var.project
    ClusterId            = var.vault_cluster_id
    pattern              = "/usr/bin/vault"
    pid_finder           = "native"
  }

  # Evaluation: alarm as soon as a single 60-second period has a minimum
  # process count below 1.
  statistic           = "Minimum"
  period              = 60
  evaluation_periods  = 1
  datapoints_to_alarm = 1
  comparison_operator = "LessThanThreshold"
  threshold           = 1

  # A period with no datapoints is treated as breaching, so a node that stops
  # reporting also puts the alarm into ALARM.
  treat_missing_data        = "breaching"
  insufficient_data_actions = []
}
# Alarm on root filesystem usage published under the CWAgent namespace.
# Created only when var.monitor_vault_disk_usage is true; in that case one alarm
# is created per index in var.vault_cluster_node_count, each bound to the
# Auto Scaling group at the same index.
resource "aws_cloudwatch_metric_alarm" "disks" {
  count = var.monitor_vault_disk_usage ? var.vault_cluster_node_count : 0

  alarm_name        = "${local.prefix}-${var.vault_cluster_id}-${count.index}-vault-root-volume-full"
  alarm_description = "Checks that the root volume is not full"

  # Metric identity. CloudWatch only matches a metric whose namespace, name and
  # full dimension set are identical to what was published. There is no
  # "device" dimension here; the metric is selected by path and fstype only.
  namespace   = "${var.org}/${var.env}/${var.project}/CWAgent"
  metric_name = "disk_used_percent"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.vault_node[count.index].name
    Organization         = var.org
    Environment          = var.env
    Project              = var.project
    ClusterId            = var.vault_cluster_id
    fstype               = "xfs"
    path                 = "/"
  }

  # Evaluation: alarm when the 60-second average is at or above 80 percent for
  # five out of five consecutive periods.
  statistic           = "Average"
  period              = 60
  evaluation_periods  = 5
  datapoints_to_alarm = 5
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 80

  # Gaps in reporting do not count against the disk threshold.
  treat_missing_data        = "notBreaching"
  insufficient_data_actions = []
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment