Skip to content

Instantly share code, notes, and snippets.

@Mikulas
Created October 2, 2017 07:24
Show Gist options
  • Save Mikulas/76318dcb63eaac6e8707a9c6cb3c5d52 to your computer and use it in GitHub Desktop.
Save Mikulas/76318dcb63eaac6e8707a9c6cb3c5d52 to your computer and use it in GitHub Desktop.
Kubernetes Heapster Grafana Dashboard: template by kops instance group
{
"__inputs": [
{
"name": "DS_INFLUXDB-DATASOURCE",
"label": "influxdb-datasource",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "v4.4.3"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [],
"refresh": false,
"rows": [
{
"collapse": false,
"height": 294,
"panels": [
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
6000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "60s",
"handler": 1,
"message": "Overall cluster CPU usage is high",
"name": "Overall Cluster CPU Usage alert",
"noDataState": "keep_state",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"decimals": 1,
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": true,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Usage",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "cpu/usage_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND \"labels\" =~ /(^|,)kops\\/instance-group:$ig(,|$)/ AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Limit",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "cpu/limit",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"cpu/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Usage",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": true,
"measurement": "cpu/usage_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND \"labels\" =~ /instance-group=nodes/ AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": false,
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
}
]
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 6000
}
],
"timeFrom": null,
"timeShift": null,
"title": "Overall Cluster CPU Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": "Millicores",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
35000000000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"D",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "60s",
"handler": 1,
"message": "Overall memory usage is high",
"name": "Overall Cluster Memory Usage alert",
"noDataState": "keep_state",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 1,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Usage",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "memory/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Working Set",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "memory/working_set",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/working_set\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Limit",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": false,
"measurement": "memory/limit",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Usage",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": true,
"measurement": "memory/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "D",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
}
]
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 35000000000
}
],
"timeFrom": null,
"timeShift": null,
"title": "Overall Cluster Memory Usage",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "Bytes",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Cluster Overview",
"titleSize": "h6"
},
{
"collapse": false,
"height": 323,
"panels": [
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
1500
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "60s",
"handler": 1,
"message": "A single node has load over 1.5/2.0",
"name": "CPU Usage by Node alert",
"noDataState": "keep_state",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Usage $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "cpu/usage_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Usage $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": true,
"measurement": "cpu/usage_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
}
]
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 1500
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage by Node",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"transparent": false,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": "Millicores",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "CPU Usage",
"titleSize": "h6"
},
{
"collapse": false,
"height": 380,
"panels": [
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
6000000000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "60s",
"handler": 1,
"message": "Single node memory usage is high.",
"name": "Memory Usage by Node alert",
"noDataState": "keep_state",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Working Set $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": false,
"measurement": "memory/working_set",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/working_set\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Usage $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": false,
"measurement": "memory/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Limit $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": false,
"measurement": "memory/limit",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "D",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Usage $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"hide": true,
"measurement": "memory/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
}
]
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 6000000000
}
],
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage by Node",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"transparent": false,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "Bytes",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Memory Usage",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Tx",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "network/tx_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"network/tx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Rx",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "network/rx_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"network/rx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Overall Cluster Network Usage",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 1,
"grid": {},
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Tx $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "network/tx_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"network/tx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Rx $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "network/rx_rate",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"network/rx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"rawQuery": false,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Network Usage by Node",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 1,
"grid": {},
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Usage",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "filesystem/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"filesystem/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
},
{
"alias": "Limit",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "filesystem/limit",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"filesystem/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Overall Cluster Filesystem Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
40000000000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "60s",
"handler": 1,
"message": "Disk usage is high on node",
"name": "Filesystem Usage by Node alert",
"noDataState": "keep_state",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_INFLUXDB-DATASOURCE}",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"id": 11,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"alias": "Usage $tag_nodename",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"nodename"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "filesystem/usage",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"value\") FROM \"filesystem/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": [
{
"key": "type",
"operator": "=",
"value": "node"
},
{
"condition": "AND",
"key": "labels",
"operator": "=~",
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/"
}
]
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 40000000000
}
],
"timeFrom": null,
"timeShift": null,
"title": "Filesystem Usage by Node",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Others",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {
"tags": [],
"text": "nodes",
"value": [
"nodes"
]
},
"hide": 0,
"includeAll": false,
"label": "Instance group",
"multi": true,
"name": "ig",
"options": [
{
"selected": false,
"text": "master",
"value": "master"
},
{
"selected": true,
"text": "nodes",
"value": "nodes"
},
{
"selected": false,
"text": "jenkins",
"value": "jenkins"
}
],
"query": "master,nodes,jenkins",
"type": "custom"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"now": true,
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Cluster",
"version": 37
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment