Created
October 2, 2017 07:24
-
-
Save Mikulas/76318dcb63eaac6e8707a9c6cb3c5d52 to your computer and use it in GitHub Desktop.
Kubernetes Heapster Grafana Dashboard: template by kops instance group
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"__inputs": [ | |
{ | |
"name": "DS_INFLUXDB-DATASOURCE", | |
"label": "influxdb-datasource", | |
"description": "", | |
"type": "datasource", | |
"pluginId": "influxdb", | |
"pluginName": "InfluxDB" | |
} | |
], | |
"__requires": [ | |
{ | |
"type": "grafana", | |
"id": "grafana", | |
"name": "Grafana", | |
"version": "v4.4.3" | |
}, | |
{ | |
"type": "panel", | |
"id": "graph", | |
"name": "Graph", | |
"version": "" | |
}, | |
{ | |
"type": "datasource", | |
"id": "influxdb", | |
"name": "InfluxDB", | |
"version": "1.0.0" | |
} | |
], | |
"annotations": { | |
"list": [] | |
}, | |
"editable": true, | |
"gnetId": null, | |
"graphTooltip": 0, | |
"hideControls": false, | |
"id": null, | |
"links": [], | |
"refresh": false, | |
"rows": [ | |
{ | |
"collapse": false, | |
"height": 294, | |
"panels": [ | |
{ | |
"alert": { | |
"conditions": [ | |
{ | |
"evaluator": { | |
"params": [ | |
6000 | |
], | |
"type": "gt" | |
}, | |
"operator": { | |
"type": "and" | |
}, | |
"query": { | |
"params": [ | |
"C", | |
"5m", | |
"now" | |
] | |
}, | |
"reducer": { | |
"params": [], | |
"type": "avg" | |
}, | |
"type": "query" | |
} | |
], | |
"executionErrorState": "alerting", | |
"frequency": "60s", | |
"handler": 1, | |
"message": "Overall cluster CPU usage is high", | |
"name": "Overall Cluster CPU Usage alert", | |
"noDataState": "keep_state", | |
"notifications": [] | |
}, | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"decimals": 1, | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 3, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": true, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 6, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Usage", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "cpu/usage_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND \"labels\" =~ /instance-group=nodes/ AND $timeFilter GROUP BY time($interval) fill(null)", | |
"rawQuery": false, | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Limit", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "cpu/limit", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"cpu/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Usage", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": true, | |
"measurement": "cpu/usage_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND \"labels\" =~ /instance-group=nodes/ AND $timeFilter GROUP BY time($interval) fill(null)", | |
"rawQuery": false, | |
"refId": "C", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
} | |
] | |
} | |
], | |
"thresholds": [ | |
{ | |
"colorMode": "critical", | |
"fill": true, | |
"line": true, | |
"op": "gt", | |
"value": 6000 | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Overall Cluster CPU Usage", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": "Millicores", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
}, | |
{ | |
"alert": { | |
"conditions": [ | |
{ | |
"evaluator": { | |
"params": [ | |
35000000000 | |
], | |
"type": "gt" | |
}, | |
"operator": { | |
"type": "and" | |
}, | |
"query": { | |
"params": [ | |
"D", | |
"5m", | |
"now" | |
] | |
}, | |
"reducer": { | |
"params": [], | |
"type": "avg" | |
}, | |
"type": "query" | |
} | |
], | |
"executionErrorState": "alerting", | |
"frequency": "60s", | |
"handler": 1, | |
"message": "Overall memory usage is high", | |
"name": "Overall Cluster Memory Usage alert", | |
"noDataState": "keep_state", | |
"notifications": [] | |
}, | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 1, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"max": true, | |
"min": true, | |
"show": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 6, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Usage", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "memory/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Working Set", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "memory/working_set", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/working_set\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "C", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Limit", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": false, | |
"measurement": "memory/limit", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Usage", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": true, | |
"measurement": "memory/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "D", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
} | |
] | |
} | |
], | |
"thresholds": [ | |
{ | |
"colorMode": "critical", | |
"fill": true, | |
"line": true, | |
"op": "gt", | |
"value": 35000000000 | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Overall Cluster Memory Usage", | |
"tooltip": { | |
"shared": true, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "bytes", | |
"label": "Bytes", | |
"logBase": 1, | |
"max": null, | |
"min": "0", | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": true, | |
"title": "Cluster Overview", | |
"titleSize": "h6" | |
}, | |
{ | |
"collapse": false, | |
"height": 323, | |
"panels": [ | |
{ | |
"alert": { | |
"conditions": [ | |
{ | |
"evaluator": { | |
"params": [ | |
1500 | |
], | |
"type": "gt" | |
}, | |
"operator": { | |
"type": "and" | |
}, | |
"query": { | |
"params": [ | |
"B", | |
"5m", | |
"now" | |
] | |
}, | |
"reducer": { | |
"params": [], | |
"type": "avg" | |
}, | |
"type": "query" | |
} | |
], | |
"executionErrorState": "alerting", | |
"frequency": "60s", | |
"handler": 1, | |
"message": "A single node has load over 1.5/2.0", | |
"name": "CPU Usage by Node alert", | |
"noDataState": "keep_state", | |
"notifications": [] | |
}, | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 4, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"max": true, | |
"min": true, | |
"rightSide": true, | |
"show": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Usage $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "cpu/usage_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Usage $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": true, | |
"measurement": "cpu/usage_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"cpu/usage_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
} | |
] | |
} | |
], | |
"thresholds": [ | |
{ | |
"colorMode": "critical", | |
"fill": true, | |
"line": true, | |
"op": "gt", | |
"value": 1500 | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "CPU Usage by Node", | |
"tooltip": { | |
"shared": true, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"transparent": false, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": "Millicores", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "CPU Usage", | |
"titleSize": "h6" | |
}, | |
{ | |
"collapse": false, | |
"height": 380, | |
"panels": [ | |
{ | |
"alert": { | |
"conditions": [ | |
{ | |
"evaluator": { | |
"params": [ | |
6000000000 | |
], | |
"type": "gt" | |
}, | |
"operator": { | |
"type": "and" | |
}, | |
"query": { | |
"params": [ | |
"C", | |
"5m", | |
"now" | |
] | |
}, | |
"reducer": { | |
"params": [], | |
"type": "avg" | |
}, | |
"type": "query" | |
} | |
], | |
"executionErrorState": "alerting", | |
"frequency": "60s", | |
"handler": 1, | |
"message": "Single node memory usage is high.", | |
"name": "Memory Usage by Node alert", | |
"noDataState": "keep_state", | |
"notifications": [] | |
}, | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 2, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"max": true, | |
"min": true, | |
"rightSide": true, | |
"show": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Working Set $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": false, | |
"measurement": "memory/working_set", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/working_set\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Usage $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": false, | |
"measurement": "memory/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Limit $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": false, | |
"measurement": "memory/limit", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "D", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Usage $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"hide": true, | |
"measurement": "memory/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"memory/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "C", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
} | |
] | |
} | |
], | |
"thresholds": [ | |
{ | |
"colorMode": "critical", | |
"fill": true, | |
"line": true, | |
"op": "gt", | |
"value": 6000000000 | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Memory Usage by Node", | |
"tooltip": { | |
"shared": true, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"transparent": false, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "bytes", | |
"label": "Bytes", | |
"logBase": 1, | |
"max": null, | |
"min": "0", | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "Memory Usage", | |
"titleSize": "h6" | |
}, | |
{ | |
"collapse": false, | |
"height": "250px", | |
"panels": [ | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 7, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Tx", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "network/tx_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"network/tx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Rx", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "network/rx_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"network/rx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Overall Cluster Network Usage", | |
"tooltip": { | |
"shared": true, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "Bps", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": {}, | |
"id": 8, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Tx $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "network/tx_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"network/tx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"rawQuery": false, | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Rx $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "network/rx_rate", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"network/rx_rate\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"rawQuery": false, | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Network Usage by Node", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "Bps", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": {}, | |
"id": 10, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Usage", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "filesystem/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"filesystem/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
}, | |
{ | |
"alias": "Limit", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "filesystem/limit", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"filesystem/limit\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval) fill(null)", | |
"refId": "B", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Overall Cluster Filesystem Usage", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "bytes", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
}, | |
{ | |
"alert": { | |
"conditions": [ | |
{ | |
"evaluator": { | |
"params": [ | |
40000000000 | |
], | |
"type": "gt" | |
}, | |
"operator": { | |
"type": "and" | |
}, | |
"query": { | |
"params": [ | |
"A", | |
"5m", | |
"now" | |
] | |
}, | |
"reducer": { | |
"params": [], | |
"type": "avg" | |
}, | |
"type": "query" | |
} | |
], | |
"executionErrorState": "alerting", | |
"frequency": "60s", | |
"handler": 1, | |
"message": "Disk usage is high on node", | |
"name": "Filesystem Usage by Node alert", | |
"noDataState": "keep_state", | |
"notifications": [] | |
}, | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "${DS_INFLUXDB-DATASOURCE}", | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": {}, | |
"id": 11, | |
"legend": { | |
"alignAsTable": true, | |
"avg": false, | |
"current": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"alias": "Usage $tag_nodename", | |
"dsType": "influxdb", | |
"groupBy": [ | |
{ | |
"params": [ | |
"$interval" | |
], | |
"type": "time" | |
}, | |
{ | |
"params": [ | |
"nodename" | |
], | |
"type": "tag" | |
}, | |
{ | |
"params": [ | |
"null" | |
], | |
"type": "fill" | |
} | |
], | |
"measurement": "filesystem/usage", | |
"orderByTime": "ASC", | |
"policy": "default", | |
"query": "SELECT sum(\"value\") FROM \"filesystem/usage\" WHERE \"type\" = 'node' AND $timeFilter GROUP BY time($interval), \"nodename\" fill(null)", | |
"refId": "A", | |
"resultFormat": "time_series", | |
"select": [ | |
[ | |
{ | |
"params": [ | |
"value" | |
], | |
"type": "field" | |
}, | |
{ | |
"params": [], | |
"type": "sum" | |
} | |
] | |
], | |
"tags": [ | |
{ | |
"key": "type", | |
"operator": "=", | |
"value": "node" | |
}, | |
{ | |
"condition": "AND", | |
"key": "labels", | |
"operator": "=~", | |
"value": "/(^|,)kops\\/instance-group:$ig(,|$)/" | |
} | |
] | |
} | |
], | |
"thresholds": [ | |
{ | |
"colorMode": "critical", | |
"fill": true, | |
"line": true, | |
"op": "gt", | |
"value": 40000000000 | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Filesystem Usage by Node", | |
"tooltip": { | |
"shared": true, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "bytes", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "Others", | |
"titleSize": "h6" | |
} | |
], | |
"schemaVersion": 14, | |
"style": "dark", | |
"tags": [], | |
"templating": { | |
"list": [ | |
{ | |
"allValue": null, | |
"current": { | |
"tags": [], | |
"text": "nodes", | |
"value": [ | |
"nodes" | |
] | |
}, | |
"hide": 0, | |
"includeAll": false, | |
"label": "Instance group", | |
"multi": true, | |
"name": "ig", | |
"options": [ | |
{ | |
"selected": false, | |
"text": "master", | |
"value": "master" | |
}, | |
{ | |
"selected": true, | |
"text": "nodes", | |
"value": "nodes" | |
}, | |
{ | |
"selected": false, | |
"text": "jenkins", | |
"value": "jenkins" | |
} | |
], | |
"query": "master,nodes,jenkins", | |
"type": "custom" | |
} | |
] | |
}, | |
"time": { | |
"from": "now-6h", | |
"to": "now" | |
}, | |
"timepicker": { | |
"now": true, | |
"refresh_intervals": [ | |
"5s", | |
"10s", | |
"30s", | |
"1m", | |
"5m", | |
"15m", | |
"30m", | |
"1h", | |
"2h", | |
"1d" | |
], | |
"time_options": [ | |
"5m", | |
"15m", | |
"1h", | |
"6h", | |
"12h", | |
"24h", | |
"2d", | |
"7d", | |
"30d" | |
] | |
}, | |
"timezone": "browser", | |
"title": "Cluster", | |
"version": 37 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment