Last active
April 7, 2020 08:28
-
-
Save david-martin/5651cd085c8dd7b8cc0e5a1025023b1b to your computer and use it in GitHub Desktop.
RHMI SLO dashboard
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"annotations": { | |
"list": [ | |
{ | |
"builtIn": 1, | |
"datasource": "-- Grafana --", | |
"enable": true, | |
"hide": true, | |
"iconColor": "rgba(0, 211, 255, 1)", | |
"name": "Annotations & Alerts", | |
"type": "dashboard" | |
} | |
] | |
}, | |
"editable": true, | |
"gnetId": null, | |
"graphTooltip": 0, | |
"id": 17, | |
"iteration": 1586247995722, | |
"links": [], | |
"panels": [ | |
{ | |
"collapsed": false, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 0 | |
}, | |
"id": 2, | |
"panels": [], | |
"title": "SLO Summary (based on critical Alerts over the last 28 days & SLO of 99.9%)", | |
"type": "row" | |
}, | |
{ | |
"cacheTimeout": null, | |
"colorBackground": true, | |
"colorValue": false, | |
"colors": [ | |
"#C4162A", | |
"rgba(237, 129, 40, 0.89)", | |
"#299c46" | |
], | |
"decimals": 2, | |
"description": "% of time where *no* critical alerts were firing over the last 28 days", | |
"format": "percentunit", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"gridPos": { | |
"h": 4, | |
"w": 3, | |
"x": 0, | |
"y": 1 | |
}, | |
"id": 6, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"options": {}, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"expr": "clamp_max(\n sum_over_time(\n (clamp_max(\n sum(absent(ALERTS{alertstate=\"firing\", severity=\"critical\"}))\n , 1\n ))[28d:10m]\n ) / (28 * 24 * 6) > 0, 1\n)", | |
"format": "time_series", | |
"instant": false, | |
"intervalFactor": 1, | |
"refId": "A" | |
} | |
], | |
"thresholds": "0.999,0.999", | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Overall SLO %", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "current" | |
}, | |
{ | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"#299c46", | |
"rgba(237, 129, 40, 0.89)", | |
"#d44a3a" | |
], | |
"decimals": null, | |
"description": "Total time where at least 1 critical alert was firing over the last 28 days", | |
"format": "ms", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"gridPos": { | |
"h": 4, | |
"w": 3, | |
"x": 3, | |
"y": 1 | |
}, | |
"hideTimeOverride": true, | |
"id": 10, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"options": {}, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"expr": " sum_over_time(\n (clamp_max(\n sum(ALERTS{alertstate=\"firing\", severity=\"critical\"})\n , 1\n ))[28d:10m]\n ) * (10 * 60 * 1000)", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"refId": "A" | |
} | |
], | |
"thresholds": "", | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Firing Time ", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "current" | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"description": "Total number of critical alerts firing over the last 28 days. ", | |
"fill": 1, | |
"gridPos": { | |
"h": 8, | |
"w": 18, | |
"x": 6, | |
"y": 1 | |
}, | |
"id": 12, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"percentage": false, | |
"pointradius": 2, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"expr": "sum(ALERTS{severity='critical', alertstate='firing'}) or vector(0)", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"refId": "A" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": "28d", | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Number of alerts firing ", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"decimals": 0, | |
"format": "none", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"cacheTimeout": null, | |
"colorBackground": true, | |
"colorValue": false, | |
"colors": [ | |
"#299c46", | |
"rgba(237, 129, 40, 0.89)", | |
"#C4162A" | |
], | |
"datasource": "Prometheus", | |
"description": "Total number of critical alerts currently firing", | |
"format": "none", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"gridPos": { | |
"h": 4, | |
"w": 3, | |
"x": 0, | |
"y": 5 | |
}, | |
"id": 4, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"options": {}, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"expr": "sum(ALERTS {severity='critical', alertstate='firing'})", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"refId": "A" | |
} | |
], | |
"thresholds": "1,1", | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Alerts Firing", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "current" | |
}, | |
{ | |
"cacheTimeout": null, | |
"colorBackground": true, | |
"colorValue": false, | |
"colors": [ | |
"#C4162A", | |
"rgba(237, 129, 40, 0.89)", | |
"#299c46" | |
], | |
"decimals": 2, | |
"description": "Amount of time left where at least 1 critical alert can be firing before the SLO is breached for the last 28 days", | |
"format": "ms", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"gridPos": { | |
"h": 4, | |
"w": 3, | |
"x": 3, | |
"y": 5 | |
}, | |
"id": 8, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"options": {}, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"expr": "$slo_001_ms - (sum_over_time(\n (clamp_max(\n sum(ALERTS{alertstate=\"firing\", severity=\"critical\"})\n , 1\n ))[28d:10m]\n ) * (10 * 60 * 1000))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"refId": "A" | |
} | |
], | |
"thresholds": "0,0", | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Remaining Error Budget", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "current" | |
} | |
], | |
"schemaVersion": 18, | |
"style": "dark", | |
"tags": [], | |
"templating": { | |
"list": [ | |
{ | |
"current": { | |
"text": "28", | |
"value": "28" | |
}, | |
"hide": 2, | |
"label": "SLO in days", | |
"name": "slo_days", | |
"options": [ | |
{ | |
"selected": true, | |
"text": "28", | |
"value": "28" | |
} | |
], | |
"query": "28", | |
"skipUrlSync": false, | |
"type": "constant" | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"text": "2419200000", | |
"value": "2419200000" | |
}, | |
"datasource": "Prometheus", | |
"definition": "query_result(vector($slo_days * 24 * 60 * 60 * 1000))", | |
"hide": 2, | |
"includeAll": false, | |
"label": "SLO in ms", | |
"multi": false, | |
"name": "slo_ms", | |
"options": [ | |
{ | |
"selected": true, | |
"text": "2419200000", | |
"value": "2419200000" | |
} | |
], | |
"query": "query_result(vector($slo_days * 24 * 60 * 60 * 1000))", | |
"refresh": 0, | |
"regex": "/.*\\s(.*)\\s.*/", | |
"skipUrlSync": false, | |
"sort": 0, | |
"tagValuesQuery": "", | |
"tags": [], | |
"tagsQuery": "", | |
"type": "query", | |
"useTags": false | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"text": "2416780800", | |
"value": "2416780800" | |
}, | |
"datasource": "Prometheus", | |
"definition": "query_result(vector($slo_ms * 0.999))", | |
"hide": 2, | |
"includeAll": false, | |
"label": "99.9% of SLO in ms", | |
"multi": false, | |
"name": "slo_999_ms", | |
"options": [ | |
{ | |
"selected": true, | |
"text": "2416780800", | |
"value": "2416780800" | |
} | |
], | |
"query": "query_result(vector($slo_ms * 0.999))", | |
"refresh": 0, | |
"regex": "/.*\\s(.*)\\s.*/", | |
"skipUrlSync": false, | |
"sort": 0, | |
"tagValuesQuery": "", | |
"tags": [], | |
"tagsQuery": "", | |
"type": "query", | |
"useTags": false | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"text": "2419200", | |
"value": "2419200" | |
}, | |
"datasource": "Prometheus", | |
"definition": "query_result(vector($slo_ms * 0.001))", | |
"hide": 2, | |
"includeAll": false, | |
"label": "0.1% in ms", | |
"multi": false, | |
"name": "slo_001_ms", | |
"options": [ | |
{ | |
"selected": true, | |
"text": "2419200", | |
"value": "2419200" | |
} | |
], | |
"query": "query_result(vector($slo_ms * 0.001))", | |
"refresh": 0, | |
"regex": "/.*\\s(.*)\\s.*/", | |
"skipUrlSync": false, | |
"sort": 0, | |
"tagValuesQuery": "", | |
"tags": [], | |
"tagsQuery": "", | |
"type": "query", | |
"useTags": false | |
} | |
] | |
}, | |
"time": { | |
"from": "now-15m", | |
"to": "now" | |
}, | |
"timepicker": { | |
"refresh_intervals": [ | |
"5s", | |
"10s", | |
"30s", | |
"1m", | |
"5m", | |
"15m", | |
"30m", | |
"1h", | |
"2h", | |
"1d" | |
], | |
"time_options": [ | |
"5m", | |
"15m", | |
"1h", | |
"6h", | |
"12h", | |
"24h", | |
"2d", | |
"7d", | |
"30d" | |
] | |
}, | |
"timezone": "", | |
"title": "SLO summary", | |
"uid": "eT5llOjWz", | |
"version": 23 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment