Created
June 12, 2018 16:24
-
-
Save MatMoore/ea87fdfac5a6bb9c56ec6cc4b7d57f3d to your computer and use it in GitHub Desktop.
Grafana dashboard for incidents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"annotations": { | |
"list": [] | |
}, | |
"editable": true, | |
"gnetId": null, | |
"graphTooltip": 0, | |
"hideControls": false, | |
"id": 93, | |
"links": [], | |
"rows": [ | |
{ | |
"collapse": false, | |
"height": "250px", | |
"panels": [ | |
{ | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": [ | |
"rgba(255, 0, 0, 0.9)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(50, 172, 45, 0.97)" | |
], | |
"datasource": "Graphite", | |
"format": "none", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"id": 4, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": " errors", | |
"postfixFontSize": "150%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"span": 8, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"refId": "A", | |
"target": "integral(monitoring-1_management.cdn_fastly-govuk.requests-status_5xx)", | |
"textEditor": true | |
} | |
], | |
"thresholds": "", | |
"title": "How many 5xx errors were served from the CDN to end users?", | |
"type": "singlestat", | |
"valueFontSize": "200%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "max" | |
}, | |
{ | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": [ | |
"rgba(245, 54, 54, 0.9)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(50, 172, 45, 0.97)" | |
], | |
"datasource": "Graphite", | |
"format": "none", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"id": 5, | |
"interval": null, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [ | |
{ | |
"name": "value to text", | |
"value": 1 | |
}, | |
{ | |
"name": "range to text", | |
"value": 2 | |
} | |
], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": " errors", | |
"postfixFontSize": "150%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [ | |
{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
} | |
], | |
"span": 4, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"tableColumn": "", | |
"targets": [ | |
{ | |
"refId": "A", | |
"target": "integral(sumSeries(stats.whitehall-backend-*_backend.nginx_logs.*.http_5*,stats.backend-*_backend.nginx_logs.*.http_5*))", | |
"textEditor": true | |
} | |
], | |
"thresholds": "", | |
"title": "How many 5xx errors were served by publishing applications?", | |
"type": "singlestat", | |
"valueFontSize": "200%", | |
"valueMaps": [ | |
{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
} | |
], | |
"valueName": "max" | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "Dashboard Row", | |
"titleSize": "h6" | |
}, | |
{ | |
"collapse": false, | |
"height": 250, | |
"panels": [ | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "Graphite", | |
"fill": 1, | |
"id": 1, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"span": 6, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"refId": "A", | |
"target": "integral(monitoring-1_management.cdn_fastly-govuk.requests-status_5xx)", | |
"textEditor": true | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "How many 5xx errors were served from the CDN to end users?", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": "Total number in time period", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "Graphite", | |
"fill": 1, | |
"id": 3, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [ | |
{} | |
], | |
"spaceLength": 10, | |
"span": 6, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"refId": "A", | |
"target": "integral(sumSeries(stats.whitehall-backend-*_backend.nginx_logs.*.http_5*))", | |
"textEditor": true | |
}, | |
{ | |
"refId": "B", | |
"target": "integral(sumSeries(stats.backend-*_backend.nginx_logs.*.http_5*))", | |
"textEditor": true | |
}, | |
{ | |
"refId": "C", | |
"target": "", | |
"textEditor": true | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "How many 5xx errors were served by publishing applications?", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
] | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "Dashboard Row", | |
"titleSize": "h6" | |
}, | |
{ | |
"collapse": false, | |
"height": 250, | |
"panels": [ | |
{ | |
"content": "# How to use this dashboard\n1. Set the time range to the time of the incident\n2. Read the error totals from the panels above\n\n# How it works\nThe CDN error counts come from monitoring-1.management, where (I think) we run a monitoring app that pulls down logs from Fastly. I haven't investigated where this comes from, so it might not be accurate.\n\nIf you know how this works, please update this message. 🙃\n\nErrors in publishing apps are displayed separately because these requests do not go through the CDN, and affect publishers rather than end users.\n\nThese counts include some apps that may not actually be publishing apps, but they're hosted on the backend machines and serve HTTP requests - for example the content audit tool, or content tagger.", | |
"id": 2, | |
"links": [], | |
"mode": "markdown", | |
"span": 12, | |
"title": "About", | |
"type": "text" | |
} | |
], | |
"repeat": null, | |
"repeatIteration": null, | |
"repeatRowId": null, | |
"showTitle": false, | |
"title": "Dashboard Row", | |
"titleSize": "h6" | |
} | |
], | |
"schemaVersion": 14, | |
"style": "dark", | |
"tags": [], | |
"templating": { | |
"list": [] | |
}, | |
"time": { | |
"from": "now-6h", | |
"to": "now" | |
}, | |
"timepicker": { | |
"refresh_intervals": [ | |
"5s", | |
"10s", | |
"30s", | |
"1m", | |
"5m", | |
"15m", | |
"30m", | |
"1h", | |
"2h", | |
"1d" | |
], | |
"time_options": [ | |
"5m", | |
"15m", | |
"1h", | |
"6h", | |
"12h", | |
"24h", | |
"2d", | |
"7d", | |
"30d" | |
] | |
}, | |
"timezone": "", | |
"title": "How many errors did we serve during an incident?", | |
"version": 10 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment