Skip to content

Instantly share code, notes, and snippets.

@david-bc
Created July 27, 2018 14:52
Show Gist options
  • Save david-bc/01a0c54d17c1025be94eff9f8c0ca879 to your computer and use it in GitHub Desktop.
Save david-bc/01a0c54d17c1025be94eff9f8c0ca879 to your computer and use it in GitHub Desktop.
{
"annotations": {
"list": [
{
"$$hashKey": "object:2508",
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 69,
"iteration": 1531762262408,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 17,
"panels": [],
"repeat": null,
"title": "Availability",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Defined as the rate of messages per 1m that are able to be produced in 20 seconds\n\nhttps://github.com/linkedin/kafka-monitor/wiki/Service-Overview",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 1
},
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2652",
"expr": "kafka_monitor_produce_service_produce_availability_avg_1m",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Produce Availability 1m Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": "1.1",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Defined as the rate of messages per 1m that are able to be consumed in 20 seconds\n\nhttps://github.com/linkedin/kafka-monitor/wiki/Service-Overview",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 1
},
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2706",
"expr": "kafka_monitor_consume_service_consume_availability_avg_1m",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Consume Availability 1m Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": "1.1",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 18,
"panels": [],
"repeat": null,
"title": "Latency",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The average latency of records from producer to consumer",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 9
},
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2760",
"expr": "kafka_monitor_consume_service_records_delay_ms_avg",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "A",
"step": 40
},
{
"$$hashKey": "object:2761",
"expr": "kafka_monitor_consume_service_records_delay_ms_max",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "Max",
"refId": "B",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Average End-to-End Message Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The maximum latency of records from producer to consumer",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 9
},
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2835",
"expr": "kafka_monitor_consume_service_records_delay_ms_avg",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "Average",
"refId": "A",
"step": 40
},
{
"$$hashKey": "object:2836",
"expr": "kafka_monitor_consume_service_records_delay_ms_max",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"legendFormat": "Max",
"refId": "B",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Max End-to-End Message Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 19,
"panels": [],
"repeat": null,
"title": "Produce/Consume Rates",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 17
},
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2909",
"expr": "irate(kafka_monitor_consume_service_records_consumed_total[1m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Total Records Consumed Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
},
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:2962",
"expr": "irate(kafka_monitor_produce_service_records_produced_total[1m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Total Records Produced Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Rate per 1m",
"fill": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
},
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:3015",
"expr": "kafka_monitor_produce_service_records_produced_rate_partition",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}} Partition: {{partition}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Produce Rate by Partition",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 20,
"panels": [],
"repeat": null,
"title": "Errors and SLA Violations",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The average number of errors per second",
"fill": 1,
"gridPos": {
"h": 7,
"w": 6,
"x": 0,
"y": 25
},
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:3068",
"expr": "irate(kafka_monitor_consume_service_consume_error_total[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Consume Errors Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The average number of records per second that are lost",
"fill": 1,
"gridPos": {
"h": 7,
"w": 6,
"x": 6,
"y": 25
},
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:3121",
"expr": "irate(kafka_monitor_consume_service_records_lost_total[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Records Lost",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The rate of records that are either lost or arrive after maximum allowed latency under SLA (20s)",
"fill": 1,
"gridPos": {
"h": 7,
"w": 6,
"x": 12,
"y": 25
},
"id": 11,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:3174",
"expr": "irate(kafka_monitor_consume_service_records_delayed_total[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}}",
"refId": "A",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Failed SLA Records Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 25
},
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:3227",
"expr": "kafka_monitor_produce_service_produce_error_rate_partition",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Environment: {{environment}} Partition: {{partition}}",
"refId": "A",
"step": 40
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Produce Error Rate by Partition",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"refresh": "10s",
"schemaVersion": 16,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Kafka SLA Monitor Copy",
"uid": "2zQxlfdik",
"version": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment