Created
September 1, 2020 09:10
-
-
Save anilsakr/18ff74f85cd6594fef41d1c5d491b6ce to your computer and use it in GitHub Desktop.
Kubernetes Setup for Prometheus and Grafana
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-shot Job that loads datasources and dashboards into Grafana through its
# HTTP API. An init container first waits until Grafana answers HTTP 200, then
# the main container POSTs every *-datasource.json and *-dashboard.json file
# found in the mounted `grafana-import-dashboards` ConfigMap.
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
    spec:
      serviceAccountName: prometheus-k8s
      initContainers:
        # Poll Grafana every 15 seconds; the import container only starts once
        # this loop exits.
        - name: wait-for-grafana
          image: giantswarm/tiny-tools
          args:
            - /bin/sh
            - -c
            # Literal block (`|`) keeps the script's newlines exactly as
            # written instead of relying on folded-scalar indentation rules.
            - |
              set -x;
              while [ $(curl -Lsw '%{http_code}' "http://grafana:3000" -o /dev/null) -ne 200 ]; do
                echo '.'
                sleep 15;
              done
      containers:
        - name: grafana-import-dashboards
          image: giantswarm/tiny-tools
          command: ["/bin/sh", "-c"]
          workingDir: /opt/grafana-import-dashboards
          args:
            # Credentials are passed with `--user` rather than being embedded
            # in the URL: URL userinfo breaks as soon as the password contains
            # characters such as `@`, `/`, `:`, `#` or whitespace, while
            # `--user` sends the same HTTP Basic auth header for any value.
            # The `[ -e "$file" ]` guard skips the unexpanded glob pattern when
            # no file matches.
            - |
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --user "${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}" \
                    --request POST http://grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --user "${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}" \
                    --request POST http://grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done
          env:
            # Admin credentials are read from the `grafana` Secret.
            - name: GF_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
          volumeMounts:
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      restartPolicy: Never
      volumes:
        - name: config-volume
          configMap:
            name: grafana-import-dashboards
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Derived from ./manifests
---
# Namespace referenced by the namespaced monitoring resources in this manifest.
kind: Namespace
apiVersion: v1
metadata:
  name: monitoring
---
# Binds the cluster-wide `prometheus` ClusterRole to the `prometheus-k8s`
# ServiceAccount in the `monitoring` namespace.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 version is no longer served
# as of Kubernetes 1.22, and the object schema is unchanged.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus-k8s
    namespace: monitoring
---
# Read-only cluster permissions for Prometheus: get/list/watch on nodes,
# nodes/proxy, services, endpoints and pods; get on configmaps; and get on the
# non-resource `/metrics` URL.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 version is no longer served
# as of Kubernetes 1.22, and the object schema is unchanged.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Service account named as the subject of the `prometheus` ClusterRoleBinding.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-k8s
---
# Notification templates for Alertmanager, stored as Go template text.
#   default.tmpl - default subject/body templates for Slack, HipChat,
#                  PagerDuty, OpsGenie, VictorOps, email and Pushover.
#   slack.tmpl   - defines "slack.devops.text", which prints the DESCRIPTION
#                  annotation of every alert in the notification.
# The template bodies are literal block scalars: everything below each `|` is
# template text, not YAML, and must not be reflowed.
apiVersion: v1
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: alertmanager-templates
  namespace: monitoring
data:
  default.tmpl: |
    {{ define "__alertmanager" }}AlertManager{{ end }}
    {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}
    {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
    {{ define "__description" }}{{ end }}
    {{ define "__text_alert_list" }}{{ range . }}Labels:
    {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Annotations:
    {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Source: {{ .GeneratorURL }}
    {{ end }}{{ end }}
    {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
    {{ define "slack.default.pretext" }}{{ end }}
    {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "slack.default.iconemoji" }}{{ end }}
    {{ define "slack.default.iconurl" }}{{ end }}
    {{ define "slack.default.text" }}{{ end }}
    {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }}
    {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }}
    {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }}
    {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }}
    {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 -}}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{- end }}
    {{ if gt (len .Alerts.Resolved) 0 -}}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{- end }}
    {{- end }}
    {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
    {{ define "email.default.html" }}
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <!--
    Style and HTML derived from https://github.com/mailgun/transactional-email-templates
    The MIT License (MIT)
    Copyright (c) 2014 Mailgun
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
    -->
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title>
    </head>
    <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6">
    <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
    <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top">
    <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;">
    <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top">
    {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
    {{ .Name }}={{ .Value }}
    {{ end }}
    </td>
    </tr>
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top">
    <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <a href="{{ template "__alertmanagerURL" . }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a>
    </td>
    </tr>
    {{ if gt (len .Alerts.Firing) 0 }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong>
    </td>
    </tr>
    {{ end }}
    {{ range .Alerts.Firing }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}
    {{ if gt (len .Alerts.Resolved) 0 }}
    {{ if gt (len .Alerts.Firing) 0 }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong>
    </td>
    </tr>
    {{ end }}
    {{ range .Alerts.Resolved }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}
    </table>
    </td>
    </tr>
    </table>
    <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;">
    <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td>
    </tr>
    </table>
    </div></div>
    </td>
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
    </tr>
    </table>
    </body>
    </html>
    {{ end }}
    {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 }}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{ end }}
    {{ if gt (len .Alerts.Resolved) 0 }}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{ end }}
    {{ end }}
    {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
  slack.tmpl: |
    {{ define "slack.devops.text" }}
    {{range .Alerts}}{{.Annotations.DESCRIPTION}}
    {{end}}
    {{ end }}
---
# Alertmanager main configuration, mounted as config.yml.
# Routing: alerts labelled `severity: slack` go to the `slack_alert` receiver;
# everything else falls through to `default`. Both receivers post to Slack.
# NOTE(review): the SMTP credentials and Slack webhook URL below are
# placeholders. Real values are secrets and should not be committed in a
# ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
  namespace: monitoring
data:
  config.yml: |-
    global:
      # ResolveTimeout is the time after which an alert is declared resolved
      # if it has not been updated.
      resolve_timeout: 5m
      # The smarthost and SMTP sender used for mail notifications.
      # Placeholder address: the previous value was the literal text
      # '[email protected]', which is not a valid e-mail address.
      smtp_smarthost: 'smtp.gmail.com:587'
      smtp_from: 'alertmanager@example.com'
      smtp_auth_username: 'alertmanager@example.com'
      smtp_auth_password: 'barfoo'
      # The API URL to use for Slack notifications.
      slack_api_url: 'https://hooks.slack.com/services/some/api/token'
    # The directory from which notification templates are read.
    templates:
    - '/etc/alertmanager-templates/*.tmpl'
    # The root route on which each incoming alert enters.
    route:
      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.
      group_by: ['alertname', 'cluster', 'service']
      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification.
      # This way ensures that you get multiple alerts for the same group that start
      # firing shortly after another are batched together on the first
      # notification.
      group_wait: 30s
      # When the first notification was sent, wait 'group_interval' to send a batch
      # of new alerts that started firing for that group.
      group_interval: 5m
      # If an alert has successfully been sent, wait 'repeat_interval' to
      # resend them.
      #repeat_interval: 1m
      repeat_interval: 15m
      # A default receiver
      # If an alert isn't caught by a route, send it to default.
      receiver: default
      # All the above attributes are inherited by all child routes and can be
      # overwritten on each.
      # The child route trees.
      routes:
      # Send severity=slack alerts to slack.
      - match:
          severity: slack
        receiver: slack_alert
      # - match:
      #     severity: email
      #   receiver: email_alert
    receivers:
    - name: 'default'
      slack_configs:
      - channel: '#alertmanager-test'
        text: '<!channel>{{ template "slack.devops.text" . }}'
        send_resolved: true
    - name: 'slack_alert'
      slack_configs:
      - channel: '#alertmanager-test'
        send_resolved: true
---
# Single-replica Alertmanager. The main config and the notification templates
# are mounted from the `alertmanager` and `alertmanager-templates` ConfigMaps;
# the storage path (/alertmanager) is backed by an emptyDir volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitoring
  name: alertmanager
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      labels:
        app: alertmanager
      name: alertmanager
    spec:
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager
        - name: templates-volume
          configMap:
            name: alertmanager-templates
        - name: alertmanager
          emptyDir: {}
      containers:
        - name: alertmanager
          image: quay.io/prometheus/alertmanager:v0.7.1
          # Single-dash flags, as written for this image version.
          args:
            - '-config.file=/etc/alertmanager/config.yml'
            - '-storage.path=/alertmanager'
          ports:
            - containerPort: 9093
              name: alertmanager
          volumeMounts:
            - mountPath: /etc/alertmanager
              name: config-volume
            - mountPath: /etc/alertmanager-templates
              name: templates-volume
            - mountPath: /alertmanager
              name: alertmanager
---
# NodePort Service that forwards TCP 9093 to pods labelled `app: alertmanager`.
# The prometheus.io/* annotations look like scrape hints; presumably they are
# consumed by a Prometheus scrape config that is not part of this section.
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
  namespace: monitoring
  labels:
    name: alertmanager
  annotations:
    prometheus.io/path: '/metrics'
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  selector:
    app: alertmanager
  ports:
    - name: alertmanager
      port: 9093
      targetPort: 9093
      protocol: TCP
---
# Single-replica Grafana server. The admin username and password are read from
# the `grafana` Secret, and /var/lib/grafana is backed by an emptyDir volume.
# NOTE(review): the selector matches on `app: grafana` only, and the
# grafana-import-dashboards Job labels its pods `app: grafana` as well.
# Consider adding `component: core` to the selector when this Deployment is
# next recreated (apps/v1 selectors cannot be changed in place).
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitoring
  name: grafana-core
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      volumes:
        - name: grafana-persistent-storage
          emptyDir: {}
      containers:
        - name: grafana-core
          image: grafana/grafana:4.2.0
          imagePullPolicy: IfNotPresent
          resources:
            # Requests are kept equal to limits so the container stays in the
            # Guaranteed QoS class.
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 100m
              memory: 100Mi
          env:
            # Enable basic auth with the admin user and password taken from
            # the `grafana` Secret; anonymous access is switched off.
            - name: GF_AUTH_BASIC_ENABLED
              value: "true"
            - name: GF_SECURITY_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_SECURITY_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
            - name: GF_AUTH_ANONYMOUS_ENABLED
              value: "false"
            # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
            #   value: Admin
            # Does not really work, because of template variables in exported
            # dashboards:
            # - name: GF_DASHBOARDS_JSON_ENABLED
            #   value: "true"
          readinessProbe:
            httpGet:
              path: /login
              port: 3000
            # initialDelaySeconds: 30
            # timeoutSeconds: 1
          volumeMounts:
            - name: grafana-persistent-storage
              mountPath: /var/lib/grafana
--- | |
apiVersion: v1 | |
data: | |
grafana-net-2-dashboard.json: | | |
{ | |
"__inputs": [{ | |
"name": "DS_PROMETHEUS", | |
"label": "Prometheus", | |
"description": "", | |
"type": "datasource", | |
"pluginId": "prometheus", | |
"pluginName": "Prometheus" | |
}], | |
"__requires": [{ | |
"type": "panel", | |
"id": "singlestat", | |
"name": "Singlestat", | |
"version": "" | |
}, { | |
"type": "panel", | |
"id": "text", | |
"name": "Text", | |
"version": "" | |
}, { | |
"type": "panel", | |
"id": "graph", | |
"name": "Graph", | |
"version": "" | |
}, { | |
"type": "grafana", | |
"id": "grafana", | |
"name": "Grafana", | |
"version": "3.1.0" | |
}, { | |
"type": "datasource", | |
"id": "prometheus", | |
"name": "Prometheus", | |
"version": "1.0.0" | |
}], | |
"id": null, | |
"title": "Prometheus Stats", | |
"tags": [], | |
"style": "dark", | |
"timezone": "browser", | |
"editable": true, | |
"hideControls": true, | |
"sharedCrosshair": false, | |
"rows": [{ | |
"collapse": false, | |
"editable": true, | |
"height": 178, | |
"panels": [{ | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 1, | |
"editable": true, | |
"error": false, | |
"format": "s", | |
"id": 5, | |
"interval": null, | |
"links": [], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"span": 3, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "(time() - container_start_time_seconds{container_name=\"kube-apiserver\"})", | |
"intervalFactor": 2, | |
"refId": "A", | |
"step": 4 | |
}], | |
"thresholds": "", | |
"title": "Uptime", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current", | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"rangeMaps": [{ | |
"from": "null", | |
"to": "null", | |
"text": "N/A" | |
}], | |
"mappingType": 1, | |
"gauge": { | |
"show": false, | |
"minValue": 0, | |
"maxValue": 100, | |
"thresholdMarkers": true, | |
"thresholdLabels": false | |
} | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"], | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"format": "none", | |
"id": 6, | |
"interval": null, | |
"links": [], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"span": 3, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": true | |
}, | |
"targets": [{ | |
"expr": "prometheus_local_storage_memory_series", | |
"intervalFactor": 2, | |
"refId": "A", | |
"step": 4 | |
}], | |
"thresholds": "1,5", | |
"title": "Local Storage Memory Series", | |
"type": "singlestat", | |
"valueFontSize": "70%", | |
"valueMaps": [], | |
"valueName": "current", | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"rangeMaps": [{ | |
"from": "null", | |
"to": "null", | |
"text": "N/A" | |
}], | |
"mappingType": 1, | |
"gauge": { | |
"show": false, | |
"minValue": 0, | |
"maxValue": 100, | |
"thresholdMarkers": true, | |
"thresholdLabels": false | |
} | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"], | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"format": "none", | |
"id": 7, | |
"interval": null, | |
"links": [], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"span": 3, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": true | |
}, | |
"targets": [{ | |
"expr": "prometheus_local_storage_indexing_queue_length", | |
"intervalFactor": 2, | |
"refId": "A", | |
"step": 4 | |
}], | |
"thresholds": "500,4000", | |
"title": "Internal Storage Queue Length", | |
"type": "singlestat", | |
"valueFontSize": "70%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "Empty", | |
"value": "0" | |
}], | |
"valueName": "current", | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"rangeMaps": [{ | |
"from": "null", | |
"to": "null", | |
"text": "N/A" | |
}], | |
"mappingType": 1, | |
"gauge": { | |
"show": false, | |
"minValue": 0, | |
"maxValue": 100, | |
"thresholdMarkers": true, | |
"thresholdLabels": false | |
} | |
}, { | |
"content": "<img src=\"http://prometheus.io/assets/prometheus_logo_grey.svg\" alt=\"Prometheus logo\" style=\"height: 40px;\">\n<span style=\"font-family: 'Open Sans', 'Helvetica Neue', Helvetica; font-size: 25px;vertical-align: text-top;color: #bbbfc2;margin-left: 10px;\">Prometheus</span>\n\n<p style=\"margin-top: 10px;\">You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the <a href=\"http://www.grafana.org/\">Grafana</a> and <a href=\"http://prometheus.io/\">Prometheus</a> projects.</p>", | |
"editable": true, | |
"error": false, | |
"id": 9, | |
"links": [], | |
"mode": "html", | |
"span": 3, | |
"style": {}, | |
"title": "", | |
"transparent": true, | |
"type": "text" | |
}], | |
"title": "New row" | |
}, { | |
"collapse": false, | |
"editable": true, | |
"height": 227, | |
"panels": [{ | |
"aliasColors": { | |
"prometheus": "#C15C17", | |
"{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" | |
}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 3, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 2, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 9, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{job}}", | |
"metric": "", | |
"refId": "A", | |
"step": 2 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Samples ingested (rate-5m)", | |
"tooltip": { | |
"shared": true, | |
"value_type": "cumulative", | |
"ordering": "alphabetical", | |
"msResolution": false | |
}, | |
"type": "graph", | |
"yaxes": [{ | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}, { | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}], | |
"xaxis": { | |
"show": true | |
} | |
}, { | |
"content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", | |
"editable": true, | |
"error": false, | |
"id": 8, | |
"links": [], | |
"mode": "markdown", | |
"span": 2.995914043583536, | |
"style": {}, | |
"title": "", | |
"transparent": true, | |
"type": "text" | |
}], | |
"title": "New row" | |
}, { | |
"collapse": false, | |
"editable": true, | |
"height": "250px", | |
"panels": [{ | |
"aliasColors": { | |
"prometheus": "#F9BA8F", | |
"{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" | |
}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 2, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 5, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "rate(prometheus_target_interval_length_seconds_count[5m])", | |
"intervalFactor": 2, | |
"legendFormat": "{{job}}", | |
"refId": "A", | |
"step": 2 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Target Scrapes (last 5m)", | |
"tooltip": { | |
"shared": true, | |
"value_type": "cumulative", | |
"ordering": "alphabetical", | |
"msResolution": false | |
}, | |
"type": "graph", | |
"yaxes": [{ | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}, { | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}], | |
"xaxis": { | |
"show": true | |
} | |
}, { | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 14, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 4, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{quantile}} ({{interval}})", | |
"metric": "", | |
"refId": "A", | |
"step": 2 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Scrape Duration", | |
"tooltip": { | |
"shared": true, | |
"value_type": "cumulative", | |
"ordering": "alphabetical", | |
"msResolution": false | |
}, | |
"type": "graph", | |
"yaxes": [{ | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}, { | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}], | |
"xaxis": { | |
"show": true | |
} | |
}, { | |
"content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ", | |
"editable": true, | |
"error": false, | |
"id": 11, | |
"links": [], | |
"mode": "markdown", | |
"span": 3, | |
"style": {}, | |
"title": "", | |
"transparent": true, | |
"type": "text" | |
}], | |
"title": "New row" | |
}, { | |
"collapse": false, | |
"editable": true, | |
"height": "250px", | |
"panels": [{ | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": null, | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 12, | |
"legend": { | |
"alignAsTable": false, | |
"avg": false, | |
"current": false, | |
"hideEmpty": true, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 9, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{quantile}}", | |
"refId": "A", | |
"step": 2 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Rule Eval Duration", | |
"tooltip": { | |
"shared": true, | |
"value_type": "cumulative", | |
"ordering": "alphabetical", | |
"msResolution": false | |
}, | |
"type": "graph", | |
"yaxes": [{ | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "percentunit", | |
"label": "" | |
}, { | |
"show": true, | |
"min": null, | |
"max": null, | |
"logBase": 1, | |
"format": "short" | |
}], | |
"xaxis": { | |
"show": true | |
} | |
}, { | |
"content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", | |
"editable": true, | |
"error": false, | |
"id": 15, | |
"links": [], | |
"mode": "markdown", | |
"span": 3, | |
"style": {}, | |
"title": "", | |
"transparent": true, | |
"type": "text" | |
}], | |
"title": "New row" | |
}], | |
"time": { | |
"from": "now-5m", | |
"to": "now" | |
}, | |
"timepicker": { | |
"now": true, | |
"refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], | |
"time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] | |
}, | |
"templating": { | |
"list": [] | |
}, | |
"annotations": { | |
"list": [] | |
}, | |
"refresh": false, | |
"schemaVersion": 12, | |
"version": 0, | |
"links": [{ | |
"icon": "info", | |
"tags": [], | |
"targetBlank": true, | |
"title": "Grafana Docs", | |
"tooltip": "", | |
"type": "link", | |
"url": "http://www.grafana.org/docs" | |
}, { | |
"icon": "info", | |
"tags": [], | |
"targetBlank": true, | |
"title": "Prometheus Docs", | |
"type": "link", | |
"url": "http://prometheus.io/docs/introduction/overview/" | |
}], | |
"gnetId": 2, | |
"description": "The official, pre-built Prometheus Stats Dashboard." | |
} | |
grafana-net-737-dashboard.json: | | |
{ | |
"__inputs": [{ | |
"name": "DS_PROMETHEUS", | |
"label": "prometheus", | |
"description": "", | |
"type": "datasource", | |
"pluginId": "prometheus", | |
"pluginName": "Prometheus" | |
}], | |
"__requires": [{ | |
"type": "panel", | |
"id": "singlestat", | |
"name": "Singlestat", | |
"version": "" | |
}, { | |
"type": "panel", | |
"id": "graph", | |
"name": "Graph", | |
"version": "" | |
}, { | |
"type": "grafana", | |
"id": "grafana", | |
"name": "Grafana", | |
"version": "3.1.0" | |
}, { | |
"type": "datasource", | |
"id": "prometheus", | |
"name": "Prometheus", | |
"version": "1.0.0" | |
}], | |
"id": null, | |
"title": "Kubernetes Pod Resources", | |
"description": "Shows resource usage of Kubernetes pods.", | |
"tags": [ | |
"kubernetes" | |
], | |
"style": "dark", | |
"timezone": "browser", | |
"editable": true, | |
"hideControls": false, | |
"sharedCrosshair": false, | |
"rows": [{ | |
"collapse": false, | |
"editable": true, | |
"height": "250px", | |
"panels": [{ | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"editable": true, | |
"error": false, | |
"format": "percent", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": true, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "180px", | |
"id": 4, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 4, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum (container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "", | |
"refId": "A", | |
"step": 2 | |
}], | |
"thresholds": "65, 90", | |
"timeFrom": "1m", | |
"timeShift": null, | |
"title": "Memory Working Set", | |
"transparent": false, | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "percent", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": true, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "180px", | |
"id": 6, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 4, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "65, 90", | |
"timeFrom": "1m", | |
"timeShift": null, | |
"title": "Cpu Usage", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": true, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "percent", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": true, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "180px", | |
"id": 7, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 4, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"legendFormat": "", | |
"metric": "", | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "65, 90", | |
"timeFrom": "1m", | |
"timeShift": null, | |
"title": "Filesystem Usage", | |
"type": "singlestat", | |
"valueFontSize": "80%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "bytes", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 9, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "20%", | |
"prefix": "", | |
"prefixFontSize": "20%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"title": "Used", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "bytes", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 10, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"title": "Total", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "none", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 11, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": " cores", | |
"postfixFontSize": "30%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"timeShift": null, | |
"title": "Used", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "none", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 12, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": " cores", | |
"postfixFontSize": "30%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"title": "Total", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "bytes", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 13, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"title": "Used", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"cacheTimeout": null, | |
"colorBackground": false, | |
"colorValue": false, | |
"colors": [ | |
"rgba(50, 172, 45, 0.97)", | |
"rgba(237, 129, 40, 0.89)", | |
"rgba(245, 54, 54, 0.9)" | |
], | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"format": "bytes", | |
"gauge": { | |
"maxValue": 100, | |
"minValue": 0, | |
"show": false, | |
"thresholdLabels": false, | |
"thresholdMarkers": true | |
}, | |
"height": "1px", | |
"hideTimeOverride": true, | |
"id": 14, | |
"interval": null, | |
"isNew": true, | |
"links": [], | |
"mappingType": 1, | |
"mappingTypes": [{ | |
"name": "value to text", | |
"value": 1 | |
}, { | |
"name": "range to text", | |
"value": 2 | |
}], | |
"maxDataPoints": 100, | |
"nullPointMode": "connected", | |
"nullText": null, | |
"postfix": "", | |
"postfixFontSize": "50%", | |
"prefix": "", | |
"prefixFontSize": "50%", | |
"rangeMaps": [{ | |
"from": "null", | |
"text": "N/A", | |
"to": "null" | |
}], | |
"span": 2, | |
"sparkline": { | |
"fillColor": "rgba(31, 118, 189, 0.18)", | |
"full": false, | |
"lineColor": "rgb(31, 120, 193)", | |
"show": false | |
}, | |
"targets": [{ | |
"expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})", | |
"interval": "10s", | |
"intervalFactor": 1, | |
"refId": "A", | |
"step": 10 | |
}], | |
"thresholds": "", | |
"timeFrom": "1m", | |
"title": "Total", | |
"type": "singlestat", | |
"valueFontSize": "50%", | |
"valueMaps": [{ | |
"op": "=", | |
"text": "N/A", | |
"value": "null" | |
}], | |
"valueName": "current" | |
}, { | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)", | |
"thresholdLine": false | |
}, | |
"height": "200px", | |
"id": 32, | |
"isNew": true, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"sideWidth": 200, | |
"sort": "current", | |
"sortDesc": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "receive", | |
"metric": "network", | |
"refId": "A", | |
"step": 240 | |
}, { | |
"expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "transmit", | |
"metric": "network", | |
"refId": "B", | |
"step": 240 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Network", | |
"tooltip": { | |
"msResolution": false, | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"transparent": false, | |
"type": "graph", | |
"xaxis": { | |
"show": true | |
}, | |
"yaxes": [{ | |
"format": "Bps", | |
"label": "transmit / receive", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, { | |
"format": "Bps", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
}] | |
}], | |
"showTitle": true, | |
"title": "all pods" | |
}, { | |
"collapse": false, | |
"editable": true, | |
"height": "250px", | |
"panels": [{ | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 3, | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"height": "", | |
"id": 17, | |
"isNew": true, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"hideEmpty": true, | |
"hideZero": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"sideWidth": null, | |
"sort": "current", | |
"sortDesc": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{ pod_name }}", | |
"metric": "container_cpu", | |
"refId": "A", | |
"step": 240 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Cpu Usage", | |
"tooltip": { | |
"msResolution": true, | |
"shared": false, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"transparent": false, | |
"type": "graph", | |
"xaxis": { | |
"show": true | |
}, | |
"yaxes": [{ | |
"format": "none", | |
"label": "cores", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, { | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
}] | |
}, { | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"fill": 0, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 33, | |
"isNew": true, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"hideEmpty": true, | |
"hideZero": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"sideWidth": null, | |
"sort": "current", | |
"sortDesc": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "null", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{ pod_name }}", | |
"metric": "", | |
"refId": "A", | |
"step": 240 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Memory Working Set", | |
"tooltip": { | |
"msResolution": false, | |
"shared": false, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"show": true | |
}, | |
"yaxes": [{ | |
"format": "bytes", | |
"label": "used", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, { | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
}] | |
}, { | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 16, | |
"isNew": true, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"hideEmpty": true, | |
"hideZero": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"sideWidth": 200, | |
"sort": "avg", | |
"sortDesc": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "null", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{ pod_name }} < in", | |
"metric": "network", | |
"refId": "A", | |
"step": 240 | |
}, { | |
"expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{ pod_name }} > out", | |
"metric": "network", | |
"refId": "B", | |
"step": 240 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Network", | |
"tooltip": { | |
"msResolution": false, | |
"shared": false, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"show": true | |
}, | |
"yaxes": [{ | |
"format": "Bps", | |
"label": "transmit / receive", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, { | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
}] | |
}, { | |
"aliasColors": {}, | |
"bars": false, | |
"datasource": "${DS_PROMETHEUS}", | |
"decimals": 2, | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": { | |
"threshold1": null, | |
"threshold1Color": "rgba(216, 200, 27, 0.27)", | |
"threshold2": null, | |
"threshold2Color": "rgba(234, 112, 112, 0.22)" | |
}, | |
"id": 34, | |
"isNew": true, | |
"legend": { | |
"alignAsTable": true, | |
"avg": true, | |
"current": true, | |
"hideEmpty": true, | |
"hideZero": true, | |
"max": false, | |
"min": false, | |
"rightSide": true, | |
"show": true, | |
"sideWidth": 200, | |
"sort": "current", | |
"sortDesc": true, | |
"total": false, | |
"values": true | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "null", | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"span": 12, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [{ | |
"expr": "sum(container_fs_usage_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", | |
"interval": "", | |
"intervalFactor": 2, | |
"legendFormat": "{{ pod_name }}", | |
"metric": "network", | |
"refId": "A", | |
"step": 240 | |
}], | |
"timeFrom": null, | |
"timeShift": null, | |
"title": "Filesystem", | |
"tooltip": { | |
"msResolution": false, | |
"shared": false, | |
"sort": 2, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"show": true | |
}, | |
"yaxes": [{ | |
"format": "bytes", | |
"label": "used", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, { | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": false | |
}] | |
}], | |
"showTitle": true, | |
"title": "each pod" | |
}], | |
"time": { | |
"from": "now-3d", | |
"to": "now" | |
}, | |
"timepicker": { | |
"refresh_intervals": [ | |
"5s", | |
"10s", | |
"30s", | |
"1m", | |
"5m", | |
"15m", | |
"30m", | |
"1h", | |
"2h", | |
"1d" | |
], | |
"time_options": [ | |
"5m", | |
"15m", | |
"1h", | |
"6h", | |
"12h", | |
"24h", | |
"2d", | |
"7d", | |
"30d" | |
] | |
}, | |
"templating": { | |
"list": [{ | |
"allValue": ".*", | |
"current": {}, | |
"datasource": "${DS_PROMETHEUS}", | |
"hide": 0, | |
"includeAll": true, | |
"label": "Instance", | |
"multi": false, | |
"name": "instance", | |
"options": [], | |
"query": "label_values(instance)", | |
"refresh": 1, | |
"regex": "", | |
"type": "query" | |
}, { | |
"current": {}, | |
"datasource": "${DS_PROMETHEUS}", | |
"hide": 0, | |
"includeAll": true, | |
"label": "Namespace", | |
"multi": true, | |
"name": "namespace", | |
"options": [], | |
"query": "label_values(namespace)", | |
"refresh": 1, | |
"regex": "", | |
"type": "query" | |
}] | |
}, | |
"annotations": { | |
"list": [] | |
}, | |
"refresh": false, | |
"schemaVersion": 12, | |
"version": 8, | |
"links": [], | |
"gnetId": 737 | |
} | |
prometheus-datasource.json: | | |
{ | |
"name": "prometheus", | |
"type": "prometheus", | |
"url": "http://prometheus:9090", | |
"access": "proxy", | |
"basicAuth": false | |
} | |
kind: ConfigMap | |
metadata: | |
creationTimestamp: null | |
name: grafana-import-dashboards | |
namespace: monitoring | |
--- | |
# One-shot Job that loads the datasource and dashboard JSON files from the
# "grafana-import-dashboards" ConfigMap into Grafana through its HTTP API.
# An init container first blocks until http://grafana:3000 answers 200.
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
    spec:
      serviceAccountName: prometheus-k8s
      initContainers:
        # Polls Grafana every 15 seconds and only exits once the (redirect-
        # following) request returns HTTP 200, so the import never races the
        # Grafana start-up.
        - name: wait-for-grafana
          image: giantswarm/tiny-tools
          args:
            - /bin/sh
            - -c
            # Literal block scalar: the script reaches the shell with its
            # newlines intact instead of depending on YAML folding rules.
            - |
              set -x;
              while [ "$(curl -Lsw '%{http_code}' "http://grafana:3000" -o /dev/null)" -ne 200 ]; do
                echo '.'
                sleep 15;
              done
      containers:
        - name: grafana-import-dashboards
          image: giantswarm/tiny-tools
          command: ["/bin/sh", "-c"]
          # The ConfigMap is mounted here, so the globs below match its keys.
          workingDir: /opt/grafana-import-dashboards
          args:
            # Step 1: POST every *-datasource.json to /api/datasources.
            # Step 2: wrap every *-dashboard.json in an import envelope that
            #         binds the DS_PROMETHEUS input to the datasource named
            #         "prometheus", then POST it to /api/dashboards/import.
            # Credentials are passed with --user (quoted) rather than embedded
            # in the URL, so passwords containing characters such as "@", "/",
            # "#" or spaces neither corrupt the URL nor get word-split.
            # NOTE(review): each curl is followed by `echo ""`, so a failed
            # request is printed (--show-error) but does not fail the Job.
            - |
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --user "${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}" \
                    --request POST http://grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --user "${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}" \
                    --request POST http://grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done
          env:
            # Grafana admin credentials, read from the "grafana" Secret.
            - name: GF_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
          volumeMounts:
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      restartPolicy: Never
      volumes:
        # Exposes each ConfigMap key as a file in the import directory.
        - name: config-volume
          configMap:
            name: grafana-import-dashboards
--- | |
# apiVersion: extensions/v1beta1 | |
# kind: Ingress | |
# metadata: | |
# name: grafana | |
# namespace: monitoring | |
# spec: | |
# rules: | |
# - host: <yourchoice>.<cluster-id>.k8s.gigantic.io | |
# http: | |
# paths: | |
# - path: / | |
# backend: | |
# serviceName: grafana | |
# servicePort: 3000 | |
--- | |
# Grafana admin credentials. The grafana-import-dashboards Job reads both keys
# through secretKeyRef.
# Values are base64-encoded; each one decodes to "admin".
# NOTE(review): these are default credentials stored in the manifest itself;
# replace them before using this outside a throwaway cluster.
apiVersion: v1
kind: Secret
metadata:
  namespace: monitoring
  name: grafana
type: Opaque
data:
  admin-username: YWRtaW4=
  admin-password: YWRtaW4=
--- | |
# NodePort Service in front of the pods labelled app=grafana, component=core.
# Only `port` is set, so targetPort falls back to the same number (3000).
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: grafana
  labels:
    component: core
    app: grafana
spec:
  type: NodePort
  selector:
    component: core
    app: grafana
  ports:
    - port: 3000
--- | |
# Prometheus server configuration. The prometheus-core Deployment mounts this
# ConfigMap at /etc/prometheus and starts with
# -config.file=/etc/prometheus/prometheus.yaml.
#
# Scrape jobs defined below:
#   kubernetes-nodes      node targets, with port 10250 rewritten to 10255
#   kubernetes-endpoints  endpoints of Services annotated prometheus.io/scrape
#   kubernetes-services   Services annotated prometheus.io/probe, probed
#                         through the "blackbox" target (module http_2xx)
#   kubernetes-pods       Pods annotated prometheus.io/scrape whose container
#                         port number matches 9\d{3}
#   kubernetes-cadvisor   per-node cAdvisor metrics via the API server proxy
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-core
  namespace: monitoring
data:
  prometheus.yaml: |
    global:
      scrape_interval: 10s
      scrape_timeout: 10s
      evaluation_interval: 10s
    rule_files:
      - "/etc/prometheus-rules/*.rules"
    scrape_configs:
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
      - job_name: 'kubernetes-nodes'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
      - job_name: 'kubernetes-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+);(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
      - job_name: 'kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: (.+):(?:\d+);(\d+)
            replacement: ${1}:${2}
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            action: keep
            regex: 9\d{3}
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
--- | |
# Single-replica Prometheus server. Configuration comes from the
# prometheus-core ConfigMap and alert rules from the prometheus-rules
# ConfigMap. No data volume is declared in this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      # `app: prometheus` alone also matches the prometheus-node-exporter
      # DaemonSet pods in this namespace; `component` keeps the two
      # controllers' selectors from overlapping.
      # NOTE: spec.selector is immutable in apps/v1, so a copy deployed with
      # the old selector has to be deleted and re-created.
      component: core
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: prometheus
          image: prom/prometheus:v1.7.0
          # Single-dash flags, as used by the Prometheus 1.x image above.
          args:
            - '-storage.local.retention=12h'
            - '-storage.local.memory-chunks=500000'
            - '-config.file=/etc/prometheus/prometheus.yaml'
            - '-alertmanager.url=http://alertmanager:9093/'
          ports:
            - name: webui
              containerPort: 9090
          # Requests equal limits for both CPU and memory.
          resources:
            requests:
              cpu: 500m
              memory: 500M
            limits:
              cpu: 500m
              memory: 500M
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus
            # Must match the rule_files glob (/etc/prometheus-rules/*.rules)
            # in the prometheus-core ConfigMap.
            - name: rules-volume
              mountPath: /etc/prometheus-rules
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-core
        - name: rules-volume
          configMap:
            name: prometheus-rules
--- | |
# Runs one kube-state-metrics pod under the kube-state-metrics ServiceAccount.
# The container listens on 8080, the port published by the Service of the
# same name.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitoring
  name: kube-state-metrics
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - image: gcr.io/google_containers/kube-state-metrics:v0.5.0
          name: kube-state-metrics
          ports:
            - containerPort: 8080
--- | |
# --- | |
# apiVersion: rbac.authorization.k8s.io/v1beta1 | |
# kind: ClusterRoleBinding | |
# metadata: | |
# name: kube-state-metrics | |
# roleRef: | |
# apiGroup: rbac.authorization.k8s.io | |
# kind: ClusterRole | |
# name: kube-state-metrics | |
# subjects: | |
# - kind: ServiceAccount | |
# name: kube-state-metrics | |
# namespace: monitoring | |
# --- | |
# apiVersion: rbac.authorization.k8s.io/v1beta1 | |
# kind: ClusterRole | |
# metadata: | |
# name: kube-state-metrics | |
# rules: | |
# - apiGroups: [""] | |
# resources: | |
# - nodes | |
# - pods | |
# - services | |
# - resourcequotas | |
# - replicationcontrollers | |
# - limitranges | |
# verbs: ["list", "watch"] | |
# - apiGroups: ["apps"] | |
# resources: | |
# - daemonsets | |
# - deployments | |
# - replicasets | |
# verbs: ["list", "watch"] | |
# --- | |
# Identity used by the kube-state-metrics Deployment (serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: kube-state-metrics
--- | |
# ClusterIP Service for kube-state-metrics on port 8080. The
# prometheus.io/scrape annotation is what the 'kubernetes-endpoints' scrape
# job in the prometheus-core ConfigMap keys on.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: kube-state-metrics
  labels:
    app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  selector:
    app: kube-state-metrics
  ports:
    - name: kube-state-metrics
      protocol: TCP
      port: 8080
--- | |
# Per-node directory size exporter: `read-du` writes Prometheus-format metrics
# into a shared in-memory volume and `caddy` serves that volume on port 9102.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-directory-size-metrics
  namespace: monitoring
  labels:
    app: node-directory-size-metrics
  annotations:
    description: |
      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
      These are scheduled on every node in the Kubernetes cluster.
      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
spec:
  selector:
    matchLabels:
      app: node-directory-size-metrics
  template:
    metadata:
      labels:
        app: node-directory-size-metrics
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9102'
        description: |
          This `Pod` provides metrics in Prometheus format about disk usage on the node.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
          This `Pod` is scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check just mount them on `read-du` below `/mnt`.
    spec:
      containers:
        - name: read-du
          image: giantswarm/tiny-tools
          imagePullPolicy: Always
          # FIXME: make the 100M threshold configurable via an env var.
          # Fish script. Every 300 seconds it rebuilds /tmp/metrics-temp from
          # `du` output and moves it over /tmp/metrics, so the served file is
          # swapped in one step rather than written in place.
          # `touch` runs on every cycle: the previous `mv` removed the temp
          # file, and without it a cycle where `du` reports nothing would make
          # `mv` fail and leave stale metrics being served.
          command:
            - fish
            - --command
            - |
              while true
                touch /tmp/metrics-temp
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
          volumeMounts:
            # Host /var, read-only, visible to `du` as /mnt/var.
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            # Shared with caddy, which mounts the same volume at /var/www.
            - name: metrics
              mountPath: /tmp
        - name: caddy
          image: dockermuenster/caddy:0.9.3
          command:
            - "caddy"
            - "-port=9102"
            - "-root=/var/www"
          ports:
            - containerPort: 9102
          volumeMounts:
            - name: metrics
              mountPath: /var/www
      volumes:
        - name: host-fs-var
          hostPath:
            path: /var
        - name: metrics
          emptyDir:
            medium: Memory
--- | |
# Runs prom/node-exporter on every node, using the host network and host PID
# namespace, and publishes port 9100 on the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  selector:
    matchLabels:
      app: prometheus
      # `app: prometheus` alone also matches the prometheus-core Deployment
      # pods in this namespace; `component` keeps the two controllers'
      # selectors from overlapping.
      # NOTE: spec.selector is immutable in apps/v1, so a copy deployed with
      # the old selector has to be deleted and re-created.
      component: node-exporter
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app: prometheus
        component: node-exporter
    spec:
      containers:
        - image: prom/node-exporter:v0.14.0
          name: prometheus-node-exporter
          ports:
            # The port name must be an IANA_SVC_NAME (at most 15 characters).
            - name: prom-node-exp
              containerPort: 9100
              hostPort: 9100
      hostNetwork: true
      hostPID: true
--- | |
# Headless Service (clusterIP: None) for the node-exporter pods on port 9100.
# The prometheus.io/scrape annotation is what the 'kubernetes-endpoints'
# scrape job in the prometheus-core ConfigMap keys on.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: prometheus-node-exporter
  labels:
    component: node-exporter
    app: prometheus
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    component: node-exporter
    app: prometheus
  ports:
    - name: prometheus-node-exporter
      protocol: TCP
      port: 9100
--- | |
# Alerting rules in the Prometheus 1.x rule syntax (ALERT / IF / FOR / LABELS /
# ANNOTATIONS). The prometheus-core Deployment mounts this ConfigMap at
# /etc/prometheus-rules, where the rule_files glob "*.rules" picks up each key.
# Every alert is labelled severity "page"; all thresholds except InstanceDown
# are "> 75" percent held for 2 minutes.
# NOTE(review): annotation keys are upper-case (SUMMARY/DESCRIPTION) everywhere
# except InstanceDown (summary/description); left as-is because the key names
# are visible to whatever consumes the alerts.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: monitoring
data:
  cpu-usage.rules: |
    ALERT NodeCPUUsage
      IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High CPU usage detected",
        DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      }
  instance-availability.rules: |
    ALERT InstanceDown
      IF up == 0
      FOR 1m
      LABELS { severity = "page" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
      }
  low-disk-space.rules: |
    ALERT NodeLowRootDisk
      IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low root disk space",
        DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
      }
    ALERT NodeLowDataDisk
      IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low data disk space",
        DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
      }
  mem-usage.rules: |
    ALERT NodeSwapUsage
      IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Swap usage detected",
        DESCRIPTION = "{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})"
      }
    ALERT NodeMemoryUsage
      IF (((node_memory_MemTotal-node_memory_MemAvailable)/(node_memory_MemTotal)*100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High memory usage detected",
        DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
      }
--- | |
# NodePort Service for the Prometheus web UI (port 9090, named "webui"),
# selecting the pods labelled app=prometheus, component=core.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: prometheus
  annotations:
    prometheus.io/scrape: 'true'
  labels:
    component: core
    app: prometheus
spec:
  type: NodePort
  selector:
    component: core
    app: prometheus
  ports:
    - name: webui
      protocol: TCP
      port: 9090
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment