Created
August 22, 2018 13:30
-
-
Save grafuls/ce447423846dae3793fe2f338ed55519 to your computer and use it in GitHub Desktop.
Configuration map for Prometheus OCP deployment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# ConfigMap "prom-k8s": carries the Prometheus server configuration
# (prometheus.yml) together with the two rule files it loads.
# prometheus.yml references the rule files through the relative glob
# '*.rules', so this assumes all three keys are mounted into the same
# directory -- TODO confirm against the pod spec that consumes this map.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prom-k8s
data:
  # Alerting rules: a single example alert that fires when a target of the
  # 'kubernetes-nodes' scrape job reports up == 0.
  alerting.rules: |
    groups:
      - name: example-rules
        interval: 30s  # defaults to global interval
        rules:
          - alert: Node Down
            expr: up{job="kubernetes-nodes"} == 0
            # NOTE(review): miqTarget/severity are free-form annotations; they
            # are presumably read by an external alert consumer -- confirm.
            annotations:
              miqTarget: "ContainerNode"
              severity: "HIGH"

  # Recording rules: pre-aggregated container CPU/memory series and the
  # API-server request rate.
  recording.rules: |
    groups:
      - name: aggregate_container_resources
        rules:
          - record: container_cpu_usage_rate
            expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[5m]))
          - record: container_memory_rss_by_type
            expr: container_memory_rss{id=~"/|/system.slice|/kubepods.slice"} > 0
          - record: container_cpu_usage_percent_by_host
            expr: sum(rate(container_cpu_usage_seconds_total{id="/"}[5m])) BY(kubernetes_io_hostname) / ON(kubernetes_io_hostname) machine_cpu_cores
          - record: apiserver_request_count_rate_by_resources
            expr: sum without (client,instance,contentType) (rate(apiserver_request_count[5m]))

  # Main Prometheus configuration: rule file glob, one scrape job per
  # infrastructure component, and the Alertmanager endpoint.
  prometheus.yml: |
    rule_files:
      - '*.rules'

    scrape_configs:
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - default
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        # Keep only the default/kubernetes service endpoints for the https port. This
        # will add targets for each API server which Kubernetes adds an endpoint to
        # the default/kubernetes service.
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: kubernetes;https

      # Scrape config for controllers.
      #
      # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
      # the controllers.
      #
      # TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
      # endpoints.
      - job_name: 'kubernetes-controllers'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - default
        # Keep only the default/kubernetes service endpoints for the https port, and then
        # set the port to 8444. This is the default configuration for the controllers on OpenShift
        # masters.
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: kubernetes;https
          - source_labels: [__address__]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+)
            replacement: $1:8444

      # Scrape config for nodes.
      #
      # Each node exposes a /metrics endpoint that contains operational metrics for
      # the Kubelet and other components.
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        # Drop a very high cardinality metric that is incorrect in 3.7. It will be
        # fixed in 3.9.
        metric_relabel_configs:
          - source_labels: [__name__]
            action: drop
            regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
        # Copy every Kubernetes node label onto the scraped series.
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # Scrape config for cAdvisor.
      #
      # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
      # reports container metrics for each running pod. Scrape those by default.
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        metrics_path: /metrics/cadvisor
        kubernetes_sd_configs:
          - role: node
        # Exclude a set of high cardinality metrics that can contribute to significant
        # memory use in large clusters. These can be selectively enabled as necessary
        # for medium or small clusters.
        metric_relabel_configs:
          - source_labels: [__name__]
            action: drop
            regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # Scrape config for service endpoints.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      # to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      # service then set this appropriately.
      - job_name: 'kubernetes-service-endpoints'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # only scrape infrastructure components
          - source_labels: [__meta_kubernetes_namespace]
            action: keep
            regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
          # drop infrastructure components managed by other scrape targets
          - source_labels: [__meta_kubernetes_service_name]
            action: drop
            regex: 'prometheus-node-exporter'
          # only those that have requested scraping
          # (quoted so YAML reads the pattern as the string "true", not a boolean)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: 'true'
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+);(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # Scrape config for node-exporter, which is expected to be running on port 9100.
      # NOTE(review): no `scheme` is set here, so Prometheus' default scheme
      # applies and this tls_config is presumably unused -- confirm.
      - job_name: 'kubernetes-nodes-exporter'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        kubernetes_sd_configs:
          - role: node
        # The rules below run in order: wanted series are temporarily renamed
        # to renamed_<name>, the rest of their families are dropped, and the
        # survivors are renamed back.
        metric_relabel_configs:
          - source_labels: [__name__]
            action: drop
            regex: 'node_cpu|node_(disk|scrape_collector)_.+'
          # preserve a subset of the network, netstat, vmstat, and filesystem series
          - source_labels: [__name__]
            action: replace
            regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
            target_label: __name__
            replacement: renamed_$1
          - source_labels: [__name__]
            action: drop
            regex: 'node_(netstat|vmstat|filesystem|network)_.+'
          - source_labels: [__name__]
            action: replace
            regex: 'renamed_(.+)'
            target_label: __name__
            replacement: $1
          # drop any partial expensive series
          - source_labels: [__name__, device]
            action: drop
            regex: 'node_network_.+;veth.+'
          - source_labels: [__name__, mountpoint]
            action: drop
            regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
        relabel_configs:
          # Rewrite the discovered :10250 address to the node-exporter port.
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:9100'
            target_label: __address__
          - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
            target_label: __instance__
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # TODO: auto-generate these sections, or add a dynamic infrastructure scraper
      # Scrape config for the template service broker
      - job_name: 'openshift-template-service-broker'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
          server_name: apiserver.openshift-template-service-broker.svc
        bearer_token_file: /var/run/secrets/kubernetes.io/scraper/token
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - openshift-template-service-broker
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: api-server;https

      # Scrape config for the router
      - job_name: 'openshift-router'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
          server_name: router.default.svc
        bearer_token_file: /var/run/secrets/kubernetes.io/scraper/token
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - default
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: router;1936-tcp

    # Alerts are sent over plain HTTP to an Alertmanager on localhost:9093.
    alerting:
      alertmanagers:
        - scheme: http
          static_configs:
            - targets:
                - "localhost:9093"
---
# ConfigMap "alertmanager": carries the Alertmanager configuration
# (alertmanager.yml). Every alert goes to the single receiver
# "alert-buffer-wh", a webhook at http://localhost:9099/topics/alerts.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
data:
  alertmanager.yml: |
    # No global overrides are set; the key is intentionally left empty.
    global:

    # The root route on which each incoming alert enters.
    route:
      # default route if none match
      receiver: alert-buffer-wh

      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.
      # TODO:
      group_by: []

      # All the above attributes are inherited by all child routes and can
      # be overwritten on each.

    receivers:
      - name: alert-buffer-wh
        webhook_configs:
          - url: http://localhost:9099/topics/alerts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment