Last active
October 10, 2019 00:09
-
-
Save alexeldeib/e46689e659298d67cfe0794a4ee21312 to your computer and use it in GitHub Desktop.
Node problem detector one-shot config
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Namespace "npd": the ServiceAccount, ConfigMap and DaemonSet in this
# manifest all set `namespace: npd`.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: npd
  name: npd
---
# ServiceAccount referenced by the node-problem-detector DaemonSet pods and
# listed as the subject of the ClusterRoleBinding of the same name.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: npd
  name: node-problem-detector
  namespace: npd
---
# Cluster-wide permissions for node-problem-detector. All rules target the
# core API group (""): read nodes, patch node status, and write events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: npd
  name: node-problem-detector
rules:
# Read-only access to Node objects.
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
# Patch access limited to the nodes/status subresource.
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
# Create and modify Event objects.
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
  - update
---
# Grants the node-problem-detector ClusterRole to the node-problem-detector
# ServiceAccount in the npd namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: npd
  name: node-problem-detector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-problem-detector
subjects:
- kind: ServiceAccount
  name: node-problem-detector
  namespace: npd
---
# Monitor configurations for node-problem-detector. Each key is a JSON
# document stored as a literal block scalar; the DaemonSet mounts them under
# /config and passes their paths via --system-log-monitors.
apiVersion: v1
data:
  # kmsg plugin reading /dev/kmsg. Declares the KernelDeadlock and
  # ReadonlyFilesystem conditions; "permanent" rules name one of them in
  # their "condition" field, "temporary" rules carry only reason and pattern.
  kernel-monitor.json: |
    {
      "plugin": "kmsg",
      "logPath": "/dev/kmsg",
      "lookback": "5m",
      "bufferSize": 10,
      "source": "kernel-monitor",
      "conditions": [
        {
          "type": "KernelDeadlock",
          "reason": "KernelHasNoDeadlock",
          "message": "kernel has no deadlock"
        },
        {
          "type": "ReadonlyFilesystem",
          "reason": "FilesystemIsReadOnly",
          "message": "Filesystem is read-only"
        }
      ],
      "rules": [
        {
          "type": "temporary",
          "reason": "OOMKilling",
          "pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
        },
        {
          "type": "temporary",
          "reason": "TaskHung",
          "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
        },
        {
          "type": "temporary",
          "reason": "UnregisterNetDevice",
          "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
        },
        {
          "type": "temporary",
          "reason": "KernelOops",
          "pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
        },
        {
          "type": "temporary",
          "reason": "KernelOops",
          "pattern": "divide error: 0000 \\[#\\d+\\] SMP"
        },
        {
          "type": "permanent",
          "condition": "KernelDeadlock",
          "reason": "AUFSUmountHung",
          "pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
        },
        {
          "type": "permanent",
          "condition": "KernelDeadlock",
          "reason": "DockerHung",
          "pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
        },
        {
          "type": "permanent",
          "condition": "ReadonlyFilesystem",
          "reason": "FilesystemIsReadOnly",
          "pattern": "Remounting filesystem read-only"
        }
      ]
    }
  # journald plugin with pluginConfig source "dockerd", reading
  # /var/log/journal. Declares no conditions; one temporary rule.
  docker-monitor.json: |
    {
      "plugin": "journald",
      "pluginConfig": {
        "source": "dockerd"
      },
      "logPath": "/var/log/journal",
      "lookback": "5m",
      "bufferSize": 10,
      "source": "docker-monitor",
      "conditions": [],
      "rules": [
        {
          "type": "temporary",
          "reason": "CorruptDockerImage",
          "pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
        }
      ]
    }
  # journald plugin with pluginConfig source "systemd", reading
  # /var/log/journal. Declares no conditions; temporary rules only.
  # NOTE(review): "lookback" is an empty string here while the other two
  # monitors use "5m" - confirm this is intentional.
  systemd.json: |
    {
      "plugin": "journald",
      "pluginConfig": {
        "source": "systemd"
      },
      "logPath": "/var/log/journal",
      "lookback": "",
      "bufferSize": 10,
      "source": "systemd-monitor",
      "conditions": [],
      "rules": [
        {
          "type": "temporary",
          "reason": "KubeletStart",
          "pattern": "Started Kubernetes kubelet."
        },
        {
          "type": "temporary",
          "reason": "DockerStart",
          "pattern": "Starting Docker Application Container Engine..."
        },
        {
          "type": "temporary",
          "reason": "ContainerdStart",
          "pattern": "Starting containerd container runtime..."
        }
      ]
    }
kind: ConfigMap
metadata:
  labels:
    app: npd
  name: node-problem-detector-config
  namespace: npd
---
# DaemonSet running the node-problem-detector container with the three
# monitor configs from the node-problem-detector-config ConfigMap.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: npd
  name: node-problem-detector
  namespace: npd
spec:
  selector:
    matchLabels:
      app: npd
  template:
    metadata:
      labels:
        app: npd
    spec:
      containers:
      - command:
        - /node-problem-detector
        - --logtostderr
        # Paths match the ConfigMap items projected into the /config mount.
        - --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json,/config/systemd.json
        env:
        # Name of the node this pod is scheduled on, via the downward API.
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        # NOTE(review): k8s.gcr.io is a legacy registry host; consider moving
        # to registry.k8s.io after confirming the tag is available there.
        image: k8s.gcr.io/node-problem-detector:v0.6.3
        imagePullPolicy: Always
        name: node-problem-detector
        resources:
          limits:
            cpu: 10m
            memory: 80Mi
          requests:
            cpu: 10m
            memory: 80Mi
        securityContext:
          # Presumably required to read the host's /dev/kmsg - TODO confirm.
          privileged: true
        volumeMounts:
        - mountPath: /var/log
          name: log
          readOnly: true
        - mountPath: /dev/kmsg
          name: kmsg
          readOnly: true
        - mountPath: /etc/localtime
          name: localtime
          readOnly: true
        - mountPath: /config
          name: config
          readOnly: true
      # `serviceAccount` is a deprecated alias; `serviceAccountName` is the
      # supported field and selects the same ServiceAccount.
      serviceAccountName: node-problem-detector
      tolerations:
      # Allow scheduling onto nodes tainted node-role.kubernetes.io/master.
      # NOTE(review): newer clusters may taint control-plane nodes with
      # node-role.kubernetes.io/control-plane instead - confirm whether a
      # second toleration is needed.
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Exists
      volumes:
      - hostPath:
          path: /var/log/
        name: log
      - hostPath:
          path: /dev/kmsg
        name: kmsg
      - hostPath:
          path: /etc/localtime
        name: localtime
      - configMap:
          items:
          - key: kernel-monitor.json
            path: kernel-monitor.json
          - key: docker-monitor.json
            path: docker-monitor.json
          - key: systemd.json
            path: systemd.json
          name: node-problem-detector-config
        name: config
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
results in: