Last active
May 23, 2024 19:55
-
-
Save robertoriv/7537e77844d12154437dee0116f79603 to your computer and use it in GitHub Desktop.
Testing Karpenter's eviction-queueing changes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deployment running 10 replicas of the "non-critical" test workload.
# No priorityClassName is set on these pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: non-critical-pod
spec:
  replicas: 10
  selector:
    matchLabels:
      app: non-critical-pod
  template:
    metadata:
      labels:
        app: non-critical-pod
    spec:
      containers:
        - name: alpine
          image: alpine
          # Print a timestamped "[non-critical]" line, then sleep 10s, forever.
          command:
            - "/bin/sh"
            - "-c"
            - 'while true; do echo "[$(date +%Y-%m-%d_%H:%M:%S)] [non-critical] Waiting 10s ...."; sleep 10; done'
          lifecycle:
            preStop:
              exec:
                # Hold the container for 30s before it is stopped
                # (presumably to keep the pod in Terminating long enough to observe).
                command:
                  - "/bin/sleep"
                  - "30"
          resources:
            # Requests equal limits for both CPU and memory.
            limits:
              cpu: "1"
              memory: 1Gi
            requests:
              cpu: "1"
              memory: 1Gi
      # 35s grace period: 5s more than the 30s preStop sleep above.
      terminationGracePeriodSeconds: 35
      # Schedule only onto nodes labelled as belonging to the "default" NodePool.
      nodeSelector:
        karpenter.sh/nodepool: default
--- | |
# PodDisruptionBudget for the pods labelled app=non-critical-pod:
# at most 3 of them may be unavailable at the same time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: non-critical-pod-pdb
spec:
  maxUnavailable: 3
  selector:
    matchLabels:
      app: non-critical-pod
--- | |
# Deployment running 10 replicas of the "critical" test workload.
# These pods use the system-node-critical priority class.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: critical-pod
spec:
  replicas: 10
  selector:
    matchLabels:
      app: critical-pod
  template:
    metadata:
      labels:
        app: critical-pod
    spec:
      containers:
        - name: alpine
          image: alpine
          # Print a timestamped "[critical]" line, then sleep 10s, forever.
          command:
            - "/bin/sh"
            - "-c"
            - 'while true; do echo "[$(date +%Y-%m-%d_%H:%M:%S)] [critical] Waiting 10s ...."; sleep 10; done'
          lifecycle:
            preStop:
              exec:
                # Hold the container for 30s before it is stopped
                # (presumably to keep the pod in Terminating long enough to observe).
                command:
                  - "/bin/sleep"
                  - "30"
          resources:
            # Requests equal limits for both CPU and memory.
            limits:
              cpu: "1"
              memory: 1Gi
            requests:
              cpu: "1"
              memory: 1Gi
      # 35s grace period: 5s more than the 30s preStop sleep above.
      terminationGracePeriodSeconds: 35
      priorityClassName: system-node-critical
      # Schedule only onto nodes labelled as belonging to the "default" NodePool.
      nodeSelector:
        karpenter.sh/nodepool: default
--- | |
# PodDisruptionBudget for the pods labelled app=critical-pod:
# at most 3 of them may be unavailable at the same time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: critical-pod-pdb
spec:
  maxUnavailable: 3
  selector:
    matchLabels:
      app: critical-pod
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Drain evicts pods from the node and returns true when all pods are evicted | |
// https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown | |
func (t *Terminator) Drain(ctx context.Context, node *v1.Node) error { | |
pods, err := nodeutil.GetPods(ctx, t.kubeClient, node) | |
if err != nil { | |
return fmt.Errorf("listing pods on node, %w", err) | |
} | |
// evictablePods are pods that aren't yet terminating are eligible to have the eviction API called against them | |
evictablePods := lo.Filter(pods, func(p *v1.Pod, _ int) bool { return podutil.IsEvictable(p) }) | |
// check if there are any pods that are still terminating and haven't exceeded their termination grace period | |
// if there are any, only queue pods from the same eviction group or lower | |
terminatingPods := lo.Filter(pods, func(p *v1.Pod, _ int) bool { | |
return podutil.IsTerminating(p) && !podutil.IsStuckTerminating(p, t.clock) | |
}) | |
if len(terminatingPods) > 0 { | |
log.FromContext(ctx).Info(fmt.Sprintf("Started with %d evictable pods.", len(evictablePods))) | |
log.FromContext(ctx).Info(fmt.Sprintf("Found %d terminating pods: ", len(terminatingPods))) | |
for _, pod := range terminatingPods { | |
log.FromContext(ctx).Info(fmt.Sprintf(" - Pod: %s, Group: %d", pod.Name, podutil.GetPodEvictionGroup(pod))) | |
} | |
highestOrderPod := lo.MaxBy(terminatingPods, func(p *v1.Pod, max *v1.Pod) bool { | |
return podutil.GetPodEvictionGroup(p) > podutil.GetPodEvictionGroup(max) | |
}) | |
log.FromContext(ctx).Info(fmt.Sprintf("Highest order pod: %s, Group: %d", highestOrderPod.Name, podutil.GetPodEvictionGroup(highestOrderPod))) | |
evictablePods = lo.Filter(evictablePods, func(p *v1.Pod, _ int) bool { | |
return podutil.GetPodEvictionGroup(p) <= podutil.GetPodEvictionGroup(highestOrderPod) | |
}) | |
log.FromContext(ctx).Info(fmt.Sprintf("Narrowed the list of evictablePods to be queued to %d pods.", len(evictablePods))) | |
for _, pod := range evictablePods { | |
log.FromContext(ctx).Info(fmt.Sprintf(" - Pod: %s, Group: %d", pod.Name, podutil.GetPodEvictionGroup(pod))) | |
} | |
} | |
t.Evict(evictablePods) | |
// podsWaitingEvictionCount are the number of pods that either haven't had eviction called against them yet | |
// or are still actively terminated and haven't exceeded their termination grace period yet | |
podsWaitingEvictionCount := lo.CountBy(pods, func(p *v1.Pod) bool { return podutil.IsWaitingEviction(p, t.clock) }) | |
if podsWaitingEvictionCount > 0 { | |
return NewNodeDrainError(fmt.Errorf("%d pods are waiting to be evicted", len(pods))) | |
} | |
return nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment