Created
May 16, 2025 09:35
-
-
Save initcron/9bcc0569a93ffad83d7b025faf1c6461 to your computer and use it in GitHub Desktop.
KEDA Autoscaler with Latency and CPU Utilization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# KEDA ScaledObject: scales the FastAPI model Deployment on two signals —
# p95 request latency (from Prometheus) and CPU utilization.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: fastapi-latency-autoscaler
  namespace: default
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: model  # update to your actual deployment name
  minReplicaCount: 1
  maxReplicaCount: 5
  pollingInterval: 30  # seconds between metric checks
  cooldownPeriod: 300  # seconds before scaling down
  triggers:
    # Scale out when p95 latency exceeds 0.5s over a 1m window.
    - type: prometheus
      metadata:
        serverAddress: http://prom-kube-prometheus-stack-prometheus.monitoring.svc:9090
        metricName: fastapi_latency_p95
        query: |
          histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[1m])) by (le))
        threshold: "0.5"
    # Scale out when average CPU utilization exceeds 50%.
    - type: cpu
      metricType: Utilization  # Allowed types are 'Utilization' or 'AverageValue'
      metadata:
        value: "50"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment