# Install the Helm v3.2.1 client from the official release tarball.
wget https://get.helm.sh/helm-v3.2.1-linux-amd64.tar.gz
tar -zxvf helm-v3.2.1-linux-amd64.tar.gz
mv linux-amd64/helm /usr/local/bin/helm
# The legacy "stable" chart repository is no longer served from
# kubernetes-charts.storage.googleapis.com; the archived charts live at
# charts.helm.sh/stable, so register that location instead.
helm repo add stable https://charts.helm.sh/stable
helm repo update
helm install grafana stable/grafana
# Expose the grafana service (port 80) on local port 8081, all interfaces.
# NOTE: port-forward stays in the foreground; run the password command
# below from another terminal.
kubectl port-forward --address 0.0.0.0 -n default svc/grafana 8081:80
# grafana password
kubectl get secret --namespace default grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
Last active
October 15, 2021 06:36
-
-
Save chenchun/7c2342b2987b6aecd97442307ec55cac to your computer and use it in GitHub Desktop.
metrics server, prometheus, vpa
https://kubernetes.io/zh/docs/tasks/administer-cluster/access-cluster-api/
# Check all possible clusters, as your kubeconfig may have multiple contexts:
kubectl config view -o jsonpath='{"Cluster name\tServer\n"}{range .clusters[*]}{.name}{"\t"}{.cluster.server}{"\n"}{end}'
# Select name of cluster you want to interact with from above output:
export CLUSTER_NAME="some_server_name"
# Point to the API server referring to the cluster name
APISERVER=$(kubectl config view -o jsonpath="{.clusters[?(@.name==\"$CLUSTER_NAME\")].cluster.server}")
# Gets the token value: the secret annotated as belonging to the "default"
# service account, base64-decoded.
TOKEN=$(kubectl get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='default')].data.token}"|base64 -d)
# Explore the API with TOKEN.
# The URL is quoted so the expanded value is never word-split or globbed.
curl -X GET "$APISERVER/api" --header "Authorization: Bearer $TOKEN" --insecure
# Print the merged kubeconfig (cluster names, servers, users).
kubectl config view
export CLUSTER_NAME="local"
# Look up the API server URL registered for the cluster named CLUSTER_NAME.
APISERVER=$(kubectl config view --output=jsonpath="{.clusters[?(@.name==\"$CLUSTER_NAME\")].cluster.server}")
# Request the /scale endpoint of tapp "example-tapp" in the default namespace,
# authenticating with the admin client certificate and key.
curl --insecure \
  --cert /var/run/kubernetes/client-admin.crt \
  --key /var/run/kubernetes/client-admin.key \
  --cacert /var/run/kubernetes/server-ca.crt \
  "$APISERVER/apis/apps.tkestack.io/v1/namespaces/default/tapps/example-tapp/scale"
# Pull the bearer token out of the kubeconfig: for every line mentioning
# "token", take the text after the first ':' and keep its first word.
token=$(awk -F: '/token/ { split($2, words, " "); print words[1] }' ~/.kube/config)
printf '%s\n' "$token"
# Query the kubelet summary stats endpoint on port 10250.
# $ip is not set anywhere in this file: export it to the node address first.
# The URL is quoted because the unquoted '?' is a shell glob character.
curl -k -H "Authorization: Bearer ${token}" "https://${ip}:10250/stats/summary?only_cpu_and_memory=true"
# List the API resources served by the metrics API groups.
kubectl api-resources | grep metrics
# Choose a bearer token: inside a pod use the mounted service-account token;
# otherwise fall back to the token stored in the kubeconfig. (Previously the
# kubeconfig token was always overwritten, even when the file was missing.)
sa_token_file=/var/run/secrets/kubernetes.io/serviceaccount/token
if [ -r "$sa_token_file" ]; then
  token=$(cat "$sa_token_file")
else
  token=$(awk -F: '/token/ { split($2, words, " "); print words[1] }' ~/.kube/config)
fi
printf '%s\n' "$token"
curl -H "Authorization: Bearer ${token}" -k "https://metrics-server.kube-system"
## list
curl -H "Authorization: Bearer ${token}" -k "https://kubernetes/apis/metrics.k8s.io/v1beta1/namespaces/demo/pods/"
## single pod
curl -H "Authorization: Bearer ${token}" -k "https://metrics-server.kube-system/apis/metrics.k8s.io/v1beta1/namespaces/demo/pods/rami-56c4565dc4-mjsfw"
## all pods
curl -H "Authorization: Bearer ${token}" -k "https://kubernetes/apis/metrics.k8s.io/v1beta1/pods"
#https://github.com/feiskyer/kubernetes-handbook/blob/master/en/addons/metrics.md
# Pod metrics of the default namespace, fetched through kubectl's own credentials.
kubectl get --raw /apis/metrics.k8s.io/v1beta1/namespaces/default/pods
# external metrics server
# Uses $token and $APISERVER as set earlier in these notes; the URLs are
# quoted so the expanded values are never word-split or globbed.
curl -H "Authorization: Bearer $token" -k "$APISERVER/apis/external.metrics.k8s.io/v1beta1/"
## list
curl -H "Authorization: Bearer $token" -k "$APISERVER/apis/external.metrics.k8s.io/v1beta1/externalmetricvalues"
## get
curl -H "Authorization: Bearer $token" -k "$APISERVER/apis/external.metrics.k8s.io/v1beta1/externalmetricvalues/ufa_backend_totalcount_smpl"
# get with kubectl --raw
kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1/externalmetricvalues"
# Forward local port 8081 (all interfaces) to port 9090 of the prometheus
# service in kube-system.
kubectl port-forward --address=0.0.0.0 --namespace=kube-system svc/prometheus 8081:9090
# Prometheus graph UI (open in a browser):
#   http://$ip:8081/graph?g0.range_input=1h&g0.expr=rate(container_cpu_usage_seconds_total%7Bpod_name%3D~%22.%2B%22%7D%5B5m%5D)%5B8d%3A%5D&g0.tab=1
# Prometheus HTTP API query:
#   http://$ip:8081/api/v1/query?query=kube_pod_container_resource_requests{pod_name=~"ramists-.+"}[1d:]
# The PromQL expression used in that query:
#   kube_pod_container_resource_requests{pod_name=~"ramists-.+"}[1d:]
# vpa-recommender reads historical data from Prometheus. It must be patched to
# drop the job label (tkestack removed the job label); otherwise no metrics
# are returned.
- args:
- --v=4
- --storage=prometheus
- --prometheus-address=http://prometheus.kube-system.svc.cluster.local:9090
# Deliberately empty: with the history_provider.go patch shown in this file,
# an empty job name leaves the job="..." matcher out of the pod selector.
- --prometheus-cadvisor-job-name=
- --container-name-label=pod_name
- --cpu-histogram-decay-half-life=10m
- --pod-recommendation-min-memory-mb=10
# Remove the APIService owned by tke hpa-metrics-server: that server has a bug
# and cannot return metrics for all namespaces.
kubectl delete APIService/v1beta1.metrics.k8s.io
# Deploy metrics-server as described in the upstream instructions.
kubectl apply --filename=https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.3.6/components.yaml
# The following error will then show up in the metrics-server log:
# E0527 16:23:16.112915 1 manager.go:111] unable to fully collect metrics: [unable to fully scrape metrics from source kubelet_summary:x.x.x.x: unable to fetch metrics from Kubelet x.x.x.x (x.x.x.x): Get https://x.x.x.x:10250/stats/summary?only_cpu_and_memory=true: x509: certificate signed by unknown authority...]
# Add these arguments to the metrics-server container:
- --kubelet-preferred-address-types=InternalIP,Hostname,ExternalIP
# NOTE(review): presumably this skips verification of the kubelet serving
# certificate, which is what the x509 error above complains about. Confirm
# against the metrics-server docs.
- --kubelet-insecure-tls
Patch that makes VPA drop the job label: https://github.com/chenchun/autoscaler/commit/a974fdf5fa813ca276747cb523c4ddca54d141a8
diff --git a/vertical-pod-autoscaler/pkg/recommender/input/history/history_provider.go b/vertical-pod-autoscaler/pkg/recommender/input/history/history_provider.go
index 7dafa34c9..f965bb105 100644
--- a/vertical-pod-autoscaler/pkg/recommender/input/history/history_provider.go
+++ b/vertical-pod-autoscaler/pkg/recommender/input/history/history_provider.go
@@ -190,8 +190,12 @@ func (p *prometheusHistoryProvider) readLastLabels(res map[model.PodID]*PodHisto
func (p *prometheusHistoryProvider) GetClusterHistory() (map[model.PodID]*PodHistory, error) {
res := make(map[model.PodID]*PodHistory)
- podSelector := fmt.Sprintf("job=\"%s\", %s=~\".+\", %s!=\"POD\", %s!=\"\"",
- p.config.CadvisorMetricsJobName, p.config.CtrPodNameLabel,
+ var podSelector string
+ if p.config.CadvisorMetricsJobName != "" {
+ podSelector = fmt.Sprintf("job=\"%s\", ", p.config.CadvisorMetricsJobName)
+ }
+ podSelector = fmt.Sprintf("%s%s=~\".+\", %s!=\"POD\", %s!=\"\"",
+ podSelector, p.config.CtrPodNameLabel,
p.config.CtrNameLabel, p.config.CtrNameLabel)
// This query uses Prometheus Subquery notation, to gives us a result of a five minute cpu rate by default evaluated every 1minute for last config.HistoryLength days/hours/minutes. In order to change the evaluation step, you need change Prometheus global.evaluation_interval configuration parameter.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment