-
-
Save redmcg/60cfff7bca6f32969188008ad4a44c9a to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Reports usage of PersistentVolumeClaim-backed volumes in a df-like table,
# using the stats/summary data each node exposes through the API server.

# API path of the node list; per-node stats are requested beneath this path.
NODESAPI=/api/v1/nodes
# Print the name of every node returned by the node list API, one per line.
# Globals: NODESAPI (read) - API path of the node list.
# Outputs: node names on stdout.
function getNodes() {
  kubectl get --raw "$NODESAPI" | jq -r '.items[].metadata.name'
}
# Read one or more stats/summary JSON documents from stdin and print a JSON
# array with one object per volume that has a "pvcRef", sorted by PVC name.
# Each object holds: name, capacityBytes, usedBytes, availableBytes and
# percentageUsed.
function getPVCs() {
  # "[]?" on pods/volume skips documents or pods that lack those keys.
  # A zero (or missing) capacity reports 0% instead of aborting jq with a
  # division-by-zero error.
  jq -s '[flatten | .[].pods[]? | .volume[]? | select(has("pvcRef")) |
    {name: .pvcRef.name, capacityBytes, usedBytes, availableBytes,
     percentageUsed: (if .capacityBytes > 0
                      then .usedBytes / .capacityBytes * 100
                      else 0 end)}] | sort_by(.name)'
}
# Align whitespace-separated rows from stdin into a table: the first column is
# left-justified, every other column is right-justified with one space of
# separation. Column widths are those of the widest cell in each column.
function column() {
  awk '
    {
      # Remember how many cells this row has; NF inside END only reflects
      # the last row, which would truncate wider earlier rows.
      nf[NR] = NF
      for (i = 1; i <= NF; i++) {
        d[NR, i] = $i
        if (length($i) > w[i]) w[i] = length($i)
      }
    }
    END {
      for (i = 1; i <= NR; i++) {
        printf("%-*s", w[1], d[i, 1])
        for (j = 2; j <= nf[i]; j++) printf("%*s", w[j] + 1, d[i, j])
        print ""
      }
    }'
}
# Turn "name capacityBytes usedBytes availableBytes percentageUsed" rows from
# stdin into df-style rows: byte counts become 1K-blocks and the percentage is
# rounded and suffixed with "%". A header row is printed first.
function defaultFormat() {
  awk 'BEGIN { print "PVC 1K-blocks Used Available Use%" }
       {
         # Format explicitly: the implicit number-to-string conversion uses
         # %.6g, which prints large non-integral values as e.g. 9.76563e+06.
         for (i = 2; i <= 4; i++) $i = sprintf("%.0f", $i / 1024)
         $5 = sprintf("%.0f%%", $5)
         print
       }'
}
# Turn "name capacityBytes usedBytes availableBytes percentageUsed" rows from
# stdin into rows with IEC-scaled sizes (via numfmt) and a rounded percentage
# suffixed with "%". A header row is printed first.
function humanFormat() {
  # For each row, numfmt prints the three sizes on separate lines; sed joins
  # them with spaces and tr drops the final newline so that the percentage
  # can be appended to the same output line.
  awk 'BEGIN { print "PVC Size Used Avail Use%" }
       {
         $5 = sprintf("%.0f%%", $5)
         printf("%s ", $1)
         system(sprintf("numfmt --to=iec %s %s %s | sed '\''N;N;s/\\n/ /g'\'' | tr -d \\\\n", $2, $3, $4))
         print " " $5
       }'
}
# Convert the JSON array produced by getPVCs (stdin) into an aligned table.
# Globals: format (read) - name of the row formatter function to run;
#          defaultFormat is used when it is unset or empty.
function format() {
  jq -r '.[] | "\(.name) \(.capacityBytes) \(.usedBytes) \(.availableBytes) \(.percentageUsed)"' |
    "${format:-defaultFormat}" | column
}
# "-h" as the first argument selects human-readable sizes; anything else
# selects 1K-blocks.
if [ "$1" == "-h" ]; then
  format=humanFormat
else
  format=defaultFormat
fi

# Fetch stats/summary from every node and feed the concatenated JSON
# documents through the PVC extraction and table formatting stages.
for node in $(getNodes); do
  kubectl get --raw "$NODESAPI/$node/proxy/stats/summary"
done | getPVCs | format
The getPVCs function doesn't return anything. What could be the reason? Is it because of the CSI plugin?
you need to have a kubectl proxy running.
kubectl proxy is running. Nothing returned from getPVCs.
jq -s '[flatten | .[].pods[].volume[]? | select(has("pvcRef"))]' returns nothing. I see the pod referenced in full json dump. No pvcRef.
Exec into the pod and see the mounted volume fine.
Tried it on kubernetes 1.14 and 1.15...nothing in stats/summary with pvcRef.
Using nfs-provisioner for dynamic provisioning.
Pointers to what I'm missing? I so want this to work!
You could try the following. Set up the following environment variables:
namespace=[namespace]
pod=[podname]
And then check your volume is listed when you run:
kubectl -n $namespace get pod $pod -o json | jq '.spec.volumes[]? | select(has("persistentVolumeClaim"))'
If not, then it's not recognised by Kubernetes as a PVC.
If it is, check the output of (edit: you'll need to have kubectl proxy
running for this bit):
node=$(kubectl -n $namespace get pod $pod -o json | jq -r '.spec.nodeName')
volume=$(kubectl -n $namespace get pod $pod -o json | jq -r '.spec.volumes[]? | select(has("persistentVolumeClaim")) | .name')
KUBEAPI=127.0.0.1:8001/api/v1/nodes
curl -s $KUBEAPI/$node/proxy/stats/summary | jq -s '.[].pods[] | select(.podRef.name == "'$pod'") | .volume[]? | select(.name == "'$volume'")'
If it's listed, but doesn't have a 'pvcRef' field like the one in the example below:
{
"time": "2020-02-27T08:06:36Z",
"availableBytes": 8031363072,
"capacityBytes": 8320901120,
"usedBytes": 272760832,
"inodesFree": 524057,
"inodes": 524288,
"inodesUsed": 231,
"name": "data",
"pvcRef": {
"name": "pvcRefExample",
"namespace": "default"
}
}
then you might need to dig around the internals of Kubernetes, or possibly your provisioner to understand why.
If there's no output at all from the curl statement, you might want to try:
curl -i $KUBEAPI/$node/proxy/stats/summary
to include the response headers, or even:
curl -v $KUBEAPI/$node/proxy/stats/summary
for additional debug. This might help identify why you're not getting any output.
same as @tomsherrod except I use Rook Ceph with FlexVolumes.
I do not have any PvcRef with : curl -i $KUBEAPI/$node/proxy/stats/summary
Is it the driver that does not pass PvcRef along?
Can't test with CSI drivers for the moment, I'll let you know
So I adjusted your script to work for my MicroK8s environment and double-checked the results. It is missing pvcRef as well. But I think I know why: hostPath does not report them. Each volume plugin has to implement metrics. hostPath doesn't even show limits within the containers themselves, let alone report usage, so it's all kind of moot for me, I guess.
Very useful, ty.
For those who don't want to use kubectl proxy
#!/usr/bin/env bash
# List the cluster's nodes: prints each node name from the node list API on
# its own line.
function getNodes() {
  jq --raw-output '.items[] | .metadata.name' \
    < <(kubectl get --raw=/api/v1/nodes)
}
# Read one or more stats/summary JSON documents from stdin and print a JSON
# array with one object per volume that has a "pvcRef", sorted by PVC name.
# Each object holds: name, capacityBytes, usedBytes, availableBytes and
# percentageUsed.
function getPVCs() {
  # "[]?" on pods/volume skips documents or pods that lack those keys.
  # A zero (or missing) capacity reports 0% instead of aborting jq with a
  # division-by-zero error.
  jq -s '[flatten | .[].pods[]? | .volume[]? | select(has("pvcRef")) |
    {name: .pvcRef.name, capacityBytes, usedBytes, availableBytes,
     percentageUsed: (if .capacityBytes > 0
                      then .usedBytes / .capacityBytes * 100
                      else 0 end)}] | sort_by(.name)'
}
# Align whitespace-separated rows from stdin into a table: the first column is
# left-justified, every other column is right-justified with one space of
# separation. Column widths are those of the widest cell in each column.
function column() {
  awk '
    {
      # Track the cell count per row; NF inside END only reflects the last
      # row, which would truncate any earlier row that has more cells.
      cells[NR] = NF
      for (c = 1; c <= NF; c++) {
        cell[NR, c] = $c
        if (length($c) > width[c]) width[c] = length($c)
      }
    }
    END {
      for (r = 1; r <= NR; r++) {
        printf("%-*s", width[1], cell[r, 1])
        for (c = 2; c <= cells[r]; c++) printf("%*s", width[c] + 1, cell[r, c])
        print ""
      }
    }'
}
# Turn "name capacityBytes usedBytes availableBytes percentageUsed" rows from
# stdin into df-style rows: byte counts become 1K-blocks and the percentage is
# rounded and suffixed with "%". A header row is printed first.
function defaultFormat() {
  awk 'BEGIN { print "PVC 1K-blocks Used Available Use%" }
       {
         # Format explicitly: the implicit number-to-string conversion uses
         # %.6g, which prints large non-integral values as e.g. 9.76563e+06.
         for (i = 2; i <= 4; i++) $i = sprintf("%.0f", $i / 1024)
         $5 = sprintf("%.0f%%", $5)
         print
       }'
}
# Turn "name capacityBytes usedBytes availableBytes percentageUsed" rows from
# stdin into rows with IEC-scaled sizes and a rounded percentage suffixed with
# "%". A header row is printed first.
function humanFormat() {
  # A single numfmt pass scales fields 2-4 of every row, instead of spawning
  # a shell pipeline per row from inside awk. --invalid=warn leaves a
  # non-numeric cell unchanged rather than aborting the whole table.
  # awk then rebuilds each row with single spaces (numfmt may pad converted
  # fields) and formats the percentage.
  numfmt --to=iec --field=2-4 --invalid=warn |
    awk 'BEGIN { print "PVC Size Used Avail Use%" }
         { $5 = sprintf("%.0f%%", $5); print }'
}
# Convert the JSON array produced by getPVCs (stdin) into an aligned table.
# Globals: format (read) - name of the row formatter function to run;
#          defaultFormat is used when it is unset or empty.
function format() {
  # jq -r emits the raw string, so no sed pass is needed to strip the
  # surrounding quotes (the "\|" alternation it relied on is GNU-specific).
  jq -r '.[] | "\(.name) \(.capacityBytes) \(.usedBytes) \(.availableBytes) \(.percentageUsed)"' |
    "${format:-defaultFormat}" | column
}
# Choose the row formatter from the first argument: "-h" gives
# human-readable sizes, anything else gives 1K-blocks.
case "$1" in
  -h) format=humanFormat ;;
  *) format=defaultFormat ;;
esac

# Concatenate every node's stats/summary document, then extract the PVC
# volumes and render the table.
for node in $(getNodes); do
  kubectl get --raw=/api/v1/nodes/$node/proxy/stats/summary
done |
  getPVCs |
  format
@brun0queiroz That's much better. Thanks for sharing that. I've updated the gist
I am getting this error: parse error:
parse error: Invalid numeric literal at line 1, column 10
PVC Size Used Avail Use%
That's an error from jq. To find out why, you might want to view the individual responses. You can try something similar to what's described here:
https://gist.github.com/redmcg/60cfff7bca6f32969188008ad4a44c9a#gistcomment-3191345
Linode LKE returns nothing. What could be the reason?
It will likely be a result of the csi driver in use:
https://kubernetes-csi.github.io/docs/drivers.html
The driver may not be providing k8s their metrics.
You could follow the instructions here to get a better picture:
https://gist.github.com/redmcg/60cfff7bca6f32969188008ad4a44c9a#gistcomment-3191345
Thank you!
That's awesome, sir!
Works like a charm, thank you very much :)
Awesome, love this script
my customer requested me exactly this output, thank you very much for sharing
Could you please help me to add "NAMESPACE" column as well along with "PVC Capacity Used Available Used%" in your script.
Thank you once again!
Thanks for the kind words.
I'd love to help, but unfortunately the only k8s cluster I have access to at the moment is minikube with the default storage provisioner; which doesn't provide the metrics required by this script.
The only guidance I can provide is that because the physical volume itself is not namespaced, it would probably require additional calls to find the pvc and from there the namespace could be extracted.
I'd love to help, but unfortunately the only k8s cluster I have access to at the moment is...
I wonder if it's a good idea to take this script and use it as the beginning of a new open source CLI tool (or, use it as inspiration for adding a feature to an existing CLI tool if one exists). This use case seems so popular. People like to be able to scan through all the data they've stored in their clusters' disks and learn more about the disks.
kubectl has support for plugins, which can be managed via krew (a kubectl plugin itself). It looks like a plugin that could be used in place of this script has already been created:
https://github.com/yashbhutwala/kubectl-df-pv
That's neat. That plugin's README says it's only compatible with GKE right now though. This script's approach is just pure Kubernetes, right?
Yeah, I'm using the Kubernetes REST API. But I took a quick look at the code for kubectl-df-pv, and it looks like it's using the same approach. I couldn't see anything that suggested this script would provide a greater level of support; but testing would confirm.
@redmcg
I have managed to add namespace column to your existing code
# Per-node variant that adds the PVC namespace as the first column.
# Expects $NODESAPI and $node to be set by the surrounding script.
# As posted, capacity and available are divided down to GiB, used to MiB.
kubectl get --raw "$NODESAPI/$node/proxy/stats/summary" |
  jq -s '[flatten | .[].pods[].volume[]? | select(has("pvcRef")) |
    {namespace: .pvcRef.namespace, name: .pvcRef.name, capacityBytes, usedBytes, availableBytes,
     percentageUsed: (.usedBytes / .capacityBytes * 100)}] | sort_by(.namespace)' |
  jq -r '.[] | "\(.namespace) \(.name) \(.capacityBytes) \(.usedBytes) \(.availableBytes) \(.percentageUsed)"' |
  awk '{$3 = $3/(1024*1024*1024); $4 = $4/(1024*1024); $5 = $5/(1024*1024*1024); $6 = sprintf("%.0f%%",$6); print $0}'
Namespace PVC Capacity Used Available Used%
Thank you @redmcg
Good work @vsadanala ! And thanks for sharing.
nice ! thanks
I would add this to show only PVCs with more than 50% usage:
bash kubedf -h |tr -d '%' |awk '$NF > 50' | sed -e 's/$/%/'
Hi,
I just added an option to list the pvc information filtered with a namespace based on the script above:
#!/usr/bin/env bash
# Output the names of all nodes known to the API server, one name per line.
function getNodes() {
  local -r nodes_path=/api/v1/nodes
  kubectl get --raw="${nodes_path}" |
    jq --raw-output '.items | .[] | .metadata | .name'
}
# Read one or more stats/summary JSON documents from stdin and print a JSON
# array with one object per volume that has a "pvcRef", sorted by namespace.
# Each object holds: namespace, name, capacityBytes, usedBytes,
# availableBytes and percentageUsed.
function getPVCs() {
  # The jq program is one multi-line single-quoted string, so it does not
  # depend on backslash line continuations between quoted fragments.
  # "[]?" on pods/volume skips documents or pods that lack those keys, and a
  # zero (or missing) capacity reports 0% instead of a jq division error.
  jq -s '[flatten | .[].pods[]? | .volume[]? | select(has("pvcRef")) |
    {namespace: .pvcRef.namespace, name: .pvcRef.name,
     capacityBytes, usedBytes, availableBytes,
     percentageUsed: (if .capacityBytes > 0
                      then .usedBytes / .capacityBytes * 100
                      else 0 end)}] | sort_by(.namespace)'
}
# Align whitespace-separated rows from stdin into a table: the first column is
# left-justified, every other column is right-justified with one space of
# separation. Column widths are those of the widest cell in each column.
function column() {
  # The awk program is a single quoted string; splitting it into two quoted
  # fragments requires a trailing backslash, which is easy to lose.
  awk '
    {
      # Per-row cell count: NF inside END only reflects the last row.
      nf[NR] = NF
      for (i = 1; i <= NF; i++) {
        d[NR, i] = $i
        if (length($i) > w[i]) w[i] = length($i)
      }
    }
    END {
      for (i = 1; i <= NR; i++) {
        printf("%-*s", w[1], d[i, 1])
        for (j = 2; j <= nf[i]; j++) printf("%*s", w[j] + 1, d[i, j])
        print ""
      }
    }'
}
# Turn "namespace name capacityBytes usedBytes availableBytes percentageUsed"
# rows from stdin into df-style rows: byte counts become 1K-blocks and the
# percentage is rounded and suffixed with "%". A header row is printed first.
function defaultFormat() {
  awk 'BEGIN { print "Namespace PVC 1K-blocks Used Available Use%" }
       {
         # Format explicitly: the implicit number-to-string conversion uses
         # %.6g, which prints large non-integral values as e.g. 9.76563e+06.
         for (i = 3; i <= 5; i++) $i = sprintf("%.0f", $i / 1024)
         $6 = sprintf("%.0f%%", $6)
         print
       }'
}
# Turn "namespace name capacityBytes usedBytes availableBytes percentageUsed"
# rows from stdin into rows with IEC-scaled sizes and a rounded percentage
# suffixed with "%". A header row is printed first.
function humanFormat() {
  # A single numfmt pass scales fields 3-5 of every row; this avoids the
  # nested shell quoting needed to call numfmt from awk's system().
  # --invalid=warn leaves a non-numeric cell unchanged rather than aborting.
  # awk then rebuilds each row with single spaces (numfmt may pad converted
  # fields) and formats the percentage.
  numfmt --to=iec --field=3-5 --invalid=warn |
    awk 'BEGIN { print "Namespace PVC Size Used Avail Use%" }
         { $6 = sprintf("%.0f%%", $6); print }'
}
# Convert the JSON array produced by getPVCs (stdin) into an aligned table.
# Globals: format (read) - name of the row formatter function to run;
#          defaultFormat is used when it is unset or empty.
function format() {
  # "\(...)" is jq string interpolation; without the backslash the text is
  # printed literally. -r emits raw strings, so no quote-stripping is needed.
  jq -r '.[] | "\(.namespace) \(.name) \(.capacityBytes) \(.usedBytes) \(.availableBytes) \(.percentageUsed)"' |
    "${format:-defaultFormat}" | column
}
# Print the PVC usage table, optionally restricted to one namespace.
# Globals:   format (written) - formatter name read by format().
# Arguments: $1 - "h" (case-insensitive, the default) for human-readable
#                 sizes; any other value selects 1K-blocks.
#            $2 - optional namespace; when given, only rows whose first
#                 column equals it are printed.
# Outputs:   the header row followed by the matching rows on stdout.
function get_pvc_info() {
  local _format=${1:-h}
  local namespace=${2:-}
  local node table

  format=humanFormat
  if [[ "${_format,,}" != "h" ]]; then
    format=defaultFormat
  fi

  table=$(
    for node in $(getNodes); do
      kubectl get --raw="/api/v1/nodes/${node}/proxy/stats/summary"
    done | getPVCs | format
  )

  if [[ -z "${namespace}" ]]; then
    # The table already starts with its header row; print it once.
    printf '%s\n' "${table}"
  else
    # Keep the header plus exact namespace matches. An if/else is used so
    # that a namespace with no rows prints only the header instead of
    # falling back to the full table.
    awk -v ns="${namespace}" 'NR == 1 || $1 == ns' <<<"${table}"
  fi
}
get_pvc_info "$@"
The getPVCs function doesn't return anything. What could be the reason? Is it because of the CSI plugin?