vishiy · July 3, 2024 22:49
diff --git a/AzureMonitorContainers-Metrics-Loganalytics.csv b/AzureMonitorContainers-Metrics-Loganalytics.csv
diff --git a/AzureMonitorContainers-Metrics-MetricStore.csv b/AzureMonitorContainers-Metrics-MetricStore.csv
MetricCategory	MetricName	MetricDimensions	MetricType	MetricTable	MetricNamespace	MetricOrigin	Comments
Node-CPU	cpuAllocatableNanoCores	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Amount of CPU that is allocatable by Kubernetes to run pods, expressed in nanocore/nanocpu unit
Node-CPU	cpuCapacityNanoCores	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Total CPU capacity of the node in nanocore/nanocpu unit
Node-CPU	cpuUsageNanoCores	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			CPU used by node in nanocore/nanocpu unit
Node-Memory	memoryAllocatableBytes	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Amount of memory in bytes that is allocatable by Kubernetes to run pods
Node-Memory	memoryCapacityBytes	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Total memory capacity of the node in bytes
Node-Memory	memoryRssBytes	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			RSS memory used by the node in bytes. Collected only for Linux nodes
Node-Memory	memoryWorkingSetBytes	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Working set memory used by the node in bytes
Node-Other	restartTimeEpoch	Objectname='K8SNode', Instancename=<nodename>	Gauge	Perf			Last time node restarted in epoch seconds
Node-DiskUsage	free	device,hostName,path,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/disk	container.azm.ms/telegraf	Free disk space in bytes (excludes --tmpfs, devtmpfs, devfs, overlay, aufs, squashfs)
Node-DiskUsage	used	device,hostName,path,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/disk	container.azm.ms/telegraf	Used disk space in bytes (excludes --tmpfs, devtmpfs, devfs, overlay, aufs, squashfs)
Node-DiskUsage	used_percent	device,hostName,path,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/disk	container.azm.ms/telegraf	Used disk space in percentage (excludes --tmpfs, devtmpfs, devfs, overlay, aufs, squashfs)
Node-DiskIO	reads	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of reads (incremented when I/O request completes)--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	read_bytes	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of bytes read from the block device--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	read_time	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of milliseconds that read requests have waited on the block device--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	writes	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of writes (incremented when I/O request completes)--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	write_bytes	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of bytes written to the block device--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	write_time	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of milliseconds that write requests have waited on the block device--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	io_time	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of milliseconds during which the device has had I/O requests queued--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-DiskIO	iops_in_progress	hostName,name,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/diskio	container.azm.ms/telegraf	Number of I/O requests that have been issued to device driver but have not yet completed--(filtered for devices having names with the regex pattern "sd[a-z][0-9]")
Node-GPU	nodeGpuAllocatable	gpuVendor,Computer,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Number of allocatable GPUs in the node at any point in time
Node-GPU	nodeGpuCapacity	gpuVendor,Computer,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Total number of GPUs in the node
Node-Network	bytes_recv	hostName,interface,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/net	container.azm.ms/telegraf	Total number of bytes received by the interface
Node-Network	bytes_sent	hostName,interface,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/net	container.azm.ms/telegraf	Total number of bytes sent by the interface
Node-Network	err_in	hostName,interface,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/net	container.azm.ms/telegraf	Total number of receive errors detected by the interface
Node-Network	err_out	hostName,interface,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/net	container.azm.ms/telegraf	Total number of transmit errors detected by the interface
Container-CPU	cpuRequestNanoCores	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's CPU request in nanocore/nanocpu unit
Container-CPU	cpuLimitNanoCores	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's CPU limit in nanocore/nanocpu unit. If limits are not specified, node's capacity will be rolled-up as container's limit
Container-CPU	cpuUsageNanoCores	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's CPU usage in nanocore/nanocpu unit
Container-Memory	memoryRequestBytes	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's memory request in bytes
Container-Memory	memoryLimitBytes	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's memory limit in bytes. If limits are not specified, node's capacity will be rolled-up as container's limit
Container-Memory	memoryRssBytes	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's RSS memory usage in bytes. Collected only for containers running on Linux nodes
Container-Memory	memoryWorkingSetBytes	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Container's working set memory usage in bytes
Container-Other	restartTimeEpoch	Objectname='K8SContainer', Instancename=podUID/containerName	Gauge	Perf			Last time the container restarted in epoch seconds
Container-GPU	containerGpuRequests	containerName=podUID/containerName,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Number of GPUs requested by the container
Container-GPU	containerGpuLimits	containerName=podUID/containerName,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Container's GPU limit
Container-GPU	containerGpuDutyCycle	containerName=podUID/containerName,gpuId,gpuModel,gpuVendor,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Percentage of time over the past sample period during which GPU was busy/actively processing for a container. Duty cycle is a number between 1 and 100
Container-GPU	containerGpumemoryTotalBytes	containerName=podUID/containerName,gpuId,gpuModel,gpuVendor,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	Total GPU memory available for the container
Container-GPU	containerGpumemoryUsedBytes	containerName=podUID/containerName,gpuId,gpuModel,gpuVendor,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/gpu	container.azm.ms	GPU memory used by the container
Pod-PV	pvUsedBytes	podUID,podName,podNamespace,pvName,pvcName,pvCapacityBytes,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/pv	container.azm.ms	Used space in bytes for a specific PV consumed by a specific Pod
Controller-Deployments	kube_deployment_status_replicas_ready	creationTime,deployment,deploymentStrategy,k8sNamespace,spec_replicas,status_replicas_available,status_replicas_updated,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/kubestate	container.azm.ms	Total number of ready pods targeted by deployment (status.readyReplicas)
Controller-HPA	kube_hpa_status_current_replicas	creationTime,hpa,k8sNamespace,lastScaleTime,spec_max_replicas,spec_min_replicas,status_desired_replicas,targetKind,targetName	Gauge	InsightsMetrics	container.azm.ms/kubestate	container.azm.ms	Current number of replicas of pods managed by this autoscaler (status.currentReplicas)
Kubelet	kubelet_docker_operations	hostName,operation_type,scrapeUrl,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Cumulative number of Docker operations by operation type
Kubelet	kubelet_docker_operations_errors	hostName,operation_type,scrapeUrl,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Cumulative number of Docker operation errors by operation type
Kubelet	kubelet_running_pod_count	hostName,scrapeUrl,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Number of pods currently running
Kubelet	volume_manager_total_volumes	hostName,plugin_name,scrapeUrl,state,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Number of volumes in Volume Manager
Kubelet	kubelet_node_config_error	hostName,scrapeUrl,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise
Kubelet	process_resident_memory_bytes	hostName,scrapeUrl,clusterId,clusterName	Gauge	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Kubelet's resident memory size in bytes
Kubelet	process_cpu_seconds_total	hostName,scrapeUrl,clusterId,clusterName	Counter	InsightsMetrics	container.azm.ms/prometheus	container.azm.ms/telegraf	Kubelet's total user and system CPU time spent in seconds