集群监控概览
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
CPU Requests Commitment | sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"}) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | kube_node_status_allocatable_cpu_cores | kube-state-metrics |
CPU Limits Commitment | sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"}) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | kube_node_status_allocatable_cpu_cores | kube-state-metrics |
Memory Utilisation | 1 - sum(:node_memory_MemAvailable_bytes:sum{cluster="$cluster"}) / sum(node_memory_MemTotal_bytes{cluster="$cluster"}) | node_memory_MemAvailable_bytes | node-exporter |
| | node_memory_MemTotal_bytes | node-exporter |
Memory Requests Commitment | sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"}) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | kube_node_status_allocatable_memory_bytes | kube-state-metrics |
Memory Limits Commitment | sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"}) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | kube_node_status_allocatable_memory_bytes | kube-state-metrics |
Node Count | count(kube_node_info{cluster="$cluster"}) | kube_node_info | kube-state-metrics |
Pod Count | count(kube_pod_info{cluster="$cluster"}) | kube_pod_info | kube-state-metrics |
Node Request CPU Average Percent | avg(sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_cpu_cores{cluster="$cluster"})by(node)) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | kube_node_status_capacity_cpu_cores | kube-state-metrics |
Node Request Memory Average Percent | avg(sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_memory_bytes{cluster="$cluster"})by(node)) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | kube_node_status_capacity_memory_bytes | kube-state-metrics |
API Server Success Request Percent | sum(irate(apiserver_request_total{cluster="$cluster",code=~"20.*",verb=~"GET|LIST"}[5m]))/sum(irate(apiserver_request_total{cluster="$cluster",verb=~"GET|LIST"}[5m])) | apiserver_request_total | kube-apiserver |
| | apiserver_request_total | kube-apiserver |
Namespace Overview | count(kube_pod_info{cluster="$cluster"}) by (namespace) | kube_pod_info | kube-state-metrics |
| count(kube_service_info{cluster="$cluster"}) by(namespace) | kube_service_info | kube-state-metrics |
| count(kube_pod_container_info{cluster="$cluster"}) by(namespace) | kube_pod_container_info | kube-state-metrics |
| count(kube_configmap_info{cluster="$cluster"}) by(namespace) | kube_configmap_info | kube-state-metrics |
| count(kube_secret_info{cluster="$cluster"}) by(namespace) | kube_secret_info | kube-state-metrics |
| count(kube_deployment_created{cluster="$cluster"}) by (namespace) | kube_deployment_created | kube-state-metrics |
| count(kube_statefulset_created{cluster="$cluster"}) by (namespace) | kube_statefulset_created | kube-state-metrics |
| count(kube_job_created{cluster="$cluster"}) by (namespace) | kube_job_created | kube-state-metrics |
| count(kube_cronjob_created{cluster="$cluster"}) by (namespace) | kube_cronjob_created | kube-state-metrics |
| count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace) - (count(kube_pod_status_phase{cluster="$cluster",phase="Succeeded"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace) | kube_pod_status_ready | kube-state-metrics |
| | kube_pod_status_phase | kube-state-metrics |
| | kube_pod_status_ready | kube-state-metrics |
| count(kube_deployment_status_replicas_ready{cluster="$cluster"}<kube_deployment_spec_replicas{cluster="$cluster"}) by (namespace) | kube_deployment_status_replicas_ready | kube-state-metrics |
| | kube_deployment_spec_replicas | kube-state-metrics |
| count(kube_statefulset_status_replicas_ready{cluster="$cluster"}<kube_statefulset_replicas{cluster="$cluster"}) by (namespace) | kube_statefulset_status_replicas_ready | kube-state-metrics |
| | kube_statefulset_replicas | kube-state-metrics |
| count(kube_daemonset_status_number_unavailable{cluster="$cluster"}>0)by(namespace) | kube_daemonset_status_number_unavailable | kube-state-metrics |
| count(kube_job_status_failed{cluster="$cluster"} == 1) by (namespace) | kube_job_status_failed | kube-state-metrics |
| count(kube_daemonset_created{cluster="$cluster"}) by (namespace) | kube_daemonset_created | kube-state-metrics |
| count(kube_persistentvolumeclaim_info{cluster="$cluster"}) by (namespace) | kube_persistentvolumeclaim_info | kube-state-metrics |
CPU Usage | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
CPU Quota | sum(kube_pod_owner{cluster="$cluster"}) by (namespace) | kube_pod_owner | kube-state-metrics |
| count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace) | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Memory Usage (working_set) | sum(container_memory_working_set_bytes{cluster="$cluster", container!="", container!="POD"}) by (namespace) | container_memory_working_set_bytes | cadvisor |
Memory Requests | sum(kube_pod_owner{cluster="$cluster"}) by (namespace) | kube_pod_owner | kube-state-metrics |
| count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace) | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) | container_memory_rss | cadvisor |
| sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace) | container_memory_rss | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace) | container_memory_rss | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Node Memory Usage (Top 10) | sum(label_replace(topk(10, 1-(node_memory_MemAvailable_bytes{cluster="$cluster"} / node_memory_MemTotal_bytes{cluster="$cluster"})), "node_ip", "$1", "instance", "(.*)"))by(node_ip) | node_memory_MemAvailable_bytes | node-exporter |
| | node_memory_MemTotal_bytes | node-exporter |
Node CPU Usage (Top 10) | topk(10, sum(label_replace(1 - sum(rate(node_cpu_seconds_total{cluster="$cluster",mode="idle"}[1m])) by (instance) / sum(rate(node_cpu_seconds_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip)) | node_cpu_seconds_total | node-exporter |
| | node_cpu_seconds_total | node-exporter |
Node Disk Usage (Top 10) | topk(10, sum(label_replace(1-node_filesystem_free_bytes{cluster="$cluster",mountpoint="/"}/node_filesystem_size_bytes{cluster="$cluster",mountpoint="/",fstype!="rootfs"},"host_ip","$1","instance","(.*)"))by(host_ip)) | node_filesystem_free_bytes | node-exporter |
| | node_filesystem_size_bytes | node-exporter |
Node Network In (Top 10) | topk(10, sum(label_replace(max(irate(node_network_receive_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip)) | node_network_receive_bytes_total | node-exporter |
Node Network Out (Top 10) | topk(10, sum(label_replace(max(irate(node_network_transmit_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip)) | node_network_transmit_bytes_total | node-exporter |
Node Sockets Count(Top 10) | topk(10, sum(label_replace(max(node_sockstat_TCP_alloc{cluster="$cluster"}) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip)) | node_sockstat_TCP_alloc | node-exporter |
Container Memory Usage(Top10) | topk(10, sum (container_memory_working_set_bytes{cluster="$cluster",container !="",container!="POD"}) by (container)) | container_memory_working_set_bytes | cadvisor |
Container Memory Usage/Limit(Top10) | topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace)) | container_memory_working_set_bytes | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
Container CPU Usage(Top10) | topk(10, sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",container !="",container!="POD"}[2m])) by (container)) | container_cpu_usage_seconds_total | cadvisor |
Container Network | topk(10, sum(irate(container_network_receive_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod)) | container_network_receive_bytes_total | cadvisor |
| -topk(10, sum(irate(container_network_transmit_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod)) | container_network_transmit_bytes_total | cadvisor |
Container Memory Usage/Limit (Top 10) | topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace)) | container_memory_working_set_bytes | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
Container CPU Usage (Top 10) | topk(10, sum(irate(container_cpu_usage_seconds_total{cluster="$cluster",container!="",container!="POD"}[1m])) by (container,pod,namespace)or on() vector(0)) | container_cpu_usage_seconds_total | cadvisor |
Container Socket Count(Top 10) | topk(10, sum(container_sockets{cluster="$cluster",container!=""}) by (container,pod,namespace)or on() vector(0)) | container_sockets | cadvisor |
集群 Namespace 大盘
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
CPU Usage | sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m])) | container_cpu_usage_seconds_total | cadvisor |
CPU Usage/Request(%) | sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Usage/Limit(%) | sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) or on() vector(0) | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Request | sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Limit | sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Cluster Available | sum(sum(kube_node_status_capacity{resource="cpu",cluster="$cluster",namespace=~"$namespace"}) by (node) + sum(kube_node_spec_unschedulable{cluster="$cluster",namespace=~"$namespace"}==0) by(node)) | kube_node_status_capacity | kube-state-metrics |
| | kube_node_spec_unschedulable | kube-state-metrics |
StatefulSet Created | count(kube_statefulset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_statefulset_created | kube-state-metrics |
Pod Created | count(kube_pod_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_pod_info | kube-state-metrics |
Containers | count(kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_pod_container_info | kube-state-metrics |
DaemonSet Created | count(kube_daemonset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_daemonset_created | kube-state-metrics |
Job Created | count(kube_job_info{cluster="$cluster",namespace="$namespace"})or on() vector(0) | kube_job_info | kube-state-metrics |
Job Active | count(kube_job_status_active{cluster="$cluster",namespace="$namespace"}==1)or on() vector(0) | kube_job_status_active | kube-state-metrics |
Cron Job Created | count(kube_cronjob_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_cronjob_created | kube-state-metrics |
Cron Job Active | count(kube_cronjob_status_active{cluster="$cluster",namespace="$namespace"}==1) or on() vector(0) | kube_cronjob_status_active | kube-state-metrics |
Unbound PVC | count(kube_persistentvolumeclaim_status_phase{phase!="Bound", cluster="$cluster",namespace="$namespace"}==1) or on() vector(0) | kube_persistentvolumeclaim_status_phase | kube-state-metrics |
PersistentVolumeClaim Created | count(kube_persistentvolumeclaim_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_persistentvolumeclaim_info | kube-state-metrics |
Service Created | count(kube_service_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_service_info | kube-state-metrics |
LoadBalancer Created | count(kube_service_spec_type{type="LoadBalancer", cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_service_spec_type | kube-state-metrics |
Ingress Created | count(kube_ingress_info{cluster="$cluster",namespace="$namespace"})or on() vector(0) | kube_ingress_info | kube-state-metrics |
ConfigMap Created | count(kube_configmap_info{cluster="$cluster",namespace="$namespace"}) | kube_configmap_info | kube-state-metrics |
Secret Created | count(kube_secret_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_secret_info | kube-state-metrics |
PVC Storage Requests Total | sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_persistentvolumeclaim_resource_requests_storage_bytes | kube-state-metrics |
Pod NotReady | count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace) - (count(kube_pod_status_phase{phase="Succeeded", cluster="$cluster",namespace="$namespace"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace) | kube_pod_status_ready | kube-state-metrics |
| | kube_pod_status_phase | kube-state-metrics |
| | kube_pod_status_ready | kube-state-metrics |
Pod UnSchedulable | count(kube_pod_status_unschedulable{cluster="$cluster",namespace="$namespace"}) or on() vector(0) | kube_pod_status_unschedulable | kube-state-metrics |
Deployment NotReady | count(sum(kube_deployment_status_replicas_ready{cluster="$cluster",namespace="$namespace"}) by (deployment)<sum(kube_deployment_spec_replicas{cluster="$cluster",namespace="$namespace"}) by (deployment)) or on() vector(0) | kube_deployment_status_replicas_ready | kube-state-metrics |
| | kube_deployment_spec_replicas | kube-state-metrics |
Daemonset NotReady | count(kube_daemonset_status_number_unavailable{cluster="$cluster",namespace="$namespace"}>0) or on() vector(0) | kube_daemonset_status_number_unavailable | kube-state-metrics |
Job Failed | count(kube_job_status_failed{cluster="$cluster",namespace="$namespace"} == 1) | kube_job_status_failed | kube-state-metrics |
CPU Usage | sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m])) or on() vector(0) | container_cpu_usage_seconds_total | cadvisor |
CPU Quota | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Memory Usage | sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}) or on() vector(0) | container_memory_working_set_bytes | cadvisor |
Memory Usage/Request(%) | sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Memory Usage/Limit(%) | sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Memory Request | sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Memory Limit | sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Cluster Available | sum(sum(kube_node_status_capacity{resource="memory"}) by (node) + sum(kube_node_spec_unschedulable==0) by(node)) or on() vector(0) | kube_node_status_capacity | kube-state-metrics |
| | kube_node_spec_unschedulable | kube-state-metrics |
Memory Usage (w/o cache) | sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}) by (pod) | container_memory_working_set_bytes | cadvisor |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"}) | kube_resourcequota | kube-state-metrics |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"}) | kube_resourcequota | kube-state-metrics |
Memory Quota | sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) | container_memory_working_set_bytes | cadvisor |
| sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}) by (pod) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace"}) by (pod) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| sum(container_memory_rss{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) | container_memory_rss | cadvisor |
| sum(container_memory_cache{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) | container_memory_cache | cadvisor |
| sum(container_memory_swap{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) | container_memory_swap | cadvisor |
Containers | group by (image, container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| | kube_pod_container_status_running | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| | kube_pod_container_status_restarts_total | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(irate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[1m])) by (pod,container) | kube_pod_container_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max(container_spec_cpu_quota{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000 > 0) by (container,pod))) | kube_pod_container_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | container_spec_cpu_quota | cadvisor |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}))) | kube_pod_container_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| | kube_pod_container_resource_limits | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) | kube_pod_container_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max(container_spec_memory_limit_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod, container) < 1) | kube_pod_container_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}) | kube_pod_container_info | kube-state-metrics |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"})) | kube_pod_container_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
API Server(独立集群)
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Availability > 99.000% | 1 - (
(
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m]))
-
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m]))
)
+
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0))
)
/
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m])) | apiserver_request_duration_seconds_count | kube-apiserver |
| | apiserver_request_duration_seconds_bucket | kube-apiserver |
| | apiserver_request_total | kube-apiserver |
ErrorBudget > 99.000% | 100 *
(1 - (
(
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m]))
-
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m]))
)
+
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0))
)
/
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m]))
-0.990000) | apiserver_request_duration_seconds_count | kube-apiserver |
| | apiserver_request_duration_seconds_bucket | kube-apiserver |
| | apiserver_request_total | kube-apiserver |
Read Availability | 1 - (
(
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET", cluster="$cluster"}[5m]))
-
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",le="1", cluster="$cluster"}[5m]))
)
+
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..", cluster="$cluster"}[5m]) or vector(0))
)
/
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET", cluster="$cluster"}[5m])) | apiserver_request_duration_seconds_count | kube-apiserver |
| | apiserver_request_duration_seconds_bucket | kube-apiserver |
| | apiserver_request_total | kube-apiserver |
Read SLI - Requests | sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m])) | apiserver_request_total | kube-apiserver |
Read SLI - Errors | sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..",cluster="$cluster"}[5m]))/
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m])) | apiserver_request_total | kube-apiserver |
Read SLI - Duration | histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))) > 0 | apiserver_request_duration_seconds_bucket | kube-apiserver |
Write Availability | 1 - (
(
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m]))
-
sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1", cluster="$cluster"}[5m]))
)
+
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..", cluster="$cluster"}[5m]) or vector(0))
)
/
sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m])) | apiserver_request_duration_seconds_count | kube-apiserver |
| | apiserver_request_duration_seconds_bucket | kube-apiserver |
| | apiserver_request_total | kube-apiserver |
Write SLI - Requests | sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m])) | apiserver_request_total | kube-apiserver |
Write SLI - Errors | sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..",cluster="$cluster"}[5m]))/
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m])) | apiserver_request_total | kube-apiserver |
Write SLI - Duration | histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))) > 0 | apiserver_request_duration_seconds_bucket | kube-apiserver |
Work Queue Add Rate | sum(rate(workqueue_adds_total{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name) | workqueue_adds_total | kubelet |
Work Queue Depth | sum(rate(workqueue_depth{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name) | workqueue_depth | kubelet |
Work Queue Latency | histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name, le)) | workqueue_queue_duration_seconds_bucket | kubelet |
Memory | process_resident_memory_bytes{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"} | process_resident_memory_bytes | node-exporter |
CPU usage | rate(process_cpu_seconds_total{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}[5m]) | process_cpu_seconds_total | node-exporter |
Controller Manager(独立集群)
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Up | sum(up{cluster=~"$cluster",job="kube-controller-manager"}) | up | kubelet |
Work Queue Add Rate | sum(rate(workqueue_adds_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name) | workqueue_adds_total | kubelet |
Work Queue Depth | sum(rate(workqueue_depth{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name) | workqueue_depth | kubelet |
Work Queue Latency | histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name, le)) | workqueue_queue_duration_seconds_bucket | kubelet |
Kube API Request Rate | sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"2.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"3.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"4.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"5.."}[5m])) | rest_client_requests_total | kubelet |
Post Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Get Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Memory | process_resident_memory_bytes{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"} | process_resident_memory_bytes | node-exporter |
CPU usage | rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}[5m]) | process_cpu_seconds_total | node-exporter |
Kubelet
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Up | sum(up{cluster="$cluster", job="kubelet"}) | up | kubelet |
Running Pods | sum(kubelet_running_pods{cluster="$cluster", job="kubelet", instance=~"$instance"}) | kubelet_running_pods | kubelet |
Running Container | sum(kubelet_running_containers{cluster="$cluster", job="kubelet", instance=~"$instance"}) | kubelet_running_containers | kubelet |
Actual Volume Count | sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance", state="actual_state_of_world"}) | volume_manager_total_volumes | kubelet |
Desired Volume Count | sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance",state="desired_state_of_world"}) | volume_manager_total_volumes | kubelet |
Config Error Count | sum(rate(kubelet_node_config_error{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) | kubelet_node_config_error | kubelet |
Operation Rate | sum(rate(kubelet_runtime_operations_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (operation_type, instance) | kubelet_runtime_operations_total | kubelet |
Operation Error Rate | sum(rate(kubelet_runtime_operations_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type) | kubelet_runtime_operations_errors_total | kubelet |
Operation duration 99th quantile | histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type, le)) | kubelet_runtime_operations_duration_seconds_bucket | kubelet |
Pod Start Rate | sum(rate(kubelet_pod_start_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance) | kubelet_pod_start_duration_seconds_count | kubelet |
| sum(rate(kubelet_pod_worker_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance) | kubelet_pod_worker_duration_seconds_count | kubelet |
Pod Start Duration | histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le)) | kubelet_pod_start_duration_seconds_bucket | kubelet |
| histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le)) | kubelet_pod_worker_duration_seconds_bucket | kubelet |
Storage Operation Rate | sum(rate(storage_operation_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin) | storage_operation_duration_seconds_count | kubelet |
Storage Operation Error Rate | sum(rate(storage_operation_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin) | storage_operation_errors_total | kubelet |
Storage Operation Duration 99th quantile | histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin, le)) | storage_operation_duration_seconds_bucket | kubelet |
Cgroup manager operation rate | sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type) | kubelet_cgroup_manager_duration_seconds_count | kubelet |
Cgroup manager 99th quantile | histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type, le)) | kubelet_cgroup_manager_duration_seconds_bucket | kubelet |
PLEG relist rate | sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance) | kubelet_pleg_relist_duration_seconds_count | kubelet |
PLEG relist interval | histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le)) | kubelet_pleg_relist_interval_seconds_bucket | kubelet |
PLEG relist duration | histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le)) | kubelet_pleg_relist_duration_seconds_bucket | kubelet |
RPC Rate | sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"2.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"3.."}[5m])) | rest_client_requests_total | kubelet |
Request duration 99th quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster="$cluster",job="kubelet", instance=~"$instance"}[5m])) by (instance, verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Memory | process_resident_memory_bytes{cluster="$cluster",job="kubelet",instance=~"$instance"} | process_resident_memory_bytes | node-exporter |
CPU usage | rate(process_cpu_seconds_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m]) | process_cpu_seconds_total | node-exporter |
Goroutines | go_goroutines{cluster="$cluster",job="kubelet",instance=~"$instance"} | go_goroutines | node-exporter |
Proxy(非默认安装组件)
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Up | sum(up{job="kube-proxy"}) | up | kubelet |
Rules Sync Rate | sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m])) | kubeproxy_sync_proxy_rules_duration_seconds_count | kube-proxy |
Rule Sync Latency 99th Quantile | histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m])) | kubeproxy_sync_proxy_rules_duration_seconds_bucket | kube-proxy |
Network Programming Rate | sum(rate(kubeproxy_network_programming_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m])) | kubeproxy_network_programming_duration_seconds_count | kube-proxy |
Network Programming Latency 99th Quantile | histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m])) by (instance, le)) | kubeproxy_network_programming_duration_seconds_bucket | kube-proxy |
Kube API Request Rate | sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m])) | rest_client_requests_total | kubelet |
Post Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Kube API Request Rate | sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m])) | rest_client_requests_total | kubelet |
Post Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Get Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Memory | process_resident_memory_bytes{job="kube-proxy",instance=~"$instance"} | process_resident_memory_bytes | node-exporter |
CPU usage | rate(process_cpu_seconds_total{job="kube-proxy",instance=~"$instance"}[5m]) | process_cpu_seconds_total | node-exporter |
Scheduler(独立集群)
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Up | sum(up{cluster=~"$cluster", job="kube-scheduler"}) | up | kubelet |
Kube API Request Rate | sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"2.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"3.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"4.."}[5m])) | rest_client_requests_total | kubelet |
| sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"5.."}[5m])) | rest_client_requests_total | kubelet |
Post Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Get Request Latency 99th Quantile | histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le)) | rest_client_request_duration_seconds_bucket | kubelet |
Memory | process_resident_memory_bytes{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"} | process_resident_memory_bytes | node-exporter |
CPU usage | rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}[5m]) | process_cpu_seconds_total | node-exporter |
集群节点监控详情
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
服务器资源总览表 | node_uname_info{job=~"$job", cluster=~"$cluster"} - 0 | node_uname_info | node-exporter |
| node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - 0 | node_memory_MemTotal_bytes | node-exporter |
| count(node_cpu_seconds_total{job=~"$job",mode='system',cluster=~"$cluster"}) by (instance) | node_cpu_seconds_total | node-exporter |
| sum(time() - node_boot_time_seconds{job=~"$job",cluster=~"$cluster"})by(instance) | node_boot_time_seconds | node-exporter |
| max((node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}) *100/(node_filesystem_avail_bytes {job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}+(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"})))by(instance) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_avail_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| (1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100 | node_cpu_seconds_total | node-exporter |
| (1 - (node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})))* 100 | node_memory_MemAvailable_bytes | node-exporter |
| | node_memory_MemTotal_bytes | node-exporter |
| node_load5{job=~"$job",cluster=~"$cluster"} | node_load5 | node-exporter |
| max(irate(node_disk_written_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])) by (instance) | node_disk_written_bytes_total | node-exporter |
| max(irate(node_network_receive_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance) | node_network_receive_bytes_total | node-exporter |
| max(irate(node_network_transmit_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance) | node_network_transmit_bytes_total | node-exporter |
| node_load5{job=~"$job",cluster=~"$cluster"} | node_load5 | node-exporter |
整体总负载与整体平均 CPU 使用率 | count(node_cpu_seconds_total{job=~"$job",cluster=~"$cluster", mode='system'}) | node_cpu_seconds_total | node-exporter |
| sum(node_load5{job=~"$job",cluster=~"$cluster"}) | node_load5 | node-exporter |
| avg(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100 | node_cpu_seconds_total | node-exporter |
整体总内存与整体平均内存使用率 | sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"}) | node_memory_MemTotal_bytes | node-exporter |
| sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"}) | node_memory_MemTotal_bytes | node-exporter |
| | node_memory_MemAvailable_bytes | node-exporter |
| (sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"}) / sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"}))*100 | node_memory_MemTotal_bytes | node-exporter |
| | node_memory_MemAvailable_bytes | node-exporter |
整体总磁盘与整体平均磁盘使用率 | sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) | node_filesystem_size_bytes | node-exporter |
| sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| (sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)))) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| | node_filesystem_avail_bytes | node-exporter |
运行时间 | avg(time() - node_boot_time_seconds{instance=~"$node",cluster=~"$cluster"}) | node_boot_time_seconds | node-exporter |
CPU 核数 | count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'}) | node_cpu_seconds_total | node-exporter |
总内存 | sum(node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"}) | node_memory_MemTotal_bytes | node-exporter |
总 CPU 使用率 | 100 - (avg(irate(node_cpu_seconds_total{instance=~"$node",mode="idle",cluster=~"$cluster"}[5m])) * 100) | node_cpu_seconds_total | node-exporter |
内存使用率 | (1 - (node_memory_MemAvailable_bytes{instance=~"$node",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{instance=~"$node",cluster=~"$cluster"})))* 100 | node_memory_MemAvailable_bytes | node-exporter |
| | node_memory_MemTotal_bytes | node-exporter |
最大分区使用率 | (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"})*100 /(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"})) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| | node_filesystem_avail_bytes | node-exporter |
CPU iowait | avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) * 100 | node_cpu_seconds_total | node-exporter |
各分区可用空间 | node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0 | node_filesystem_size_bytes | node-exporter |
| node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0 | node_filesystem_avail_bytes | node-exporter |
| (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| | node_filesystem_avail_bytes | node-exporter |
CPU 使用率 | avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="system"}[5m])) by (instance) *100 | node_cpu_seconds_total | node-exporter |
| avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="user"}[5m])) by (instance) *100 | node_cpu_seconds_total | node-exporter |
| avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) by (instance) *100 | node_cpu_seconds_total | node-exporter |
| (1 - avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="idle"}[5m])) by (instance))*100 | node_cpu_seconds_total | node-exporter |
内存信息 | node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"} | node_memory_MemTotal_bytes | node-exporter |
| node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"} - node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} | node_memory_MemTotal_bytes | node-exporter |
| | node_memory_MemAvailable_bytes | node-exporter |
| node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} | node_memory_MemAvailable_bytes | node-exporter |
| (1 - (node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} / (node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})))* 100 | node_memory_MemAvailable_bytes | node-exporter |
| | node_memory_MemTotal_bytes | node-exporter |
每秒网络带宽使用 | irate(node_network_receive_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8 | node_network_receive_bytes_total | node-exporter |
| irate(node_network_transmit_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8 | node_network_transmit_bytes_total | node-exporter |
系统平均负载 | node_load1{cluster=~"$cluster",instance=~"$node"} | node_load1 | node-exporter |
| node_load5{cluster=~"$cluster",instance=~"$node"} | node_load5 | node-exporter |
| node_load15{cluster=~"$cluster",instance=~"$node"} | node_load15 | node-exporter |
| sum(count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'}) by (cpu,instance)) by(instance) | node_cpu_seconds_total | node-exporter |
每秒磁盘读写容量 | irate(node_disk_read_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_read_bytes_total | node-exporter |
| irate(node_disk_written_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_written_bytes_total | node-exporter |
磁盘使用率 | (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) | node_filesystem_size_bytes | node-exporter |
| | node_filesystem_free_bytes | node-exporter |
| | node_filesystem_avail_bytes | node-exporter |
| node_filesystem_files_free{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"} / node_filesystem_files{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"} | node_filesystem_files_free | node-exporter |
| | node_filesystem_files | node-exporter |
磁盘读写速率(IOPS) | irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_reads_completed_total | node-exporter |
| irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_writes_completed_total | node-exporter |
| node_disk_io_now{cluster=~"$cluster",instance=~"$node"} | node_disk_io_now | node-exporter |
每1秒内 I/O 操作耗时占比 | irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_io_time_seconds_total | node-exporter |
每次 IO 读写的耗时 | irate(node_disk_read_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_read_time_seconds_total | node-exporter |
| | node_disk_reads_completed_total | node-exporter |
| irate(node_disk_write_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_write_time_seconds_total | node-exporter |
| | node_disk_writes_completed_total | node-exporter |
| irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_io_time_seconds_total | node-exporter |
| irate(node_disk_io_time_weighted_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_disk_io_time_weighted_seconds_total | node-exporter |
网络 Socket 连接信息 | node_netstat_Tcp_CurrEstab{cluster=~"$cluster",instance=~'$node'} | node_netstat_Tcp_CurrEstab | node-exporter |
| node_sockstat_TCP_tw{cluster=~"$cluster",instance=~'$node'} | node_sockstat_TCP_tw | node-exporter |
| node_sockstat_sockets_used{cluster=~"$cluster",instance=~'$node'} | node_sockstat_sockets_used | node-exporter |
| node_sockstat_UDP_inuse{cluster=~"$cluster",instance=~'$node'} | node_sockstat_UDP_inuse | node-exporter |
| node_sockstat_TCP_alloc{cluster=~"$cluster",instance=~'$node'} | node_sockstat_TCP_alloc | node-exporter |
| irate(node_netstat_Tcp_PassiveOpens{cluster=~"$cluster",instance=~'$node'}[5m]) | node_netstat_Tcp_PassiveOpens | node-exporter |
| irate(node_netstat_Tcp_ActiveOpens{cluster=~"$cluster",instance=~'$node'}[5m]) | node_netstat_Tcp_ActiveOpens | node-exporter |
| irate(node_netstat_Tcp_InSegs{cluster=~"$cluster",instance=~'$node'}[5m]) | node_netstat_Tcp_InSegs | node-exporter |
| irate(node_netstat_Tcp_OutSegs{cluster=~"$cluster",instance=~'$node'}[5m]) | node_netstat_Tcp_OutSegs | node-exporter |
| irate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster",instance=~'$node'}[5m]) | node_netstat_Tcp_RetransSegs | node-exporter |
打开的文件描述符(左)/每秒上下文切换次数(右) | node_filefd_allocated{cluster=~"$cluster",instance=~"$node"} | node_filefd_allocated | node-exporter |
| irate(node_context_switches_total{cluster=~"$cluster",instance=~"$node"}[5m]) | node_context_switches_total | node-exporter |
| (node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}/node_filefd_maximum{cluster=~"$cluster",instance=~"$node"}) *100 | node_filefd_allocated | node-exporter |
| | node_filefd_maximum | node-exporter |
节点 Pod 监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Pods | count(kube_pod_info{node=~"$node"}) | kube_pod_info | kube-state-metrics |
Pod Request Memory | sum(kube_pod_container_resource_requests_memory_bytes{node=~"$node"})by(node) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Pod Request CPU Cores | sum(kube_pod_container_resource_requests_cpu_cores{node=~"$node"})by(node) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Usage | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
CPU Quota | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Memory Usage | sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node", container!="", container!="POD"}) by (pod) | node_namespace_pod_container:container_memory_working_set_bytes | 预聚合指标 |
Memory Quota | sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) | node_namespace_pod_container:container_memory_working_set_bytes | 预聚合指标 |
| sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod) | node_namespace_pod_container:container_memory_working_set_bytes | 预聚合指标 |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod) | node_namespace_pod_container:container_memory_working_set_bytes | 预聚合指标 |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Pod List | group (kube_pod_info{host_ip="$node"})by(created_by_kind, created_by_name,host_network,pod_ip,pod,priority_class,namespace) | kube_pod_info | kube-state-metrics |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(kube_pod_status_phase{}==1) by (pod, phase) | kube_pod_info | kube-state-metrics |
| | kube_pod_status_phase | kube-state-metrics |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(container_memory_working_set_bytes) by (pod) | kube_pod_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(rate(container_cpu_usage_seconds_total{image!=""}[5m])) by (pod) | kube_pod_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(time()-kube_pod_start_time) by (pod) | kube_pod_info | kube-state-metrics |
| | kube_pod_start_time | kube-state-metrics |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) max(kube_pod_status_ready{condition="true"}) by (pod) or on() vector(0) | kube_pod_info | kube-state-metrics |
| | kube_pod_status_ready | kube-state-metrics |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{image!=""}[5m])) by (pod) or on() vector(0) | kube_pod_info | kube-state-metrics |
| | container_network_receive_bytes_total | cadvisor |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{image!=""}[5m])) by (pod) or on() vector(0) | kube_pod_info | kube-state-metrics |
| | container_network_transmit_bytes_total | cadvisor |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0) | kube_pod_info | kube-state-metrics |
| | container_fs_reads_bytes_total | cadvisor |
| min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0) | kube_pod_info | kube-state-metrics |
| | container_fs_writes_bytes_total | cadvisor |
工作负载监控概览
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
CPU Usage | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"}) | kube_resourcequota | kube-state-metrics |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"}) | kube_resourcequota | kube-state-metrics |
CPU Quota | count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type) | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
/sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
/sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Usage | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"}) | kube_resourcequota | kube-state-metrics |
| scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"}) | kube_resourcequota | kube-state-metrics |
Memory Quota | count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type) | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
| container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
| kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
/sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
/sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Deployment |
Deployment
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Age | time() - max(kube_deployment_created{cluster="$cluster",namespace="$namespace",deployment="$workload"}) | kube_deployment_created | kube-state-metrics |
Replicas(Pods)-Request | max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) | kube_deployment_spec_replicas | kube-state-metrics |
Replicas(Pods)-Ready | max(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"}) | kube_deployment_status_replicas_ready | kube-state-metrics |
Replica Trend | max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_spec_replicas | kube-state-metrics |
| max(kube_deployment_status_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_status_replicas | kube-state-metrics |
| min(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_status_replicas_ready | kube-state-metrics |
| min(kube_deployment_status_replicas_available{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_status_replicas_available | kube-state-metrics |
| min(kube_deployment_status_replicas_updated{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_status_replicas_updated | kube-state-metrics |
| min(kube_deployment_status_replicas_unavailable{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_deployment_status_replicas_unavailable | kube-state-metrics |
CPU Usage | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
CPU Quota | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod)
/sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod)
/sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
CPU Limit-Total | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Request-Total | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Info | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| max(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| max(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Usage/Limit (%) | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Usage/Request(%) | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU User Time(%) | avg(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container))) by (pod,container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_cpu_user_seconds_total | cadvisor |
| | container_cpu_user_seconds_total | cadvisor |
| | container_cpu_system_seconds_total | cadvisor |
Memory Usage | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Quota | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod)
/sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod)
/sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Limit-Total | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!=""}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_spec_memory_limit_bytes | cadvisor |
Memory Request-Total | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Memory Info | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"}) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| max(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| max(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Memory Usage/Limit(%) | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Memory Usage/Request(%) | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Sockets | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_sockets | cadvisor |
Network In | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_network_receive_bytes_total | cadvisor |
Network Out | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_network_transmit_bytes_total | cadvisor |
Network Errors | sum(label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod))) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_network_receive_errors_total | cadvisor |
| | container_network_transmit_errors_total | cadvisor |
Network IO | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_network_receive_bytes_total | cadvisor |
| label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_network_transmit_bytes_total | cadvisor |
File System Read | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_fs_reads_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_fs_reads_bytes_total | cadvisor |
File System Write | label_replace(
max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node),
"replicaset",
"$1",
"created_by_name",
"(.+)"
) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset)
* on(pod) group_right() max(rate(container_fs_writes_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container) | kube_pod_info | kube-state-metrics |
| | kube_replicaset_owner | kube-state-metrics |
| | container_fs_writes_bytes_total | cadvisor |
StatefulSet
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Generation | max(kube_statefulset_metadata_generation{cluster="$cluster",namespace="$namespace", statefulset="$workload"}) | kube_statefulset_metadata_generation | kube-state-metrics |
Replicas(Pods)-Request | max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) | kube_statefulset_replicas | kube-state-metrics |
Replicas(Pods)-Ready | max(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) | kube_statefulset_status_replicas_ready | kube-state-metrics |
Age | time() - max(kube_statefulset_created{cluster="$cluster",namespace="$namespace",statefulset="$workload"}) | kube_statefulset_created | kube-state-metrics |
Replica Trend | max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_statefulset_replicas | kube-state-metrics |
| max(kube_statefulset_status_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_statefulset_status_replicas | kube-state-metrics |
| min(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_statefulset_status_replicas_ready | kube-state-metrics |
| min(kube_statefulset_status_replicas_available{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_statefulset_status_replicas_available | kube-state-metrics |
| min(kube_statefulset_status_replicas_updated{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod) | kube_statefulset_status_replicas_updated | kube-state-metrics |
CPU Usage | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
CPU Quota | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod)
/sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod)
/sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
CPU Limit-Total | sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Request-Total | sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Info | group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image) | kube_pod_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU Usage/Limit (%) | group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
CPU Usage/Request(%) | group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | container_cpu_usage_seconds_total | cadvisor |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
CPU User Time(%) | avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container,image))) by (pod,container,image) | kube_pod_info | kube-state-metrics |
| | container_cpu_user_seconds_total | cadvisor |
| | container_cpu_user_seconds_total | cadvisor |
| | container_cpu_system_seconds_total | cadvisor |
Memory Usage | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Quota | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod)
/sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod)
/sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Limit-Total | sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | container_spec_memory_limit_bytes | cadvisor |
Memory Request-Total | sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod)) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Memory Info | avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max by(container, pod, image) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"}))by (container, pod, image) | kube_pod_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max by(container, pod, image) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container) | kube_pod_info | kube-state-metrics |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Memory Usage/Limit(%) | group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Memory Usage/Request(%) | group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) | kube_pod_info | kube-state-metrics |
| | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Sockets | sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod)) | kube_pod_info | kube-state-metrics |
| | container_sockets | cadvisor |
Network In | sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)) | kube_pod_info | kube-state-metrics |
| | container_network_receive_bytes_total | cadvisor |
Network Out | sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)) | kube_pod_info | kube-state-metrics |
| | container_network_transmit_bytes_total | cadvisor |
Network Errors | sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod))) | kube_pod_info | kube-state-metrics |
| | container_network_receive_errors_total | cadvisor |
| | container_network_transmit_errors_total | cadvisor |
Network IO | sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod) | kube_pod_info | kube-state-metrics |
| | container_network_receive_bytes_total | cadvisor |
| -sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod)
* on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod) | kube_pod_info | kube-state-metrics |
| | container_network_transmit_bytes_total | cadvisor |
DaemonSet
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
CPU Usage | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
CPU Quota | sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod)
/sum(
kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod)
/sum(
kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Usage | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Memory Quota | sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod)
/sum(
kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sum(
container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod)
/sum(
kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"}
) by (pod) | container_memory_working_set_bytes | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
集群 Pod 监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Age | time() - max(kube_pod_created{pod=~"$pod",cluster="$cluster",namespace="$namespace"}) | kube_pod_created | kube-state-metrics |
Restart Count-Last 1 Hour | ceil(sum (increase(kube_pod_container_status_restarts_total{pod=~"$pod",cluster="$cluster",namespace="$namespace"}[1h]))) | kube_pod_container_status_restarts_total | kube-state-metrics |
Requests-CPU | sum(kube_pod_container_resource_requests_cpu_cores{pod=~"$pod"}) or vector(0) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
Requests-Memory | sum(kube_pod_container_resource_requests_memory_bytes{pod=~"$pod"}) or vector(0) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
Limits-CPU | sum(kube_pod_container_resource_limits_cpu_cores{pod=~"$pod"}) or vector(0) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Limits-Memory | sum(kube_pod_container_resource_limits_memory_bytes{pod=~"$pod"}) or vector(0) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Containers | group by (image, container,pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_info | kube-state-metrics |
| sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_status_running | kube-state-metrics |
| sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_resource_limits | kube-state-metrics |
| sum by (container,pod)(kube_pod_container_resource_limits{resource="memory",cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_resource_limits | kube-state-metrics |
| max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace", pod=~"$pod"}) | kube_pod_container_status_restarts_total | kube-state-metrics |
CPU Usage (%) | max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (container,namespace,pod) / max(container_spec_cpu_quota{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000) by (container,namespace,pod) or on() vector(0) | container_cpu_usage_seconds_total | cadvisor |
| | container_spec_cpu_quota | cadvisor |
CPU Usage By Cores | max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (pod,container,namespace)or on() vector(0) | container_cpu_usage_seconds_total | cadvisor |
CPU Load (10s) | max(container_cpu_load_average_10s{namespace=~"$namespace", pod=~"$pod", container!="", container!="POD"} / 1000)by(pod,container) | container_cpu_load_average_10s | cadvisor |
CPU Throttled Percent | max (rate (container_cpu_cfs_throttled_seconds_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) / max (rate (container_cpu_cfs_periods_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) or on() vector(0) | container_cpu_cfs_throttled_seconds_total | cadvisor |
| | container_cpu_cfs_periods_total | cadvisor |
CPU Quota | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_requests_cpu_cores | kube-state-metrics |
| sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
| sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate | 预聚合指标 |
| | kube_pod_container_resource_limits_cpu_cores | kube-state-metrics |
Memory Usage (WSS) | max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) | container_memory_working_set_bytes | cadvisor |
Memory Usage | max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) | container_memory_usage_bytes | cadvisor |
Memory Usage (RSS) | max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) or on() vector(0) | container_memory_rss | cadvisor |
Memory Cache | max(container_memory_cache{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) | container_memory_cache | cadvisor |
Usage WSS/Limit (%) | (max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0) | container_memory_working_set_bytes | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
Usage/Limit (%) | (max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0) | container_memory_usage_bytes | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
Usage RSS/Limit (%) | (max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ sum(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0) | container_memory_rss | cadvisor |
| | container_spec_memory_limit_bytes | cadvisor |
Memory Failcnt | max (increase(container_memory_failcnt{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (pod,container) | container_memory_failcnt | cadvisor |
Memory Quota | sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) | container_memory_working_set_bytes | cadvisor |
| sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace="$namespace", pod="$pod"}) by (container) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_requests_memory_bytes | kube-state-metrics |
| sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
| sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace", pod="$pod"}) by (container) | container_memory_working_set_bytes | cadvisor |
| | kube_pod_container_resource_limits_memory_bytes | kube-state-metrics |
Network Input | max (rate (container_network_receive_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod_name=~"$pod"}[1m])) by(pod) | container_network_receive_bytes_total | cadvisor |
Network Output | max (rate (container_network_transmit_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod_name=~"$pod"}[1m]))by(pod) | container_network_transmit_bytes_total | cadvisor |
Network Input Error (%) | max (increase (container_network_receive_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) | container_network_receive_packets_dropped_total | cadvisor |
| | container_network_receive_packets_total | cadvisor |
| max (increase (container_network_receive_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) | container_network_receive_errors_total | cadvisor |
| | container_network_receive_packets_total | cadvisor |
Network Output Error (%) | max (increase (container_network_transmit_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) | container_network_transmit_packets_dropped_total | cadvisor |
| | container_network_transmit_packets_total | cadvisor |
| max (increase (container_network_transmit_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) | container_network_transmit_errors_total | cadvisor |
| | container_network_receive_packets_total | cadvisor |
File System Read | max (rate(container_fs_reads_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m]))by (container,pod) | container_fs_reads_bytes_total | cadvisor |
File System Write | max (rate(container_fs_writes_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (container,pod) | container_fs_writes_bytes_total | cadvisor |
Network Socket | max(container_sockets{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod) | container_sockets | cadvisor |
Process Number | count(container_processes{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod) | container_processes | cadvisor |
集群网络监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Current Rate of Bytes Received | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_bytes_total | cadvisor |
Current Rate of Bytes Transmitted | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_bytes_total | cadvisor |
Current Status | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_bytes_total | cadvisor |
| sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_bytes_total | cadvisor |
| sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_bytes_total | cadvisor |
| sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_bytes_total | cadvisor |
| sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_packets_total | cadvisor |
| sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_packets_total | cadvisor |
| sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_packets_dropped_total | cadvisor |
| sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_packets_dropped_total | cadvisor |
Average Rate of Bytes Received | sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_bytes_total | cadvisor |
Average Rate of Bytes Transmitted | sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_bytes_total | cadvisor |
Receive Bandwidth | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_bytes_total | cadvisor |
Transmit Bandwidth | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_bytes_total | cadvisor |
Rate of Received Packets | sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_packets_total | cadvisor |
Rate of Transmitted Packets | sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_packets_total | cadvisor |
Rate of Received Packets Dropped | sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_receive_packets_dropped_total | cadvisor |
Rate of Transmitted Packets Dropped | sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace)) | container_network_transmit_packets_dropped_total | cadvisor |
Rate of TCP Retransmits out of all sent segments | sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[5m]) / rate(node_netstat_Tcp_OutSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance)) | node_netstat_Tcp_RetransSegs | node-exporter |
| | node_netstat_Tcp_OutSegs | node-exporter |
Rate of TCP SYN Retransmits out of all retransmits | sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance)) | node_netstat_TcpExt_TCPSynRetrans | node-exporter |
| | node_netstat_Tcp_RetransSegs | node-exporter |
命名空间 Pods 网络监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Current Rate of Bytes Received | sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) | container_network_receive_bytes_total | cadvisor |
Current Rate of Bytes Transmitted | sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[5m])) | container_network_transmit_bytes_total | cadvisor |
Current Status | sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_bytes_total | cadvisor |
| sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_bytes_total | cadvisor |
| sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_packets_total | cadvisor |
| sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_packets_total | cadvisor |
| sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_packets_dropped_total | cadvisor |
| sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_packets_dropped_total | cadvisor |
Receive Bandwidth | sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_bytes_total | cadvisor |
Transmit Bandwidth | sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_bytes_total | cadvisor |
Rate of Received Packets | sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_packets_total | cadvisor |
Rate of Transmitted Packets | sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_packets_total | cadvisor |
Rate of Received Packets Dropped | sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod) | container_network_receive_packets_dropped_total | cadvisor |
Rate of Transmitted Packets Dropped | sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster", namespace=~"$namespace"}[5m])) by (pod) | container_network_transmit_packets_dropped_total | cadvisor |
命名空间工作负载网络监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Current Rate of Bytes Received | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Current Rate of Bytes Transmitted | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Current Status | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
| sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Average Rate of Bytes Received | sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Average Rate of Bytes Transmitted | sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Receive Bandwidth | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Transmit Bandwidth | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Received Packets | sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Transmitted Packets | sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Received Packets Dropped | sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_receive_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Transmitted Packets Dropped | sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) | container_network_transmit_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Pod 网络监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Current Rate of Bytes Received | sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) | container_network_receive_bytes_total | cadvisor |
Current Rate of Bytes Transmitted | sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) | container_network_transmit_bytes_total | cadvisor |
Receive Bandwidth | sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_receive_bytes_total | cadvisor |
Transmit Bandwidth | sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_transmit_bytes_total | cadvisor |
Rate of Received Packets | sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_receive_packets_total | cadvisor |
Rate of Transmitted Packets | sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_transmit_packets_total | cadvisor |
Rate of Received Packets Dropped | sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_receive_packets_dropped_total | cadvisor |
Rate of Transmitted Packets Dropped | sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod) | container_network_transmit_packets_dropped_total | cadvisor |
工作负载网络监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Current Rate of Bytes Received | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Current Rate of Bytes Transmitted | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Average Rate of Bytes Received | sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Average Rate of Bytes Transmitted | sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Receive Bandwidth | sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_receive_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Transmit Bandwidth | sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_transmit_bytes_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Received Packets | sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_receive_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Transmitted Packets | sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_transmit_packets_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Received Packets Dropped | sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_receive_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
Rate of Transmitted Packets Dropped | sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) | container_network_transmit_packets_dropped_total | cadvisor |
| | namespace_workload_pod:kube_pod_owner:relabel | 预聚合指标 |
PVC 存储监控
图表名称 | 查询语句 | 使用的指标 | 配置文件 |
Volume Space Usage | (
sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
-
sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
) | kubelet_volume_stats_capacity_bytes | kubelet |
| | kubelet_volume_stats_available_bytes | kubelet |
| sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) | kubelet_volume_stats_available_bytes | kubelet |
Volume Space Usage | (
kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}
-
kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}
)
/
kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}
* 100 | kubelet_volume_stats_capacity_bytes | kubelet |
| | kubelet_volume_stats_available_bytes | kubelet |
| | kubelet_volume_stats_capacity_bytes | kubelet |
Volume inodes Usage | sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) | kubelet_volume_stats_inodes_used | kubelet |
| (
sum without(instance, node) (kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
-
sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
) | kubelet_volume_stats_inodes | kubelet |
| | kubelet_volume_stats_inodes_used | kubelet |
Volume inodes Usage | kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}
/
kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}
* 100 | kubelet_volume_stats_inodes_used | kubelet |
| | kubelet_volume_stats_inodes | kubelet |