A simple script to display the "health" of an ES cluster
#!/bin/bash
# Query the _cat APIs of an OpenShift logging Elasticsearch instance,
# authenticating with the admin client certificates mounted in the pod.
ES_URL='https://localhost:9200'
curl_get='curl -s -X GET --cacert /etc/elasticsearch/secret/admin-ca --cert /etc/elasticsearch/secret/admin-cert --key /etc/elasticsearch/secret/admin-key'
date
# Overall cluster status (green/yellow/red), shard counts, pending tasks
$curl_get $ES_URL/_cat/health?v
# Per-node shard counts and disk usage
$curl_get $ES_URL/_cat/allocation?v\&h=node,host,ip,shards,disk.indices,disk.used,disk.avail,disk.total,disk.percent
# See https://www.elastic.co/guide/en/elasticsearch/reference/2.4/cat-nodes.html for header meanings
# Per-node heap, RAM, file descriptor, load average, and uptime stats
$curl_get $ES_URL/_cat/nodes?v\&h=name,host,r,m,hc,hp,hm,rc,rp,rm,fdc,fdp,fdm,load,uptime
# Per-node fielddata, query cache, request cache, flush, refresh, and segment memory stats
$curl_get $ES_URL/_cat/nodes?v\&h=name,host,r,m,fm,fe,qcm,qce,rcm,rce,rchc,rcmc,ft,ftt,rto,rti,sc,sm,siwm,siwmx,svmm
# Per-node get, indexing, merge, and search (fetch/query) activity
$curl_get $ES_URL/_cat/nodes?v\&h=name,host,r,m,gc,gto,gti,iic,iito,iiti,mc,mt,mtt,sfc,sfto,sfti,sqc,sqto,sqti
# Thread pool activity for the bulk, refresh, and management pools;
# non-zero *.rejected counts usually indicate an overloaded node
$curl_get $ES_URL/_cat/thread_pool?v\&h=host,bulk.rejected,bulk.completed,bulk.queue,bulk.queueSize,bulk.size,bulk.active,bulk.largest
$curl_get $ES_URL/_cat/thread_pool?v\&h=host,refresh.rejected,refresh.completed,refresh.queue,refresh.size,refresh.active,refresh.largest
$curl_get $ES_URL/_cat/thread_pool?v\&h=host,management.rejected,management.completed,management.queue,management.size,management.active,management.largest
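The certificates referenced above are mounted inside the Elasticsearch pods of an OpenShift aggregated-logging deployment, so the script has to run inside one of them. One way to do that (a sketch; the logging namespace, the pod name taken from the output below, and the /tmp/es-health.sh path are illustrative assumptions):

# Copy the script into an ES pod and run it there
oc cp es-health.sh logging/logging-es-data-master-2esm8hxv:/tmp/es-health.sh
oc exec -n logging logging-es-data-master-2esm8hxv -- bash /tmp/es-health.sh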
Here is an example output from a healthy cluster:
Thu May 24 20:54:33 UTC 2018
epoch timestamp cluster status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
1527195273 20:54:33 logging-es green 5 5 6478 3239 0 0 0 0 - 100.0%
node host ip shards disk.indices disk.used disk.avail disk.total disk.percent
logging-es-data-master-2esm8hxv 10.129.28.65 10.129.28.65 1295 33gb 151.1gb 1.8tb 1.9tb 7
logging-es-data-master-hge0qoxy 10.131.22.69 10.131.22.69 1296 23gb 137.4gb 1.8tb 1.9tb 6
logging-es-data-master-bal6ofun 10.131.22.68 10.131.22.68 1296 22.4gb 136.2gb 1.8tb 1.9tb 6
logging-es-data-master-hpg82wh6 10.128.2.66 10.128.2.66 1296 22.5gb 136.6gb 1.8tb 1.9tb 6
logging-es-data-master-emt7f0y0 10.129.28.64 10.129.28.64 1295 32.3gb 150.3gb 1.8tb 1.9tb 7
name host r m hc hp hm rc rp rm fdc fdp fdm load uptime
logging-es-data-master-hpg82wh6 10.128.2.66 d * 7.7gb 38 19.8gb 61.8gb 99 62.7gb 11817 1 1048576 15.17 1.4d
logging-es-data-master-bal6ofun 10.131.22.68 d - 9.5gb 48 19.7gb 153.3gb 98 157.2gb 12058 1 1048576 42.79 1.4d
logging-es-data-master-emt7f0y0 10.129.28.64 d - 7.5gb 38 19.7gb 155.1gb 99 157.2gb 13361 1 1048576 79.74 1.4d
logging-es-data-master-hge0qoxy 10.131.22.69 d m 10gb 50 19.7gb 153.3gb 98 157.2gb 12270 1 1048576 42.79 1.4d
logging-es-data-master-2esm8hxv 10.129.28.65 d m 6.7gb 34 19.7gb 155.1gb 99 157.2gb 12301 1 1048576 81.72 1.4d
name host r m fm fe qcm qce rcm rce rchc rcmc ft ftt rto rti sc sm siwm siwmx svmm
logging-es-data-master-hpg82wh6 10.128.2.66 d * 0b 0 0b 0 0b 0 0 0 32493 1.8h 9537651 4.8d 6121 115.1mb 385.4mb 3.2gb 324.1kb
logging-es-data-master-bal6ofun 10.131.22.68 d - 0b 0 328b 0 0b 0 0 0 33041 1.9h 10013174 4.9d 6178 114.3mb 463.4mb 3.1gb 440.8kb
logging-es-data-master-emt7f0y0 10.129.28.64 d - 0b 0 0b 0 0b 0 0 0 23926 1.1h 7375397 3.4d 6538 133.3mb 808.3mb 3.7gb 537.6kb
logging-es-data-master-hge0qoxy 10.131.22.69 d m 0b 0 328b 0 0b 0 0 0 31700 1.6h 10575243 5d 6038 115.1mb 388mb 3.1gb 453.9kb
logging-es-data-master-2esm8hxv 10.129.28.65 d m 0b 0 0b 0 0b 0 0 0 28048 1h 8368253 3.8d 6182 131.9mb 137.5mb 3.8gb 66.2kb
name host r m gc gto gti iic iito iiti mc mt mtt sfc sfto sfti sqc sqto sqti
logging-es-data-master-hpg82wh6 10.128.2.66 d * 0 586 89ms 1641378 77655281 3.6d 7 1179457 9.5d 0 0 0s 0 0 0s
logging-es-data-master-bal6ofun 10.131.22.68 d - 0 126 23ms 3003419 61520394 5.6d 4 1235672 8.6d 0 17137 19s 0 17137 44.9s
logging-es-data-master-emt7f0y0 10.129.28.64 d - 0 245 318ms 7089820 46963347 3.5d 25 908099 6d 0 11 10ms 0 11 16ms
logging-es-data-master-hge0qoxy 10.131.22.69 d m 0 121 189ms 2842407 56871770 4.2d 0 1252566 9d 0 17152 16.2s 0 17152 43.6s
logging-es-data-master-2esm8hxv 10.129.28.65 d m 0 102 120ms 4718014 47393183 3.7d 1 1014222 6.8d 0 6 276ms 0 6 62ms
host bulk.rejected bulk.completed bulk.queue bulk.queueSize bulk.size bulk.active bulk.largest
10.128.2.66 18559 22716322 0 1296 16 3 16
10.131.22.68 9 19595847 0 1296 32 0 32
10.129.28.64 0 16328617 0 1296 32 4 32
10.131.22.69 0 18980485 0 1296 32 0 32
10.129.28.65 0 16016642 690 1296 32 32 32
host refresh.rejected refresh.completed refresh.queue refresh.size refresh.active refresh.largest
10.128.2.66 0 9537758 0 8 1 8
10.131.22.68 0 10013490 1 10 10 10
10.129.28.64 0 7375554 11 10 10 10
10.131.22.69 0 10575571 0 10 7 10
10.129.28.65 0 8368308 0 10 2 10
host management.rejected management.completed management.queue management.size management.active management.largest
10.128.2.66 0 44043 0 4 1 4
10.131.22.68 0 40816 0 3 1 3
10.129.28.64 0 40791 0 3 1 3
10.131.22.69 0 40815 0 3 1 3
10.129.28.65 0 40794 0 3 1 3
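Note that most of these columns (bulk.completed, ft, gto, and so on) are cumulative counters since node start, so a single snapshot mostly confirms that nothing is currently rejected or queued; spotting trends takes repeated samples. A minimal loop for that (a sketch; the script path and five-minute interval are arbitrary):

while true; do bash /tmp/es-health.sh; sleep 300; done >> /tmp/es-health.log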
Here is an example from a cluster with unassigned shards:
Thu May 24 20:56:43 UTC 2018
epoch timestamp cluster status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
1527195403 20:56:43 logging-es red 3 3 5659 2828 0 0 12 0 - 99.8%
node host ip shards disk.indices disk.used disk.avail disk.total disk.percent
logging-es-data-master-gkvb8lj8 10.129.0.29 10.129.0.29 1576 27.4gb 135.2gb 1.8tb 1.9tb 6
logging-es-data-master-ml8zmsl7 10.128.0.77 10.128.0.77 2111 61.9gb 173.6gb 1.7tb 1.9tb 8
logging-es-data-master-zolfd11v 10.129.0.30 10.129.0.30 1972 49.3gb 160.4gb 1.8tb 1.9tb 7
UNASSIGNED 12
name host r m hc hp hm rc rp rm fdc fdp fdm load uptime
logging-es-data-master-gkvb8lj8 10.129.0.29 d * 16.3gb 53 30.7gb 153.4gb 98 157.2gb 14818 1 1048576 31.14 21.3h
logging-es-data-master-ml8zmsl7 10.128.0.77 d m 18.1gb 58 30.7gb 155.8gb 99 157.2gb 19916 1 1048576 23.57 21.1h
logging-es-data-master-zolfd11v 10.129.0.30 d m 21.2gb 69 30.7gb 153.4gb 98 157.2gb 17992 1 1048576 31.14 21.3h
name host r m fm fe qcm qce rcm rce rchc rcmc ft ftt rto rti sc sm siwm siwmx svmm
logging-es-data-master-gkvb8lj8 10.129.0.29 d * 0b 0 0b 0 0b 0 0 0 29950 44.7m 5557638 1.1d 7928 140.3mb 308.8mb 3.4gb 148.2kb
logging-es-data-master-ml8zmsl7 10.128.0.77 d m 0b 0 0b 0 0b 0 0 0 38451 1.2h 8177223 2.9d 10510 249.7mb 454mb 5gb 601.6kb
logging-es-data-master-zolfd11v 10.129.0.30 d m 0b 0 0b 0 0b 0 0 0 33444 1h 7602006 2.9d 9331 203.5mb 458.7mb 4.5gb 492.6kb
name host r m gc gto gti iic iito iiti mc mt mtt sfc sfto sfti sqc sqto sqti
logging-es-data-master-gkvb8lj8 10.129.0.29 d * 0 147 36ms 1 48861959 21h 7 599867 2d 0 22386 1.5s 0 22386 2.5s
logging-es-data-master-ml8zmsl7 10.128.0.77 d m 0 104 70ms 0 95514486 2.4d 9 944469 4.9d 0 22148 2.6s 0 22148 5.7s
logging-es-data-master-zolfd11v 10.129.0.30 d m 0 195 47ms 5 79627575 2.9d 2 903139 4.5d 0 10225 1.7s 0 10225 1.8s
host bulk.rejected bulk.completed bulk.queue bulk.queueSize bulk.size bulk.active bulk.largest
10.129.0.29 133901 10775966 0 50 32 0 32
10.128.0.77 2548220 21183026 43 50 32 32 32
10.129.0.30 480337 17562786 0 50 32 1 32
host refresh.rejected refresh.completed refresh.queue refresh.size refresh.active refresh.largest
10.129.0.29 0 5558012 4 10 10 10
10.128.0.77 0 8177729 0 10 0 10
10.129.0.30 0 7602586 0 10 3 10
host management.rejected management.completed management.queue management.size management.active management.largest
10.129.0.29 0 30634 0 5 1 5
10.128.0.77 0 28499 0 3 1 3
10.129.0.30 0 28564 0 3 1 3
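With the cluster red and 12 shards unassigned, the next question is which shards are affected and why. The large bulk.rejected counts on all three nodes are also a warning sign that indexing load is outrunning the bulk thread pool. The same _cat interface can identify the unassigned shards; reusing $curl_get and $ES_URL from the script above (a sketch; unassigned.reason is a documented _cat/shards column in ES 2.x):

$curl_get $ES_URL/_cat/shards?v\&h=index,shard,prirep,state,unassigned.reason | grep UNASSIGNED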