Last active
March 4, 2025 10:50
-
-
Save roylez/a2f61802206d3ab7905f81254651d428 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/gawk -f
#
# Usage: ./rabbit-tell.awk <rabbitmqctl_report>
#
# Common RabbitMQ issues that this script can be used to identify:
#
# - Partitioned cluster
#
# - High binary/queue memory usage. There are mainly two causes of this:
#
#   + queue depth buildup (some clients are disconnected)
#   + RabbitMQ does not do periodic garbage collection by default
#
# - High fd/socket usage. This is usually caused by excessive connections, which can be
#   analyzed together with the connection breakdown
#
# - Unbalanced queue master count on nodes. Usually this means "queue_master_locator" (only in 3.6
#   and newer) should be tuned.
#
# Format x with exactly two digits after the decimal point.
# Returns the formatted value as a string (e.g. 1.5 -> "1.50").
function round(x)
{
    return sprintf("%.2f", x)
}
# Print one row of the node status table, terminated by a newline.
#
#   label - row title, left-aligned in a 15-character column
#   keys  - array whose indices select which cells are printed
#   array - cell values, looked up by each index of `keys` and right-aligned
#           in 23-character columns
#   color - optional ANSI SGR colour code; when set, the row is wrapped in
#           "\033[<color>;1m" ... "\033[m"
#   k     - local loop variable (awk idiom: extra parameter, never passed)
function format_status_line(label, keys, array, color,    k)
{
    if (color) { printf "\033[%s;1m", color }
    printf "%-15s", label
    # A local index keeps this function from overwriting the global `i`
    # that other parts of the script use as a counter.
    for (k in keys) { printf "%23s", array[k] }
    if (color) { printf "%s", "\033[m" }
    print ""
}
# Convert a byte count into a human-readable string using binary (1024-based)
# units, e.g. 1536 -> "1.50KB".
#
#   x - number of bytes; empty or non-numeric input is treated as 0
#
# Returns the value scaled to the largest unit (up to PB) in which it is at
# least 1, formatted by round() and immediately followed by the unit name.
# Values below 1 (including 0 and empty input) are reported in bytes.
#
#   units, exp, scaled - local variables (extra parameters, never passed)
function humanize(x,    units, exp, scaled)
{
    split("B KB MB GB TB PB", units)
    # Walk down from PB; stopping at exp == 0 guarantees termination and keeps
    # the unit index inside the table even when x is 0, empty or below 1.
    for (exp = 5; exp > 0; exp--) {
        scaled = x / (2 ^ (10 * exp))
        if (scaled >= 1) { break }
    }
    if (exp == 0) { scaled = x + 0 }
    return round(scaled) units[exp + 1]
}
# Wrap string `s` in the ANSI SGR sequence for `color` and append a reset
# sequence. Returns the decorated string; nothing is printed.
function colorize(s, color)
{
    return ("\033[" color "m") s "\033[m"
}
# Print a section title: a blank line, then `s` upper-cased inside the
# "\033[4;1m" ... "\033[m" escape pair, then another blank line.
function print_title(s)
{
    printf "\n\033[4;1m%s\033[m\n\n", toupper(s)
}
# Find the index of the first element of `array` (in awk's traversal order)
# whose value equals `value`.
#
# Returns that index, or an empty string when no element matches. Note that an
# empty string used as a field number (`$(...)`) selects $0.
#
#   idx - local loop variable (extra parameter, never passed)
function get_index(array, value,    idx)
{
    for (idx in array) {
        if (array[idx] == value) { return idx }
    }
    return ""
}
# remove color junk for 3.8+
{ gsub(/\x1B\[[0-9;]*[mK]/, "") }

# Start of a node status dump: remember which node the following lines
# describe. Quotes are stripped first; substr($4, 8) then drops the first seven
# characters of the fourth word (presumably the "rabbit@" prefix - confirm
# against real reports). A line that does not end in "..." switches the script
# to the 3.6 parser rules.
/^Status of node/ {
    section = "node"
    gsub(/'/, "")
    node = substr($4, 8)
    nodes[node] = node
    if ( ! /\.\.\.$/ ) { ver_36 = 1 }
}

# A blank line closes the current section.
!NF { section = 0; next }

# section header parsing
# The first record seen inside a section while header_parsed is unset is taken
# as the column header. (header_parsed starts out unset, so the very first
# "Status of node" line is consumed here as well.)
section && !header_parsed {
    # Forget the previous section's columns so a narrower header cannot be
    # mixed with stale names left over from a wider one.
    delete header
    # <= NF so that the last column name is recorded too.
    for (i = 1; i <= NF; i++) { header[i] = $i }
    header_parsed = 1
    next
}
# 36 format parser {{{

# Version number: first quoted x.y.z that follows a comma on the line holding
# the rabbit,"RabbitMQ", entry.
ver_36 && section == "node" && $0 ~ /rabbit,"RabbitMQ",/ {
    match($0, /,"([0-9]+\.[0-9]+\.[0-9]+)/, res)
    version[node] = res[1]
}

# Memory, uptime and process figures of the current node.
ver_36 && section == "node" && $0 ~ /(total|binary|vm_memory_limit|queue_procs|queue_slave_procs|processes|uptime),/ {
    # res[1] holds the first number that follows a comma on this line.
    match($0, /,([0-9]+)/, res)
    if ($0 ~ /total/) {
        mem_total[node] = res[1]
    }
    if ($0 ~ /binary/) {
        mem_binary[node] = res[1]
    }
    if ($0 ~ /vm_memory_limit/) {
        mem_limit[node] = res[1]
    }
    # queue_procs and queue_slave_procs are summed into one figure
    if ($0 ~ /queue_procs|queue_slave_procs/) {
        mem_queue[node] += res[1]
    }
    if ($0 ~ /uptime/) {
        uptime[node] = res[1]
    }
    # Kept last: this re-runs match() and therefore overwrites res.
    if ($0 ~ /processes/) {
        match($0, /used,([0-9]+)/, res)
        process_used[node] = res[1]
    }
}

# File descriptor and socket limits/usage of the current node.
ver_36 && section == "node" && $0 ~ /(total_limit|total_used|sockets_limit|sockets_used),/ {
    match($0, /,([0-9]+)/, res)
    if ($0 ~ /total_limit/) {
        fd_total[node] = res[1]
    }
    if ($0 ~ /total_used/) {
        fd_used[node] = res[1]
    }
    if ($0 ~ /sockets_limit/) {
        sockets_limit[node] = res[1]
    }
    if ($0 ~ /sockets_used/) {
        sockets_used[node] = res[1]
    }
}

# Start of a cluster status dump; the node name comes from the fifth word
# after quotes are removed, minus its first seven characters.
ver_36 && $0 ~ /^Cluster status of node/ {
    section = "cluster"
    gsub(/'/, "")
    node = substr($5, 8)
    next
}

# An empty partitions list means the node reports no partition.
ver_36 && section == "cluster" && $0 ~ /partitions,/ {
    if ($0 ~ /partitions,\[\]/) {
        partitioned[node] = "NO"
    } else {
        partitioned[node] = "YES"
    }
}

# Tabular sections: each one resets header_parsed so that its next record is
# read as the column header. From the connection listing on, fields are
# tab-separated.
ver_36 && $0 ~ /^Connections:/ {
    FS = "\t"
    header_parsed = 0
    section = "connection"
    next
}

ver_36 && $0 ~ /^Channels:/ {
    header_parsed = 0
    section = "channel"
    next
}

# The vhost name is whatever sits between "Queues on " and the last colon.
ver_36 && $0 ~ /^Queues on/ {
    match($0, /Queues on (.+):/, res)
    vhost = res[1]
    header_parsed = 0
    section = "queue"
    next
}

ver_36 && $0 ~ /^Exchanges on/ {
    header_parsed = 0
    section = "exchange"
    next
}
# }}}
# 3.8+ formattings {{{

# Any line ending in "..." is a command banner and closes the current section.
!ver_36 && $0 ~ /\.\.\.$/ {
    section = 0
}

# Connection listing: tab-separated, and the next record is its column header.
!ver_36 && $0 ~ /^Listing connections/ {
    header_parsed = 0
    section = "connection"
    FS = "\t"
    next
}

# Queue listing for one vhost; the vhost name is the text between
# "Listing queues for vhost " and the last space on the line.
!ver_36 && $0 ~ /^Listing queues for/ {
    match($0, /^Listing queues for vhost (.+) /, res)
    vhost = res[1]
    header_parsed = 0
    section = "queue"
    next
}
# }}}
# data sections {{{

# Connection rows: count connections per user and per client name. Columns are
# located by name through the header parsed for this section.
section == "connection" {
    client_property = $(get_index(header, "client_properties"))
    u = $(get_index(header, "user"))
    # Client name: the text between `connection_name","` and the next colon.
    # When the pattern is absent, res[1] is empty and the connection is
    # counted under an empty client name.
    match(client_property, /connection_name","([^:]*):/, res)
    c = res[1]
    user[u]++
    uc[c] = u          # a client name is attributed to the last user seen with it
    client[c]++
    # Rows with an empty user value are echoed unchanged.
    if (! u) { print }
}

# Queue rows: count queues per hosting node and remember queues that hold
# messages.
section == "queue" {
    pid = $(get_index(header, "pid"))
    name = $(get_index(header, "name"))
    messages = $(get_index(header, "messages"))
    # The node is the text between "@" and the following "." in the pid.
    # Rows whose pid does not have that shape are left out of the count
    # rather than being tallied under an empty node name.
    if (match(pid, /@([^.]+)\./, res)) {
        queue_master[res[1]]++
    }
    if (messages) {
        queue_vhost[name] = vhost
        queue_messages[name] = messages
        queue_consumers[name] = $(get_index(header, "consumers"))
    }
}
# }}}
# outputs {{{

# Share of `whole` taken by `part`, as a percentage formatted by round().
# A missing or zero `whole` gives "0.00" instead of aborting the script with a
# division by zero.
function percent_of(part, whole)
{
    if (whole + 0 == 0) { return round(0) }
    return round(part / whole * 100)
}

# Final report. For 3.6-format input a per-node status table is printed first;
# a row is highlighted (colour 31) as soon as any node exceeds that row's
# threshold: 80% for total memory, 50% for the other percentages. Then follow
# the connection breakdown, the queues holding messages and the queue count
# per node.
END {
    if (ver_36) {
        print_title("cluster nodes")
        format_status_line("node", nodes, nodes)
        format_status_line("version", nodes, version)
        format_status_line("uptime", nodes, uptime)

        for (n in nodes) { m_limit[n] = humanize(mem_limit[n]) }
        format_status_line("mem_limit", nodes, m_limit)

        # total memory relative to the memory limit
        for (n in nodes) {
            percent = percent_of(mem_total[n], mem_limit[n])
            if (int(percent) > 80) { m_total_color = 31 }
            m_total[n] = "(" percent "%) " humanize(mem_total[n])
        }
        format_status_line("mem_total", nodes, m_total, m_total_color)

        # binary memory relative to total memory
        for (n in nodes) {
            percent = percent_of(mem_binary[n], mem_total[n])
            if (int(percent) > 50) { m_binary_color = 31 }
            m_binary[n] = "(" percent "%) " humanize(mem_binary[n])
        }
        format_status_line("mem_binary", nodes, m_binary, m_binary_color)

        # queue memory relative to total memory
        for (n in nodes) {
            percent = percent_of(mem_queue[n], mem_total[n])
            if (int(percent) > 50) { m_queue_color = 31 }
            m_queue[n] = "(" percent "%) " humanize(mem_queue[n])
        }
        format_status_line("mem_queue", nodes, m_queue, m_queue_color)

        for (n in nodes) {
            percent = percent_of(fd_used[n], fd_total[n])
            if (int(percent) > 50) { fd_color = 31 }
            fd[n] = "(" percent "%) " fd_used[n]
        }
        format_status_line("fd", nodes, fd, fd_color)

        for (n in nodes) {
            percent = percent_of(sockets_used[n], sockets_limit[n])
            if (int(percent) > 50) { sockets_color = 31 }
            sockets[n] = "(" percent "%) " sockets_used[n]
        }
        format_status_line("sockets", nodes, sockets, sockets_color)

        # process usage is measured against a fixed limit of 1048576
        for (n in nodes) {
            percent = percent_of(process_used[n], 1048576)
            if (int(percent) > 50) { process_color = 31 }
            process[n] = "(" percent "%) " process_used[n]
        }
        format_status_line("process_used", nodes, process, process_color)

        for (n in nodes) {
            if (partitioned[n] == "YES") { partition_color = 31 }
        }
        format_status_line("partitioned", nodes, partitioned, partition_color)
    }

    print_title("connections breakdown by user/client")
    for (u in user) {
        print u, user[u]": "
        # list the client names attributed to this user
        for (c in client) { if (uc[c] == u) { print "- "c, client[c] } }
    }

    print_title("queues with messages")
    if (length(queue_messages)) {
        printf "%-15s%45s%10s%10s\n", "vhost", "queue", "messages", "consumers"
        for (q in queue_messages) {
            printf "%-15s%45s%10s%10s\n", queue_vhost[q], q, queue_messages[q], queue_consumers[q]
        }
    }

    print_title("queue master count by node")
    for (n in queue_master) { print n ": \t" queue_master[n] }
}
# }}}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment