Created
January 18, 2024 11:37
-
-
Save jsharkey13/4efafa299b334b40b329ebc57ef0a3e4 to your computer and use it in GitHub Desktop.
Analyse zpool disk latencies with graphical output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# zpool disk latency visualisation
#
# This script parses the output of the zpool stats command:
#     zpool iostat -pvwH
# which provides per-disk latency information for reads and writes.
#
# Run the command:
#
#     zpool iostat -pvwH > output.tsv
#
# and copy the resulting file to the same folder as this script, then run
# this script.
#
# The ["tank"] list can be replaced by any names to exclude from the plots
# (i.e. the pool and groups), and "zpool" can be changed to an appropriate
# title.
#
import matplotlib.pyplot as plt | |
import numpy as np | |
__all__ = ["parse_output", "display_graphs"] | |
# Names given, in order, to the value columns of every histogram row (the
# columns that follow the leading bucket field).
COLUMN_HEADINGS = [
    f"{queue}:{direction}"
    for queue in ("total_wait", "disk_wait", "syncq_wait", "asyncq_wait")
    for direction in ("read", "write")
] + ["scrub", "trim"]

# Display labels for the latency buckets. The parser checks that each section
# has exactly this many rows, and the plots use these as x-axis tick labels
# by position.
BUCKETS = [
    # Nanoseconds.
    "1ns", "3ns", "7ns", "15ns", "31ns", "63ns", "127ns", "255ns", "511ns",
    # Microseconds.
    "1us", "2us", "4us", "8us", "16us", "32us", "65us", "131us", "262us", "524us",
    # Milliseconds.
    "1ms", "2ms", "4ms", "8ms", "16ms", "33ms", "67ms", "134ms", "268ms", "536ms",
    # Seconds.
    "1s", "2s", "4s", "8s", "17s", "34s", "68s", "137s",
]

# Bar colours, picked by a disk's position in the list being plotted.
COLOURS = [
    "red",
    "darkorange",
    "gold",
    "greenyellow",
    "seagreen",
    "turquoise",
    "royalblue",
    "rebeccapurple",
]
def _parse_histogram_rows(tsv_rows):
    """Convert the tab-separated rows of one histogram section into a dict.

    The first field of each row is used as the bucket key; the remaining
    fields are converted to int and paired, in order, with COLUMN_HEADINGS.

    Returns:
        A dict mapping each bucket key to a dict of {column heading: count}.

    Raises:
        AssertionError: If a row does not have one value per column heading,
            or if the number of distinct buckets differs from len(BUCKETS).
        ValueError: If a value field is not an integer.
    """
    expected_columns = len(COLUMN_HEADINGS)
    histogram = {}
    for line in tsv_rows:
        fields = line.split("\t")
        label, counts = fields[0], fields[1:]
        assert len(counts) == expected_columns, f"Expected {expected_columns} columns, found {len(counts)}!"
        histogram[label] = {heading: int(count) for heading, count in zip(COLUMN_HEADINGS, counts)}
    assert len(histogram) == len(BUCKETS), f"Expected {len(BUCKETS)} rows, found {len(histogram)}!"
    return histogram
def parse_output(tsv_value):
    """Parse the output of `zpool iostat -pwvH` into a dictionary.

    The text is split into sections on blank lines. Within each non-empty
    section the first line is the title and the remaining lines are the
    histogram rows.

    Returns:
        A dict mapping each section title to its parsed histogram.
    """
    parsed = {}
    for raw_section in tsv_value.split("\n\n"):
        section = raw_section.strip()
        if not section:
            # Nothing but whitespace between separators.
            continue
        lines = section.split("\n")
        parsed[lines[0]] = _parse_histogram_rows(lines[1:])
    return parsed
def display_graphs(zpool_stats, *, name="", include_disks=None):
    """Plot the latency stats for a group of disks.

    One figure is drawn per entry in COLUMN_HEADINGS. Each figure has a
    group of bars (one per plotted disk) at every latency bucket and a
    logarithmic y axis. Disks whose counts for a column are all zero are
    left out of that column's figure and its legend.

    Args:
        zpool_stats: Mapping of section title to parsed histogram, in the
            shape returned by `parse_output`.
        name: Prefix used in each figure title.
        include_disks: Iterable of `zpool_stats` keys to plot. Defaults to
            every key in `zpool_stats`.

    Raises:
        KeyError: If a name in `include_disks` is not a key of `zpool_stats`.
    """
    # Materialise once: the names are counted and then iterated once per
    # column, which a one-shot iterable could not support.
    disks = list(zpool_stats) if include_disks is None else list(include_disks)

    # The bar layout is identical for every figure, so compute it up front.
    n_disks = len(disks)
    width = round(1 / (n_disks + 2), 3)
    bin_positions = np.arange(len(BUCKETS))

    for col in COLUMN_HEADINGS:
        fig, ax = plt.subplots()
        fig.set_size_inches(20, 8)
        fig.suptitle(f"{name} {col}", fontsize=20)

        handles = []
        labels = []
        for i, disk in enumerate(disks):
            data = [r[col] for r in zpool_stats[disk].values()]
            if sum(data) == 0:
                # Skip all-blank groups.
                continue
            x_positions = bin_positions + (i - (n_disks // 2)) * width  # Shift bars around centre of the bin.
            # Wrap around the palette so having more disks than colours
            # does not raise IndexError.
            h = ax.bar(x_positions, data, width, color=COLOURS[i % len(COLOURS)])
            handles.append(h)
            # Keep each label paired with its handle; labelling from the full
            # disk list would mislabel bars whenever a disk is skipped above.
            labels.append(disk)

        ax.set_xticks(range(len(BUCKETS))[::2])  # Too crowded with every bin labelled.
        ax.set_xticklabels(BUCKETS[::2])
        ax.semilogy()
        ax.legend(handles=handles, labels=labels)
        plt.show()
if __name__ == "__main__":
    # Run `zpool iostat -pwvH > output.tsv`.
    with open("output.tsv") as tsv_file:
        raw_output = tsv_file.read()
    stats = parse_output(raw_output)

    excluded = ["tank"]  # Exclude whole-pool stats?
    disks = [title for title in stats if title not in excluded]
    display_graphs(stats, name="zpool", include_disks=disks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It is much easier to see which disk is bad at a glance from this:
than by comparing the bad disk output section:
to another disk output section
and the remaining disk sections too.