-
-
Save Guiorgy/08ead139a34481a6c3a93a86b7bb4769 to your computer and use it in GitHub Desktop.
ZFS Health Check Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Copyright 2025 Guiorgy
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Strict mode: abort on any unhandled command failure (-e) and on expansion of
# an unset variable (-u).
set -eu

# Exit codes returned by this script. They are read-only so that no later
# statement can silently change the meaning of a result.
readonly CODE_OK=0         # All ZFS volumes are healthy
readonly CODE_ERROR=1      # General script error
readonly CODE_UNHEALTHY=2  # One of the ZFS volumes is unhealthy
readonly CODE_RWC=3        # One of the ZFS volumes has READ/WRITE/CKSUM errors
readonly CODE_CAPACITY=4   # One of the ZFS volumes is reaching its capacity
readonly CODE_SCRUB=5      # One of the ZFS volumes requires a scrub
# Health - Check if all zfs volumes are in good condition.
# We are looking for any keyword signifying a degraded or broken array.

# zfs_unhealthy_lines: filter "zpool status" text read from stdin.
# Outputs: every line containing one of the trouble keywords, on stdout.
# Returns: 0 if at least one such line was found, non-zero otherwise.
zfs_unhealthy_lines() {
  grep -E '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)'
}

# The matching lines are deliberately left on stdout so that whoever runs the
# script can see what triggered the failure.
if /sbin/zpool status | zfs_unhealthy_lines; then
  exit "$CODE_UNHEALTHY"
fi
# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported the script exits with CODE_RWC. You should then look to replace
# the faulty drive and run "zpool scrub" on the affected volume after
# resilvering.

# zfs_rwc_error_lines: filter "zpool status" text read from stdin.
# Outputs: every ONLINE config line whose READ, WRITE or CKSUM counter is not
#          exactly "0", on stdout.
# Returns: 0 if at least one such line was found, non-zero otherwise.
#
# Each counter is compared on its own. Gluing the three columns together and
# searching for "000" would hide counts such as "10 0 0" or "0 0 10", because
# the glued strings "1000" and "0010" both contain "000".
zfs_rwc_error_lines() {
  awk '
    {
      # Find the STATE column rather than assuming it is field 2, so that a
      # name containing a space does not shift the counters out of view.
      # "state: ONLINE" summary lines have no three fields after ONLINE and
      # are therefore never inspected.
      for (i = 2; i <= NF - 3; i++) {
        if ($i == "ONLINE") {
          if ($(i + 1) != "0" || $(i + 2) != "0" || $(i + 3) != "0") {
            print
            found = 1
          }
          break
        }
      }
    }
    END { exit !found }
  '
}

# The offending lines are left on stdout so the caller can see which device
# reported errors.
if /sbin/zpool status | zfs_rwc_error_lines; then
  exit "$CODE_RWC"
fi
# Capacity
#
# ZFS uses a copy-on-write scheme. The file system writes new data to
# sequential free blocks first and when the uberblock has been updated the new
# inode pointers become valid. This method is true only when the pool has
# enough free sequential blocks. If the pool is at capacity and space limited,
# ZFS will have to randomly write blocks. This means ZFS can not create an
# optimal set of sequential writes and write performance is severely impacted.
#
# As such, it is recommended to set a quota of 80%-95% of the total capacity.
# The percentage really depends on how large your volume is. If you have a
# 128GB SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you
# can probably set the warning closer to 95%. 90% is a good default.
defaultQuotaCapacity=90  # limit (percent of pool size) used when no quota is set
minRemainingCapacity=5   # minimum headroom (percentage points) below the limit

# zfs_capacity_low SIZE QUOTA USED_PCT DEFAULT_PCT MIN_LEFT
#   SIZE         pool size in bytes
#   QUOTA        quota in bytes; 0 or a non-numeric value means "no quota"
#   USED_PCT     used capacity of the pool, in percent
#   DEFAULT_PCT  limit in percent to apply when there is no quota
#   MIN_LEFT     minimum number of percentage points that must remain
# Returns: 0 when (limit - USED_PCT) < MIN_LEFT, i.e. the pool is too close to
#          its limit; non-zero otherwise. The limit is QUOTA / SIZE * 100 when
#          a quota is set, DEFAULT_PCT otherwise.
zfs_capacity_low() {
  awk -v size="$1" -v quota="$2" -v used="$3" -v fallback="$4" -v min="$5" '
    BEGIN {
      # "+ 0" forces a numeric reading, so "none" or "-" count as no quota;
      # the size test also guards the division.
      limit = (quota + 0 > 0 && size + 0 > 0) ? quota / size * 100 : fallback
      exit !(limit - used < min)
    }
  '
}

# With "set -e" in effect a failing zpool/zfs call in any of the assignments
# below aborts the script with that command's exit status.
zfsVolumes=$(/sbin/zpool list -H -o name)

# Read one pool name per line (pool names may contain spaces, so the list must
# not be word-split). File descriptor 3 is used so that the commands inside the
# loop cannot consume the list from stdin.
while IFS= read -r volume <&3; do
  [ -n "$volume" ] || continue

  size=$(/sbin/zpool list -H -p -o size "$volume")
  quota=$(/sbin/zfs get -H -p -o value quota "$volume")
  capacity=$(/sbin/zpool list -H -p -o capacity "$volume")
  capacity=${capacity%\%}  # tolerate a trailing percent sign

  # Refuse to evaluate values that are not plain numbers rather than letting a
  # parse error pass for "enough space left".
  if ! [[ $size =~ ^[0-9]+$ && $capacity =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    printf 'Unexpected size "%s" or capacity "%s" for ZFS pool "%s"\n' \
      "$size" "$capacity" "$volume" >&2
    exit "$CODE_ERROR"
  fi

  if zfs_capacity_low "$size" "$quota" "$capacity" \
    "$defaultQuotaCapacity" "$minRemainingCapacity"; then
    # Name the pool on stdout, like the other checks print what they found.
    printf 'ZFS pool "%s" is at %s%% capacity, too close to its limit\n' \
      "$volume" "$capacity"
    exit "$CODE_CAPACITY"
  fi
done 3<<< "$zfsVolumes"
# Every check above passed: report all ZFS volumes as healthy.
exit "$CODE_OK"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment