Created
May 27, 2017 03:57
-
-
Save wizpig64/01c88712991182000eea8d94c36137bd to your computer and use it in GitHub Desktop.
zfsdiff.py - show the size difference between replicated pools.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""zfsdiff.py - show the size difference between replicated pools. | |
It will also attempt to guess how many days behind replication is, | |
and output will go to stderr rather than stdout if difference > threshold, | |
so you can receive alerts when things get too far behind.
""" | |
from __future__ import print_function, unicode_literals | |
import re | |
import subprocess | |
import sys | |
from datetime import date | |
__author__ = "Phillip Marshall" | |
__copyright__ = "Copyright 2017, Phillip Marshall" | |
__license__ = "GPL" | |
__version__ = "1.0" | |
# Configuration
# =============

# Name of the source pool and of the pool it is replicated to.
pool_local = 'tank'
pool_remote = 'tank-backup'

# Commands whose output lists "<used> <dataset>@<snapshot>" lines for each
# side.  They are split on whitespace and executed without a local shell, so
# the trailing "| grep @" is handed to ssh as part of the remote command.
# NOTE(review): both commands are identical as shown and the host part looks
# redacted -- confirm each one points at the intended machine.
# NOTE(security): "sshpass -p" exposes the password in the process list;
# key-based ssh authentication would avoid that.
command_local = 'sshpass -p PASSWORD ssh [email protected] zfs list -t snapshot -o used,name | grep @'
command_remote = 'sshpass -p PASSWORD ssh [email protected] zfs list -t snapshot -o used,name | grep @'

# Total difference above which the report is written to stderr instead of
# stdout (human-readable size, parsed with human_to_bytes).
error_threshold = '100M'
# Size Translation
# ================

# Binary (1024-based) multipliers, smallest first.  The order matters to
# bytes_to_human, which walks the list until the scaled value fits.
prefixes_ordered = [
    ('', 1.0),
    ('K', 1024.0),
    ('M', 1048576.0),
    ('G', 1073741824.0),
    ('T', 1099511627776.0),
    ('P', 1125899906842624.0),
]
prefixes = dict(prefixes_ordered)


def human_to_bytes(h):
    """Convert a human-readable size such as '14.1K' into a byte count.

    Every prefix present in ``prefixes`` is accepted (including 'P', which
    used to be rejected), and a trailing 'B' as in '0B' or '512B' is
    tolerated.  Surrounding whitespace is ignored.

    Args:
        h: size string made of a decimal number followed by an optional
            prefix letter.

    Returns:
        The size in bytes as a float.

    Raises:
        KeyError: if the suffix is not a known prefix.
        ValueError: if the numeric part cannot be parsed.
    """
    text = h.strip()
    # Whatever follows the leading digits/dots is the unit suffix.
    suffix = text.lstrip('0123456789.')
    human_num = text[:len(text) - len(suffix)]
    human_prefix = suffix.strip()
    if human_prefix not in prefixes and human_prefix.endswith('B'):
        # Plain-byte or "KB"-style spelling: drop the redundant 'B'.
        human_prefix = human_prefix[:-1]
    return prefixes[human_prefix] * float(human_num)
def bytes_to_human(b):
    """Render a byte count as a short human-readable string, e.g. '14.0K'.

    The value is divided by each multiplier in ``prefixes_ordered`` in turn
    until the result drops below 1024 (or the largest prefix is reached).
    The number is then cut -- not rounded -- to at most four characters.
    """
    for unit, factor in prefixes_ordered:
        scaled = b / factor
        if scaled < 1024.0:
            break
    whole = int(scaled)
    # Show whole values without a ".0" tail.
    text = str(whole if scaled == whole else scaled)[:4]
    if text.endswith('.'):
        # The cut landed right after the decimal point; drop the bare dot.
        text = text[:-1]
    return '{}{}'.format(text, unit)
def test_conversions():
    """Print a few sample conversions in both directions for a visual check."""
    for sample in (' 0', '14.1K', '9.99G', '9999M'):
        print(human_to_bytes(sample))
    for size in (0, 14438, 10726680821, 10484711424):
        print(bytes_to_human(size))
# Regex
# =====

# One line of listing output: "<used> <dataset>@<snapshot>".
snapshot_pattern = re.compile(r'^ *(?P<used_human>[0-9.]+[KMGT]*) +(?P<dataset>\S+)@(?P<snapshot>\S+)$')
# A YYYYMMDD stamp (years 2000-2099) anywhere in a string.  The leading
# greedy ".*" makes the right-most candidate win.
datestamp_pattern = re.compile(r'.*(?P<year>20[0-9][0-9])(?P<month>[01][0-9])(?P<day>[0-3][0-9]).*')


# Dates
# =====

def extract_date(string):
    """Return the date embedded in *string* as YYYYMMDD, or None.

    None is returned both when no stamp is found and when the digits that
    matched do not form a real calendar date (e.g. month 13 or February 30),
    so unrelated digit runs in a snapshot name cannot abort the script.
    """
    m = datestamp_pattern.match(string)
    if not m:
        return None
    try:
        return date(**{k: int(v) for k, v in m.groupdict().items()})
    except ValueError:
        # The pattern is looser than the calendar; treat as "no date".
        return None
def latest_date(l):
    """Return the most recent date found in any string of *l*.

    Strings without a recognisable date are ignored; ``date.min`` is
    returned when none of them carries one.
    """
    found = [d for d in (extract_date(entry) for entry in l) if d]
    return max(found) if found else date.min
# Main
# ====

def print_error(*args, **kwargs):
    """Behave like print(), but write to standard error."""
    return print(*args, file=sys.stderr, **kwargs)
def _list_snapshots(command, label):
    """Run a listing command and return its stdout as a list of text lines.

    Args:
        command: command line, split on whitespace and run without a shell.
        label: short name ('local' / 'remote') used in warnings.  The command
            itself is never echoed because it embeds the ssh password.
    """
    proc = subprocess.Popen(
        command.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode: the str regexes cannot match bytes on Python 3.
        universal_newlines=True,
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        # A failed listing would otherwise look like "nothing to report".
        # (A listing with no snapshot lines also ends up here, since the
        # trailing grep exits non-zero when nothing matches.)
        print_error('warning: {} listing exited with status {}'.format(
            label, proc.returncode))
        if err.strip():
            print_error(err.strip())
    return out.splitlines()


def _collect_sizes(lines):
    """Map dataset -> {snapshot: used bytes} for every parsable line."""
    collection = {}
    for line in lines:
        match = snapshot_pattern.match(line)
        if not match:
            continue
        used = human_to_bytes(match.group('used_human'))
        by_snapshot = collection.setdefault(match.group('dataset'), {})
        by_snapshot[match.group('snapshot')] = used
    return collection


def _dataset_differences(collection):
    """Return {local dataset: bytes not yet replicated, or None}.

    None marks a local dataset that has no counterpart in the remote pool.
    The per-dataset snapshot maps inside *collection* are pruned in place.
    """
    differences = {}
    for dataset_local in sorted(collection):
        if (not dataset_local.startswith(pool_local)
                or dataset_local.startswith(pool_remote)
                or '/.' in dataset_local):  # ignore hidden datasets
            continue
        # Swap only the leading pool name; str.replace would also rewrite
        # later occurrences such as "tank/tank".
        dataset_remote = pool_remote + dataset_local[len(pool_local):]
        if dataset_remote not in collection:
            differences[dataset_local] = None
            continue
        local_sizes = collection[dataset_local]
        remote_sizes = collection[dataset_remote]
        # Prune snapshots that exist in both pools; otherwise the size
        # comparison is meaningless.  Iterate over a copy of the keys because
        # the dict is modified inside the loop.
        # NOTE(review): this removes every remote snapshot, so the remote
        # total below is always 0 and the result is the size of the local
        # snapshots missing remotely -- kept as in the original.
        for snapshot in list(remote_sizes):
            local_sizes.pop(snapshot, None)
            remote_sizes.pop(snapshot, None)
        total_used_local = sum(local_sizes.values())
        total_used_remote = sum(remote_sizes.values())
        differences[dataset_local] = max(0, total_used_local - total_used_remote)
    return differences


def main():
    """Compare the two pools and print a per-dataset and total report.

    The report goes to stderr when the total difference exceeds
    ``error_threshold`` and to stdout otherwise.
    """
    # get the zfs list output for local and remote pools
    list_local = _list_snapshots(command_local, 'local')
    list_remote = _list_snapshots(command_remote, 'remote')

    # record relevant snapshot sizes, then compare dataset by dataset
    collection = _collect_sizes(list_local + list_remote)
    differences = _dataset_differences(collection)

    # calculate total difference, determine if past threshold
    total_difference = sum(v for v in differences.values() if v)
    if total_difference > human_to_bytes(error_threshold):
        emit = print_error
    else:
        emit = print

    # print each difference if not zero.
    for dataset in sorted(differences):
        d = differences[dataset]
        if d is None:
            emit(dataset, 'error: no remote dataset')
        elif d:
            emit(dataset, bytes_to_human(d))

    # determine days difference - not perfect: it only compares the newest
    # date stamp found in each listing.
    days_behind = (latest_date(list_local) - latest_date(list_remote)).days
    emit('total: {}, {} days behind.'.format(
        bytes_to_human(total_difference), days_behind))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment