Created May 27, 2017 03:57
#!/usr/bin/env python
""" - show the size difference between replicated pools.
It will also attempt to guess how many days behind replication is,
and output will go to stderr rather than stdout if difference > threshold,
so you can receive alerts when things get too far behind.
"""
from __future__ import print_function, unicode_literals
import re
import subprocess
import sys
from datetime import date
__author__ = "Phillip Marshall"
__copyright__ = "Copyright 2017, Phillip Marshall"
__license__ = "GPL"
__version__ = "1.0"
# Configuration
# =============
# Name of the source pool: only datasets whose name starts with this value are
# compared against the backup.
pool_local = 'tank'
# Name of the backup pool: substituted for pool_local in a dataset name to
# find the matching replicated dataset.
pool_remote = 'tank-backup'
# Commands that list snapshots for the local and the remote side. Each string
# is split on whitespace and executed with subprocess.Popen (no local shell).
# NOTE(review): the password is passed on the command line, which typically
# exposes it in the process list -- consider key-based ssh authentication.
# NOTE(review): both commands are textually identical here; the host part looks
# redacted, so confirm each one targets the intended machine.
command_local = 'sshpass -p PASSWORD ssh [email protected] zfs list -t snapshot -o used,name | grep @'
command_remote = 'sshpass -p PASSWORD ssh [email protected] zfs list -t snapshot -o used,name | grep @'
# Human-readable size; when the total difference exceeds it, the report is
# written to stderr instead of stdout.
error_threshold = '100M'
# Size Translation
# ================
# (suffix, multiplier) pairs in ascending order of magnitude. The order
# matters: bytes_to_human walks this list and stops at the first unit in which
# the value drops below 1024.
prefixes_ordered = [
    ('', 1.0),
    ('K', 1024.0),
    ('M', 1048576.0),
    ('G', 1073741824.0),
    ('T', 1099511627776.0),
    ('P', 1125899906842624.0),
]
# Same data as a lookup table: suffix -> multiplier.
prefixes = dict(prefixes_ordered)
def human_to_bytes(h):
    """Convert a human-readable size such as ``'1.5K'`` or ``' 0'`` to bytes.

    Surrounding whitespace is ignored. The optional unit suffix is looked up
    in the module-level ``prefixes`` table, so every unit defined there
    (including ``P``) is accepted. Lowercase suffixes and a trailing byte
    marker (``'0B'``, ``'1.5KB'``) are tolerated as well.

    Args:
        h: Size string, e.g. ``'100M'``.

    Returns:
        The size in bytes as a float.

    Raises:
        KeyError: If the suffix is not a known unit.
        ValueError: If the numeric part cannot be parsed.
    """
    text = h.strip().upper()
    # Some tools append a literal "B"; it carries no magnitude information.
    if text.endswith('B'):
        text = text[:-1]
    # At most one trailing letter is the unit; everything before it is the number.
    suffix = text[-1:] if text[-1:].isalpha() else ''
    number = text[:len(text) - len(suffix)]
    return prefixes[suffix] * float(number)
def bytes_to_human(b):
    """Format a byte count as a short human-readable string, e.g. ``'1.5K'``.

    The value is scaled to the first unit in ``prefixes_ordered`` in which it
    is below 1024, then cut to at most four characters (truncated, not
    rounded). Whole numbers are printed without a decimal point.

    Args:
        b: Number of bytes (int or float).

    Returns:
        A string made of the scaled number followed by its unit suffix. Values
        too large for the biggest unit are expressed in that unit with their
        full integer part instead of yielding ``None``.
    """
    if b < 0:
        # Format the magnitude and re-attach the sign so that the
        # four-character cut below never chops digits off a negative number.
        return '-' + bytes_to_human(-b)
    for prefix, mult in prefixes_ordered:
        num = b / mult
        if num < 1024.0:
            break
    if num >= 1024.0:
        # Ran out of units: keep every integer digit of the largest unit.
        return '{}{}'.format(int(num), prefix)
    if num == int(num):
        num = int(num)
    text = str(num)[:4]
    if text.endswith('.'):
        text = text[:-1]
    return '{}{}'.format(text, prefix)
def test_conversions():
    """Smoke check: print the byte value parsed from the string ``' 0'``."""
    parsed = human_to_bytes(' 0')
    print(parsed)
# Regex
# =====
# Matches one listing line of the form "<used> <dataset>@<snapshot>" with
# optional leading spaces, exposing the parts as the named groups
# used_human, dataset and snapshot.
# NOTE(review): the suffix class is [KMGT], so a size with a 'P' suffix (which
# the prefix table defines) would not match -- confirm whether that is intended.
snapshot_pattern = re.compile(r'^ *(?P<used_human>[0-9.]+[KMGT]*) +(?P<dataset>\S+)@(?P<snapshot>\S+)$')
# Finds an 8-digit YYYYMMDD stamp (years 2000-2099) anywhere in a string.
# The leading ".*" is greedy, so when several stamps are present the last one
# is captured. The digit classes also admit impossible values such as month 19
# or day 39.
datestamp_pattern = re.compile(r'.*(?P<year>20[0-9][0-9])(?P<month>[01][0-9])(?P<day>[0-3][0-9]).*')
# Dates
# =====
def extract_date(string):
    """Return the date encoded as a YYYYMMDD stamp inside ``string``.

    Args:
        string: Text that may contain a datestamp, e.g. a snapshot name.

    Returns:
        A ``datetime.date``, or ``None`` when no stamp is found or the
        matched digits do not form a real calendar date.
    """
    m = datestamp_pattern.match(string)
    if not m:
        return None
    parts = {k: int(v) for k, v in m.groupdict().items()}
    try:
        return date(**parts)
    except ValueError:
        # The pattern accepts digit runs such as month 13 or day 00; treat
        # those as "no date" instead of aborting the whole report.
        return None
def latest_date(l):
    """Return the most recent date found in the strings of ``l``.

    Strings without a recognisable datestamp are ignored. ``date.min`` is
    returned when no string yields a date.
    """
    found = [d for d in map(extract_date, l) if d]
    # Seeding with date.min keeps max() defined for an empty result.
    return max([date.min] + found)
# Main
# ====
def print_error(*args, **kwargs):
    """Print like ``print()``, but to stderr by default.

    Accepts the same positional and keyword arguments as ``print``. An
    explicit ``file=`` argument is honoured instead of raising a
    duplicate-keyword ``TypeError``.
    """
    kwargs.setdefault('file', sys.stderr)
    print(*args, **kwargs)
def _run_listing(command, label):
    """Run a snapshot-listing command and return its stdout as text lines.

    Args:
        command: Command string; split on whitespace and run without a local
            shell.
        label: Short name ('local' / 'remote') used in warnings. The command
            itself is never echoed because it may contain a password.

    Returns:
        List of output lines as text (str on both Python 2 and 3).
    """
    proc = subprocess.Popen(
        command.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # text output, so the str regexes apply
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        # Do not abort: a partial report is still useful, but make the
        # failure visible instead of silently reporting "no difference".
        print_error('warning: {} listing exited with status {}: {}'.format(
            label, proc.returncode, err.strip()))
    return out.splitlines()


def _collect_snapshot_sizes(lines):
    """Build {dataset: {snapshot: used_bytes}} from listing lines.

    Lines that do not match ``snapshot_pattern`` are skipped.
    """
    collection = dict()
    for line in lines:
        match = snapshot_pattern.match(line)
        if not match:
            continue
        dataset = match.group('dataset')
        snapshot = match.group('snapshot')
        used = human_to_bytes(match.group('used_human'))
        collection.setdefault(dataset, dict())[snapshot] = used
    return collection


def _compute_differences(collection, local_pool, remote_pool):
    """Return {local dataset: unreplicated bytes, or None if no remote copy}.

    Only datasets inside ``local_pool`` are considered; hidden datasets
    (a path component starting with '.') are ignored.
    """
    differences = dict()
    for dataset_local in sorted(collection):
        # Match the pool name on a path boundary so that e.g. 'tank-backup'
        # is not mistaken for a dataset of pool 'tank'.
        in_local_pool = (dataset_local == local_pool
                         or dataset_local.startswith(local_pool + '/'))
        if not in_local_pool or '/.' in dataset_local:
            continue
        # Swap only the leading pool name, not every occurrence of it.
        dataset_remote = remote_pool + dataset_local[len(local_pool):]
        if dataset_remote not in collection:
            differences[dataset_local] = None
            continue
        # Snapshots already present on the remote are excluded; what remains
        # is the space used by snapshots that have not been replicated yet.
        replicated = collection[dataset_remote]
        differences[dataset_local] = sum(
            used
            for snapshot, used in collection[dataset_local].items()
            if snapshot not in replicated
        )
    return differences


def main():
    """Print per-dataset and total replication lag.

    Output goes to stderr when the total difference exceeds
    ``error_threshold``, otherwise to stdout.
    """
    # get the zfs list output for local and remote pools
    list_local = _run_listing(command_local, 'local')
    list_remote = _run_listing(command_remote, 'remote')

    # record relevant snapshot sizes
    collection = _collect_snapshot_sizes(list_local + list_remote)

    # calculate differences for each dataset under the pool
    differences = _compute_differences(collection, pool_local, pool_remote)

    # calculate total difference, determine if past threshold
    total_difference = sum(v for v in differences.values() if v)
    if total_difference > human_to_bytes(error_threshold):
        emit = print_error
    else:
        emit = print

    # print each difference if not zero.
    for dataset in sorted(differences):
        d = differences[dataset]
        if d is None:
            emit(dataset, 'error: no remote dataset')
        elif d:
            emit(dataset, bytes_to_human(d))

    # determine days difference - not perfect
    local_date = latest_date(list_local)
    remote_date = latest_date(list_remote)
    if date.min in (local_date, remote_date):
        # latest_date() falls back to date.min when no datestamp was found;
        # subtracting that sentinel would print a meaningless number.
        days_behind = 'unknown'
    else:
        days_behind = (local_date - remote_date).days
    emit('total: {}, {} days behind.'.format(
        bytes_to_human(total_difference), days_behind))


if __name__ == '__main__':
    main()
