Last active
December 19, 2023 12:50
-
-
Save mentha/22be943df3b4a855a25dacb1b7e93b3c to your computer and use it in GitHub Desktop.
btrfs dedupe and backup scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from argparse import ArgumentParser | |
from contextlib import suppress | |
from datetime import datetime, timezone | |
from functools import cached_property | |
from sys import argv, stderr | |
from time import time | |
import json | |
import os | |
import re | |
import shlex | |
import subprocess as sp | |
import yaml | |
# Template written to ``config.yaml`` when a snapshot volume is initialized.
# Every setting is commented out; remove the leading '#' (keeping the
# indentation that follows it) to enable a setting.
# NOTE(review): the nesting below was reconstructed so that the uncommented
# text is valid YAML -- confirm against the upstream template.
CONFIG_DEFAULT = '''\
## btrfs borg configuration
#subvolume: subvolume # path to target subvolume
#retention:
#  - span: 1d
#    interval: 1h
#  - span: 1w
#    interval: 1d
#  - span: 1M
#    interval: 1w
#borg:
#  - repo:
#    name: # backup name
#    compression: zstd
#    options: # options as sh commandline, for borg-create and borg-list
#    create-options:
#    retention:
#      - span: 1d
#        interval: 4h
#      - span: 1w
#        interval: 1d
#      - span: 6M
#        interval: 1w
'''
class RetentionRule:
    """Age-based thinning policy for timestamped objects.

    A policy is a list of ``(span, interval)`` pairs, both in seconds.  An
    object whose age is at most ``span`` is governed by the first (smallest)
    such span and is kept only if it is at least ``interval`` (after
    :meth:`interval_adjust`) newer than the previously kept object.  Objects
    older than every span are always expired.
    """

    # Seconds per unit suffix understood by parse_span().  Note the case
    # sensitivity: 'm' is minutes, 'M' is months.
    _UNITS = {
        'usec': 1e-6,
        'us': 1e-6,
        '\u03bcs': 1e-6,  # Greek small letter mu
        '\u00b5s': 1e-6,  # micro sign
        'msec': 1e-3,
        'ms': 1e-3,
        'seconds': 1,
        'second': 1,
        'sec': 1,
        's': 1,
        'minutes': 60,
        'minute': 60,
        'min': 60,
        'm': 60,
        'hours': 3600,
        'hour': 3600,
        'hr': 3600,
        'h': 3600,
        'days': 86400,
        'day': 86400,
        'd': 86400,
        'weeks': 86400 * 7,
        'week': 86400 * 7,
        'w': 86400 * 7,
        'months': 86400 * 365.25 / 12,
        'month': 86400 * 365.25 / 12,
        'M': 86400 * 365.25 / 12,
        'years': 86400 * 365.25,
        'year': 86400 * 365.25,
        'y': 86400 * 365.25,
    }

    # A number, or a run of (Unicode) letters.  An ASCII-only letter class
    # would split the mu off "μs" and silently parse "5μs" as five seconds.
    _TOKEN = re.compile(r'[0-9.]+|[^\W\d_]+')

    @staticmethod
    def parse_span(span):
        """Convert a time span such as ``'1h30m'`` or ``'2 weeks'`` to seconds.

        Args:
            span: A string of ``<number><unit>`` pairs; a trailing number
                without a unit counts as seconds.  A plain ``int``/``float``
                (e.g. an unquoted YAML number) is taken as seconds.

        Returns:
            The total number of seconds.

        Raises:
            ValueError: If a unit is unknown, a unit has no preceding number,
                or a number is malformed.
        """
        if isinstance(span, (int, float)):
            return float(span)
        total = 0
        pending = None
        for token in RetentionRule._TOKEN.findall(span):
            if not token[0].isalpha():
                pending = float(token)
                continue
            if pending is None:
                raise ValueError(
                    f'unit {token!r} has no preceding number in time span {span!r}')
            try:
                factor = RetentionRule._UNITS[token]
            except KeyError:
                raise ValueError(
                    f'unknown unit {token!r} in time span {span!r}') from None
            total += pending * factor
            pending = None
        if pending is not None:
            total += pending
        return total

    def __init__(self, rules):
        """Build the policy from mappings with ``'span'`` and ``'interval'`` keys."""
        # Sorted ascending so the first matching span is the tightest one.
        self.rules = sorted(
            (self.parse_span(r['span']), self.parse_span(r['interval']))
            for r in rules
        )

    @staticmethod
    def interval_adjust(i):
        """Shorten intervals above 600 s by 10 %, but by at most 3600 s.

        NOTE(review): presumably slack so that runs scheduled exactly one
        interval apart are not rejected because of timing jitter -- confirm.
        """
        if i > 600:
            i -= min(i / 10, 3600)
        return i

    def filter_expired(self, objs, key=float, now=None):
        """Yield the objects that the policy no longer retains.

        Args:
            objs: Iterable of objects to evaluate.
            key: Callable mapping an object to its timestamp in seconds.
            now: Reference time in seconds; defaults to the current time.

        Yields:
            Expired objects, oldest first.  Objects whose timestamp is not
            in the past are never yielded.
        """
        if now is None:
            now = time()
        last_kept = None
        for ts, obj in sorted((key(o), o) for o in objs):
            age = now - ts
            if age <= 0:
                continue
            interval = next(
                (every for span, every in self.rules if span >= age), None)
            if interval is None:
                # Older than every configured span.
                yield obj
                continue
            interval = self.interval_adjust(interval)
            if last_kept is not None and ts - last_kept < interval:
                # Too close to the previously kept object.
                yield obj
                continue
            last_kept = ts
class BtrfsBorgConfig:
    """Settings loaded from ``<snapvol>/config.yaml``.

    Attributes set by the constructor:
        subvolume: Path of the subvolume to snapshot.
        retention: RetentionRule applied to local snapshots.
        borg: List of BorgRepo entries, one per configured repository.
    """

    def __init__(self, snapvol):
        """Read and interpret the configuration of snapshot volume *snapvol*.

        Raises:
            OSError: If ``config.yaml`` cannot be opened.
            ValueError: If the file's top level is not a mapping.
        """
        with open(os.path.join(snapvol, 'config.yaml'), 'rb') as f:
            # safe_load() returns None for an empty or fully commented-out
            # file (such as the template written by --init); treat that as
            # "use the defaults" instead of failing on None.get().
            d = yaml.safe_load(f)
        if d is None:
            d = {}
        if not isinstance(d, dict):
            raise ValueError('config.yaml must contain a mapping at top level')
        self.subvolume = self._parse_subvolume(
            self._value(d, 'subvolume', 'subvolume'), snapvol)
        self.retention = RetentionRule(self._value(d, 'retention', [
            {'span': '1d', 'interval': '1h'},
            {'span': '1w', 'interval': '1d'},
            {'span': '1M', 'interval': '1w'},
        ]))
        self.borg = [self.BorgRepo(c) for c in self._value(d, 'borg', [])]

    @staticmethod
    def _value(mapping, key, default):
        """Return ``mapping[key]``, or *default* when the key is absent or null.

        A key written with no value (``options:``) loads as None in YAML, so
        a plain ``dict.get`` default is not enough.
        """
        value = mapping.get(key)
        return default if value is None else value

    @staticmethod
    def _parse_subvolume(v, snapvol):
        """Resolve *v* relative to *snapvol* unless it is already absolute."""
        if os.path.isabs(v):
            return v
        return os.path.realpath(os.path.join(snapvol, v))

    class BorgRepo:
        """One entry of the ``borg`` list: a repository and its options."""

        def __init__(self, r):
            """Interpret the mapping *r*; ``repo`` and ``name`` are required.

            Raises:
                KeyError: If ``repo`` or ``name`` is missing.
            """
            value = BtrfsBorgConfig._value
            self.repo = r['repo']
            self.name = r['name']
            self.compression = value(r, 'compression', 'zstd')
            # Option strings use sh quoting rules.
            self.options = shlex.split(value(r, 'options', ''))
            self.create_options = shlex.split(value(r, 'create-options', ''))
            self.retention = RetentionRule(value(r, 'retention', [
                {'span': '1d', 'interval': '4h'},
                {'span': '1w', 'interval': '1d'},
                {'span': '6M', 'interval': '1w'},
            ]))
class FileLock:
    """Context manager holding a non-blocking ``lockf`` lock on a file.

    The file is opened (and created if missing) in append mode when the
    object is constructed; the lock is taken on entry and dropped by closing
    the file on exit.
    """

    def __init__(self, path):
        self.f = open(path, 'ab')

    def __enter__(self):
        """Take the lock without waiting.

        Raises:
            OSError: If the lock cannot be acquired; the file is closed first.
        """
        fd = self.f.fileno()
        try:
            os.lockf(fd, os.F_TLOCK, 0)
        except OSError:
            self.f.close()
            raise
        return self

    def __exit__(self, *exc_info):
        """Close the file, which also releases the lock."""
        self.f.close()
class BtrfsBorg:
    """Snapshot a btrfs subvolume, thin old snapshots and archive to borg."""

    @staticmethod
    def unshared_main(target_path, src_path, *cmd):
        """Bind-mount *src_path* over *target_path*, then exec *cmd*.

        Invoked through ``--unshared`` from inside ``unshare -m`` (see
        run_borg).  Does not return on success.
        """
        sp.run(['mount', '--rbind', src_path, target_path], check=True)
        os.execvp(cmd[0], cmd)

    @staticmethod
    def init_vol(subvol, snapvol):
        """Create snapshot volume *snapvol* for the subvolume *subvol*.

        Creates the btrfs subvolume, its ``snapshots`` directory, a
        ``subvolume`` symlink to *subvol* and a default ``config.yaml``.
        """
        sp.run(['btrfs', 'subvolume', 'create', snapvol], check=True)
        os.mkdir(os.path.join(snapvol, 'snapshots'))
        if not os.path.isabs(subvol):
            # Store relative targets relative to the symlink's directory.
            subvol = os.path.relpath(subvol, snapvol)
        os.symlink(subvol, os.path.join(snapvol, 'subvolume'))
        with open(os.path.join(snapvol, 'config.yaml'), 'w', encoding='utf-8') as f:
            f.write(CONFIG_DEFAULT)

    @staticmethod
    def eprint(*a):
        """Print *a* to stderr and flush immediately."""
        print(*a, file=stderr)
        stderr.flush()

    @classmethod
    def run_backup(cls, vols, dry):
        """Back up every snapshot volume in *vols*, then compact used repos.

        Args:
            vols: Iterable of snapshot volume paths.
            dry: If true, only report what would be done.

        Raises:
            SystemExit: With status 1 if any volume or compaction failed.
        """
        failed = False
        compact_repos = set()
        for vol in sorted(vols):
            cls.eprint(f'Backing up {vol}')
            succeed = False
            try:
                if dry:
                    cls.eprint(f'would back up {vol}')
                else:
                    cls(vol, compact_repos).do_backup()
                succeed = True
            except Exception as e:
                # One broken volume must not stop the others, but the reason
                # has to be visible in the log.
                cls.eprint(f'Error while backing up {vol}: {e!r}')
            cls.eprint(f'Backing up of {vol} ' + ('succeeded' if succeed else 'failed'))
            if not succeed:
                failed = True
        for repo in sorted(compact_repos):
            cls.eprint(f'Compacting repo {repo}')
            try:
                sp.run(['borg', 'compact', repo], check=True)
            except (OSError, sp.CalledProcessError) as e:
                # Keep compacting the remaining repositories.
                cls.eprint(f'Compacting repo {repo} failed: {e!r}')
                failed = True
        if failed:
            # SystemExit instead of the site-provided exit() helper, which
            # is not guaranteed to exist.
            raise SystemExit(1)

    @classmethod
    def main(cls):
        """Command-line entry point."""
        if len(argv) >= 2 and argv[1] == '--unshared':
            return cls.unshared_main(*argv[2:])
        a = ArgumentParser(description='Btrfs backup tool')
        a.add_argument('--init', metavar='SUBVOLUME', help='initialize new snapshot path')
        a.add_argument('--dry', action='store_true', help='dry run')
        a.add_argument('snapshot_vol', nargs='+', help='snapshot path or directory of snapshot vols')
        a = a.parse_args()
        if a.init:
            if len(a.snapshot_vol) != 1:
                raise RuntimeError('only one volume could be initialized')
            # Assigned before branching: both branches need it.
            vol = a.snapshot_vol[0]
            if a.dry:
                cls.eprint(f'would initialize {vol}')
            else:
                cls.init_vol(a.init, vol)
            return
        vols = []
        for v in a.snapshot_vol:
            if os.path.exists(os.path.join(v, 'config.yaml')):
                # The argument is itself a snapshot volume.
                vols.append(os.path.realpath(v))
                continue
            # Otherwise treat it as a directory of snapshot volumes.
            for e in os.listdir(v):
                p = os.path.realpath(os.path.join(v, e))
                if os.path.isdir(p):
                    vols.append(p)
        cls.run_backup(vols, a.dry)

    def __init__(self, vol, compact_repo=None):
        """Bind to snapshot volume *vol*.

        Args:
            vol: Path of the snapshot volume.
            compact_repo: Optional set that collects repositories needing
                ``borg compact``; a private set is used when omitted.
        """
        self.snapvol = vol
        if compact_repo is None:
            compact_repo = set()
        self.compact_repo = compact_repo

    @cached_property
    def config(self):
        """Configuration of this snapshot volume, loaded on first access."""
        return BtrfsBorgConfig(self.snapvol)

    @staticmethod
    def parse_ts(s):
        """Convert an ISO 8601 string to a POSIX timestamp."""
        return datetime.fromisoformat(s).timestamp()

    @staticmethod
    def format_ts(ts):
        """Format a POSIX timestamp as an ISO 8601 string in UTC."""
        return datetime.fromtimestamp(ts, timezone.utc).isoformat()

    def do_backup(self):
        """Take a snapshot, delete expired ones and feed every borg repo.

        Raises:
            RuntimeError: If repositories are configured and all failed.
        """
        snapdir = os.path.join(self.snapvol, 'snapshots')
        with FileLock(os.path.join(self.snapvol, 'lock')):
            newvol = os.path.join(snapdir, '.new')
            if os.path.exists(newvol):
                # Left over from an interrupted run; removal is best effort.
                sp.run(['btrfs', 'subvolume', 'delete', newvol],
                       stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False)
            ts = time()
            sp.run(['btrfs', 'subvolume', 'snapshot', '-r', self.config.subvolume, newvol],
                   stdout=sp.DEVNULL, check=True)
            existing = [n for n in os.listdir(snapdir) if not n.startswith('.')]
            for s in self.config.retention.filter_expired(existing, key=self.parse_ts, now=ts):
                sp.run(['btrfs', 'subvolume', 'delete', os.path.join(snapdir, s)], check=True)
            curvol = os.path.join(snapdir, self.format_ts(ts))
            os.rename(newvol, curvol)
            borgcount = 0
            borgfail = 0
            for b in self.config.borg:
                borgcount += 1
                try:
                    self.run_borg(b, ts, curvol)
                except Exception as e:
                    # Not a bare except: Ctrl-C and SystemExit must still
                    # abort the run.
                    borgfail += 1
                    self.eprint(f'Backup to borg repo {b.repo} failed: {e!r}')
            if borgcount > 0 and borgfail >= borgcount:
                raise RuntimeError('all backups to borg failed')

    def run_borg(self, conf, ts, path):
        """Archive snapshot *path* into one repository and prune old archives.

        Args:
            conf: Repository settings (``BtrfsBorgConfig.BorgRepo``).
            ts: POSIX timestamp of the snapshot; used for the archive name.
            path: Path of the read-only snapshot to archive.

        Raises:
            RuntimeError: If an archive with the new name already exists.
            subprocess.CalledProcessError: If a borg command fails.
        """
        prefix = conf.name + '-'
        listing = sp.run(['borg', 'list', '--json'] + conf.options + [conf.repo],
                         stdout=sp.PIPE, check=True)
        arcs = [
            a['archive'] for a in json.loads(listing.stdout)['archives']
            if a['archive'].startswith(prefix)
        ]
        newarc = prefix + self.format_ts(ts)
        if newarc in arcs:
            raise RuntimeError(f'archive {newarc} already exist')
        # Evaluate the not-yet-created archive together with the existing
        # ones so the policy decides whether it is worth creating at all.
        arcs.append(newarc)
        expired = list(conf.retention.filter_expired(
            arcs, key=lambda n: self.parse_ts(n[len(prefix):]), now=ts))
        if newarc not in expired:
            # In a private mount namespace, bind the snapshot over the live
            # subvolume path so borg archives it under that path.
            sp.run(['unshare', '-m', '--propagation', 'private',
                    argv[0], '--unshared', self.config.subvolume, path] +
                   ['borg', 'create', '--checkpoint-interval', '600'] +
                   conf.options + conf.create_options +
                   [conf.repo + '::' + newarc, self.config.subvolume],
                   stdin=sp.DEVNULL, check=True)
        for n in expired:
            if n == newarc:
                continue
            sp.run(['borg', 'delete'] + conf.options + [conf.repo + '::' + n], check=True)
        self.compact_repo.add(conf.repo)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    raise SystemExit(BtrfsBorg.main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs btrfsborg.py on /etc/btrfsborg at the lowest CPU and I/O priority.
[Unit]
Description=Btrfs snapshot and borg backup

[Service]
ExecStart=/usr/bin/btrfsborg.py /etc/btrfsborg
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Triggers the service of the same name once per hour.
[Unit]
Description=Hourly btrfs snapshot and borg backup

[Timer]
OnCalendar=hourly

[Install]
WantedBy=timers.target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template unit: runs btrfsborg.py on the path given by the instance name
# (%f), at the lowest CPU and I/O priority.
[Unit]
Description=Btrfs snapshot and borg backup of %f

[Service]
ExecStart=/usr/bin/btrfsborg.py %f
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Triggers the service of the same name once per hour.
[Unit]
Description=Hourly btrfs snapshot and borg backup

[Timer]
OnCalendar=hourly

[Install]
WantedBy=timers.target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template unit: runs duperemove on the path given by the instance name
# (%f), at the lowest CPU and I/O priority.
[Unit]
Description=Deduplicate %f with duperemove

[Service]
ExecStart=/usr/bin/duperemove --dedupe-options=same -hqdr %f
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Triggers the service of the same name once per week.  Persistent=true
# runs a missed activation at the next opportunity (e.g. after boot).
[Unit]
Description=Weekly duperemove deduplication

[Timer]
OnCalendar=weekly
Persistent=true

[Install]
WantedBy=timers.target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PKGBUILD for btrfsmaint: installs btrfsborg.py and the systemd units that
# schedule it and duperemove.
pkgname=btrfsmaint
pkgver=0.20231218.0
pkgrel=1
pkgdesc='extra btrfs maintenance and backup tools'
arch=(any)
license=(Unlicense)
depends=(
	borg
	btrfs-progs
	duperemove
	python
	python-yaml
	systemd
)
# NOTE(review): the four template-unit names below were reconstructed; the
# available copy showed them as "[email protected]" (e-mail obfuscation of
# names containing "@").  They follow the %f template units shipped with
# this package -- confirm against the real file names.
source=(
	btrfsborg.py
	btrfsborg.service
	btrfsborg.timer
	btrfsborg@.service
	btrfsborg@.timer
	duperemove@.service
	duperemove@.timer
)
# Local files only, so integrity checks are skipped (one entry per source).
cksums=(
	SKIP
	SKIP
	SKIP
	SKIP
	SKIP
	SKIP
	SKIP
)

package() {
	local unit
	install -Dm755 -t "$pkgdir/usr/bin" "$srcdir/btrfsborg.py"
	for unit in \
		btrfsborg.service \
		btrfsborg.timer \
		btrfsborg@.service \
		btrfsborg@.timer \
		duperemove@.service \
		duperemove@.timer
	do
		install -Dm644 -t "$pkgdir/usr/lib/systemd/system" "$srcdir/$unit"
	done
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment