-
-
Save rrauenza/a559368c60239684b815d33e919ffb43 to your computer and use it in GitHub Desktop.
btrfs-smr-balance.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# The goal of this is to gradually balance a btrfs filesystem which contains DM-SMR drives. | |
# Such drives are described in detail at https://www.usenix.org/node/188434
# A normal drive should be able to balance a single 1GB chunk in under 30s. | |
# Such a stripe would normally be written directly to the shingled blocks, but in the case | |
# it was cached, it would take roughly 100s to clean. | |
# So our heuristic here is: | |
# * balance one chunk from the drive with the least amount of unallocated space.
# * if it took longer than 30s, increase the per-chunk sleep interval | |
# * otherwise, decrease the per-chunk sleep interval | |
# | |
# The basic heuristic taken here is to balance a few chunks at a time from the drive with | |
# the least amount of unallocated space. The balance is timed and the tool slows down if | |
# the balance takes longer than some threshold. | |
from functools import lru_cache | |
import logging | |
import statistics | |
import subprocess | |
import sys | |
import time | |
# Root logger: timestamped, level-padded lines at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

# Mount point handed to the btrfs commands.
FILESYSTEM = '/media/btrfs'
# A balance pass slower than this many seconds makes the script back off.
CHUNK_TIMEOUT = 60  # seconds
# The back-off index stops growing once the next sleep would reach this.
MAX_SLEEP = 7200  # seconds
# The script exits once the stdev of per-device unallocated space is below this.
STDEV_LIMIT = 10 * 1024 ** 3  # bytes (10 GiB)
@lru_cache
def fib(n):
    """Return the n-th term of the sequence 1, 1, 2, 3, 5, 8, ...

    Any ``n`` below 2 yields 1. The value is computed iteratively, so a large
    ``n`` requested on a cold cache cannot exceed the interpreter's recursion
    limit the way a two-branch recursive definition would.
    """
    previous, current = 1, 1
    # Each pass advances one position in the sequence; n < 2 never enters.
    while n >= 2:
        previous, current = current, previous + current
        n -= 1
    return current
def sizeof_fmt(num, suffix='B'):
    """Format ``num`` as a human-readable size using binary (1024) prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, and is
    then rendered with one decimal place followed by the matching prefix and
    ``suffix``. Values too large for the 'Zi' prefix are reported in 'Yi'.
    """
    scaled = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(scaled) < 1024.0:
            return "%3.1f%s%s" % (scaled, prefix, suffix)
        scaled = scaled / 1024.0
    # Ran out of prefixes: whatever is left is expressed in yobi-units.
    return "%.1f%s%s" % (scaled, 'Yi', suffix)
def _parse_devices(output):
    """Extract (devid, path, unallocated_bytes) tuples from `btrfs fi show --raw` text."""
    devices = []
    for line in output.splitlines():
        fields = line.split()
        # Expected shape: devid <id> size <bytes> used <bytes> path <device>.
        # Anything else (blank output, captured error text) is ignored.
        if len(fields) < 8 or fields[0] != 'devid':
            continue
        try:
            size = int(fields[3])
            used = int(fields[5])
        except ValueError:
            continue
        devices.append((fields[1], fields[7], size - used))
    return devices


def bal_chunk():
    """Run one limited balance pass on the device with the least unallocated space.

    Reads the per-device size and usage of FILESYSTEM from
    `btrfs fi show --raw`, then:

    * exits with status 1 if fewer than two devices could be parsed, since the
      standard deviation below needs at least two values;
    * exits normally once the standard deviation of unallocated space across
      devices falls below STDEV_LIMIT;
    * otherwise starts `btrfs balance` with a `devid=<id>,limit=2` data filter
      on the device with the least unallocated space. A failing balance is
      logged as a warning and followed by a 30 second pause.
    """
    output = subprocess.getoutput('btrfs fi show --raw %s | grep devid' % FILESYSTEM)
    devices = _parse_devices(output)
    if len(devices) < 2:
        logging.error(
            'Found %d device(s) for %s, need at least 2 to balance; output was: %s',
            len(devices), FILESYSTEM, output)
        sys.exit(1)

    stdev = statistics.stdev([unallocated for _, _, unallocated in devices])
    if stdev < STDEV_LIMIT:
        logging.info('Unallocated space stdev %s is below %s, exiting...',
                     sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))
        sys.exit()
    logging.info('Unallocated space stdev %s is above %s, continuing...',
                 sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))

    # min() keeps the first device on ties, like a strict '<' scan would.
    least_empty_dev_id, least_empty_dev_path, least_empty_dev_unallocated = min(
        devices, key=lambda device: device[2])
    logging.info('Balancing the least empty device: %s with %s unallocated',
                 least_empty_dev_path, sizeof_fmt(least_empty_dev_unallocated))

    cmd = 'btrfs balance start -ddevid=%s,limit=2 %s' % (least_empty_dev_id, FILESYSTEM)
    ret, out = subprocess.getstatusoutput(cmd)
    if ret != 0:
        logging.warning(out)
        # Pause before the caller retries after a failed balance.
        time.sleep(30)
    else:
        logging.info(out)
def fib_sleep(index):
    """Sleep for ``fib(index)`` seconds, logging the duration and wake-up time first."""
    delay = fib(index)
    wake_time = time.localtime(time.time() + delay)
    wake_label = time.strftime("%H:%M:%S", wake_time)
    logging.info("Sleeping %ds until %s" % (delay, wake_label))
    time.sleep(delay)
# Main loop: time each balance pass and adapt the Fibonacci back-off index.
# A pass slower than CHUNK_TIMEOUT moves one step up (as long as the next
# sleep stays under MAX_SLEEP); a faster pass moves one step down, never
# below zero. Every pass is followed by a sleep of fib(backoff) seconds.
backoff = 0
while True:
    start = time.time()
    bal_chunk()
    duration = time.time() - start
    if duration > CHUNK_TIMEOUT:
        yeet = "Ouch!"
        if fib(backoff + 1) < MAX_SLEEP:
            backoff += 1
    else:
        yeet = "Nice!"
        backoff = max(backoff - 1, 0)
    logging.info("%s Last chunk took %ds" % (yeet, duration))
    fib_sleep(backoff)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment