Skip to content

Instantly share code, notes, and snippets.

@Pentusha
Last active February 11, 2019 14:53
Show Gist options
  • Save Pentusha/bac6fc8528c69500cc39765d9772793d to your computer and use it in GitHub Desktop.
Save Pentusha/bac6fc8528c69500cc39765d9772793d to your computer and use it in GitHub Desktop.
Remove old Cassandra and Scylla snapshots
import os
import re
from datetime import datetime
from subprocess import run, PIPE
# Only snapshot rows whose keyspace equals this name are considered.
TARGET_KEYSPACE = "prod"

# Free-space threshold in bytes (10 decimal gigabytes); cleanup stops once
# the available space is above this value.
_10GB = 10 * 1000 ** 3
# Matches one data row of the snapshot listing: a numeric snapshot name,
# the keyspace, the column family, and two "<number> <unit>" sizes.
# The decimal point is escaped so that only real numbers such as "0",
# "12" or "1.5" are accepted as the numeric part of a size; an unescaped
# "." would let arbitrary characters through and break float() later on.
listsnapshots_re = re.compile(
    r'(?P<timestamp>\d+)\s+'
    r'(?P<keyspace>\w+)\s+'
    r'(?P<column_family_name>\S+)\s+'
    r'(?P<true_size>\d+(?:\.\d+)?\s\S+)\s+'
    r'(?P<disk_size>\d+(?:\.\d+)?\s\S+)'
)
# Byte multipliers for the unit suffixes accepted by parse_size().
# Decimal suffixes keep their original power-of-ten values; the binary
# suffixes (KiB, MiB, ...) are added so sizes printed with IEC units parse
# instead of raising KeyError.
# NOTE(review): some nodetool versions may label 1024-based sizes as
# KB/MB/GB — confirm against the deployed version before relying on totals.
units = {
    'bytes': 1,
    'KB': 10 ** 3,
    'MB': 10 ** 6,
    'GB': 10 ** 9,
    'TB': 10 ** 12,
    'KiB': 2 ** 10,
    'MiB': 2 ** 20,
    'GiB': 2 ** 30,
    'TiB': 2 ** 40,
}


def parse_size(size: str) -> int:
    """Convert a "<number> <unit>" string into a whole number of bytes.

    Args:
        size: Text such as ``"1.5 GB"`` or ``"0 bytes"``; the number and
            the unit are separated by whitespace.

    Returns:
        The size in bytes, truncated towards zero.

    Raises:
        ValueError: if ``size`` does not split into exactly two fields or
            the number is not a valid float.
        KeyError: if the unit is not a key of ``units``.
    """
    # str.split() with no argument already discards surrounding whitespace,
    # so the fields need no further stripping.
    number, unit = size.split()
    return int(float(number) * units[unit])
def sizeof_fmt(num, suffix='B'):
    """Render a byte count as a short string with a binary (1024) prefix.

    The value is divided by 1024 until its magnitude drops below 1024 and
    is then printed with one decimal place, e.g. ``1536`` -> ``"1.5KiB"``.
    Values too large for the ``Zi`` prefix are reported with ``Yi``.

    Args:
        num: The quantity to format.
        suffix: Text appended after the prefix; defaults to ``'B'``.

    Returns:
        The formatted string.
    """
    value = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if not abs(value) < 1024.0:
            # Still too large for this prefix: scale down and try the next.
            value = value / 1024.0
            continue
        return "%3.1f%s%s" % (value, prefix, suffix)
    return "%.1f%s%s" % (value, 'Yi', suffix)
def main():
    """Clear snapshots of ``TARGET_KEYSPACE`` until enough disk space is free.

    Runs ``nodetool listsnapshots``, keeps the rows whose keyspace equals
    ``TARGET_KEYSPACE`` and prints a one-line summary. Then, while the space
    available on ``/var`` is not above ``_10GB`` bytes, it clears snapshots
    one at a time with ``nodetool clearsnapshot -t <tag>``, starting with the
    smallest (oldest) timestamp, and stops as soon as enough space is free
    or no snapshots remain.

    Raises:
        SystemExit: if ``nodetool listsnapshots`` exits with a non-zero
            status, so a failed listing is not mistaken for "no snapshots".
    """
    listing = run(['nodetool', 'listsnapshots'], stdout=PIPE, stderr=PIPE)
    if listing.returncode != 0:
        raise SystemExit(
            'nodetool listsnapshots failed: '
            + listing.stderr.decode(errors='replace').strip()
        )

    # The listing has one row per column family, so the same snapshot tag
    # can appear many times. Fold the rows into one record per tag so each
    # snapshot is cleared (and reported) exactly once.
    by_tag = {}
    for line in listing.stdout.decode().splitlines():
        re_match = listsnapshots_re.match(line)
        if not re_match:
            continue
        group = re_match.groupdict()
        if group['keyspace'] != TARGET_KEYSPACE:
            continue
        tag = group['timestamp']
        if tag not in by_tag:
            by_tag[tag] = {
                # The tag is interpreted as milliseconds since the epoch.
                'when': datetime.fromtimestamp(float(tag) / 1000),
                'timestamp': tag,
                'true_size': 0,
                'disk_size': 0,
            }
        by_tag[tag]['true_size'] += parse_size(group['true_size'])
        by_tag[tag]['disk_size'] += parse_size(group['disk_size'])

    # Newest first, so that list.pop() hands back the oldest snapshot.
    # Sorting on the integer tag avoids ambiguities of naive local datetimes.
    snapshots = sorted(
        by_tag.values(),
        key=lambda ss: int(ss['timestamp']),
        reverse=True,
    )

    true_size = sum(ss['true_size'] for ss in snapshots)
    disk_size = sum(ss['disk_size'] for ss in snapshots)
    disk_status = os.statvfs('/var')
    print('SNAPHOTS: {snapshots}, TRUE_SIZE: {true_size}, DISK_SIZE: {disk_size}, SPACE_AVAILABLE: {disk_available}'.format(
        snapshots=len(snapshots),
        true_size=sizeof_fmt(true_size),
        disk_size=sizeof_fmt(disk_size),
        disk_available=sizeof_fmt(disk_status.f_frsize * disk_status.f_bavail),
    ))

    while snapshots:
        # Re-read free space every round: each cleared snapshot may have
        # released enough to stop.
        disk_status = os.statvfs('/var')
        if disk_status.f_frsize * disk_status.f_bavail > _10GB:
            return
        oldest_snapshot = snapshots.pop()
        # NOTE(review): no keyspace argument is passed, so the tag is
        # presumably cleared in every keyspace — confirm that is intended.
        cleared = run(
            ['nodetool', 'clearsnapshot', '-t', oldest_snapshot['timestamp']],
            stdout=PIPE,
            stderr=PIPE,
        )
        if cleared.returncode != 0:
            # Best effort: report the failure and move on to the next one
            # instead of claiming the snapshot was deleted.
            print(
                'SNAPSHOT DELETE FAILED',
                oldest_snapshot,
                cleared.stderr.decode(errors='replace').strip(),
            )
            continue
        print('SNAPSOT DELETED', oldest_snapshot)
# Run the cleanup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment