Created
February 15, 2018 12:59
-
-
Save simpleadm/506d539f664022f98230f0398f60cbd4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# copyright: 2011, Igor Katson, [email protected]
"""What this script does is keep files logarithmically, that is,
when you provide a dir, or a file pattern to it, it can calculate, | |
which files to keep based on the following parameters: | |
- keep 1 file each day for --days days, | |
- keep 1 file each week for --weeks weeks (after --days processing) | |
- keep 1 file each month for --months months (after --weeks processing)
Use ./rotater --help for help | |
""" | |
import os | |
import datetime | |
import sys | |
import re | |
import optparse | |
import logging | |
import types | |
import urlparse | |
import ConfigParser | |
from django.conf import settings | |
settings.configure() | |
from django.core.exceptions import ImproperlyConfigured | |
from django.core.files.storage import FileSystemStorage, Storage | |
# Module-level logger shared by every rotater in this file.
log = logging.getLogger('rotater.py')

# Patterns used to pull a (year, month, day) triple out of a file name.
# They are tried in this order: the dashed form first, then the compact one.
DATE_RE = tuple(
    re.compile(pattern)
    for pattern in (
        r'(20\d{2})-(\d{2})-(\d{2})',
        r'(20\d{2})(\d{2})(\d{2})',
    )
)
class WalkingStorageMixin(object):
    """Mixin that adds an ``os.walk()``-style traversal to a storage class.

    The class it is mixed into must provide ``listdir(path)`` returning a
    ``(directories, files)`` pair.
    """

    def __init__(self, *args, **kwargs):
        """Consume the optional ``walk_top`` keyword; pass the rest on.

        ``walk_top`` is the default starting point for ``walk()`` and
        defaults to the empty string.
        """
        self.walk_top = kwargs.pop('walk_top', '')
        super(WalkingStorageMixin, self).__init__(*args, **kwargs)

    def walk(self, top=None, topdown=True, onerror=None):
        """An implementation of os.walk() which uses the storage's
        ``listdir()`` for listing directories.

        Yields ``(top, dirs, nondirs)`` for ``top`` and every directory
        below it.

        top     -- directory to start from; falls back to ``self.walk_top``
                   when empty or None.
        topdown -- when true a directory is yielded before its
                   subdirectories, otherwise after them.
        onerror -- optional callable invoked with the ``os.error`` raised by
                   ``listdir()``; the failing directory is then skipped.
        """
        top = top or self.walk_top
        try:
            dirs, nondirs = self.listdir(top)
        except os.error as err:
            if onerror is not None:
                onerror(err)
            return
        if topdown:
            yield top, dirs, nondirs
        for name in dirs:
            new_path = os.path.join(top, name)
            # Forward topdown/onerror so that the whole tree is walked with
            # the settings the caller asked for, not just the first level.
            for entry in self.walk(new_path, topdown, onerror):
                yield entry
        if not topdown:
            yield top, dirs, nondirs
class WalkingFileStorage(WalkingStorageMixin, FileSystemStorage):
    """``FileSystemStorage`` that can also be traversed with ``walk()``."""
# The S3 backend is optional: it needs the ``storages.backends.s3boto``
# module.  When that import fails both names are bound to None instead.
try:
    from storages.backends.s3boto import S3BotoStorage
except ImportError:
    S3BotoStorage = None
    WalkingS3Storage = None
else:
    class WalkingS3Storage(WalkingStorageMixin, S3BotoStorage):
        """``S3BotoStorage`` that can also be traversed with ``walk()``."""
def get_storage_by_path(path, **options):
    """Build the storage object matching ``path``.

    ``path`` is either a local directory or an ``s3://BUCKET[/DIRNAME]``
    URL.  Only the ``amazon_access_key`` and ``amazon_secret_key`` options
    are read; any other keyword is ignored.

    Returns a ``WalkingS3Storage`` for s3:// URLs and a
    ``WalkingFileStorage`` otherwise.

    Raises ValueError when an S3 URL has no bucket name or when no AWS
    credentials can be found, and ImportError when an S3 URL is given but
    the S3 storage backend could not be imported.
    """

    def get_amazon_auth(options):
        """Return ``(access_key, secret_key)`` from the first source that
        provides both: the options, the environment, then ``~/.s3cfg``."""
        # Try to get keys from options.
        key = options.get('amazon_access_key')
        secret = options.get('amazon_secret_key')
        if key and secret:
            return key, secret
        # Try to get keys from environment.
        key = os.environ.get('AWS_ACCESS_KEY_ID')
        secret = os.environ.get('AWS_SECRET_ACCESS_KEY')
        if key and secret:
            return key, secret
        # Try to get keys from ~/.s3cfg, the file used by s3cmd.
        s3cfg = os.path.expanduser('~/.s3cfg')
        if os.path.exists(s3cfg):
            parser = ConfigParser.ConfigParser()
            parser.read([s3cfg])
            try:
                key = parser.get('default', 'access_key')
                secret = parser.get('default', 'secret_key')
            except ConfigParser.Error:
                # A missing section/option means "no credentials here";
                # fall through to the ValueError below instead of leaking
                # a config-parser exception to the caller.
                key = secret = None
            if key and secret:
                return key, secret
        raise ValueError('AWS access credentials not provided.')

    if not path.startswith('s3://'):
        return WalkingFileStorage(path)

    if WalkingS3Storage is None:
        # The optional backend import failed at module load time; calling
        # None would only produce an opaque TypeError.
        raise ImportError(
            'S3 paths require storages.backends.s3boto, '
            'which could not be imported.')
    parsed = urlparse.urlparse(path)
    if not parsed.netloc:
        raise ValueError(
            'You should provide at least a bucket name, e.g. s3://BUCKET/')
    # Drop the leading "/" so the key prefix is relative to the bucket.
    walk_top = parsed.path
    if walk_top:
        walk_top = walk_top[1:]
    access_key, secret_key = get_amazon_auth(options)
    return WalkingS3Storage(
        bucket=parsed.netloc,
        walk_top=walk_top,
        access_key=access_key,
        secret_key=secret_key,
    )
class BaseRotater(object):
    """A base class for rotaters, override files_to_delete for it to work"""

    def __init__(self, storage, regex=None, **kwargs):
        """Remember the storage and the file selection settings.

        storage -- a ``Storage`` instance; ``walk()``, ``delete()`` and
                   ``modified_time()`` are called on it.
        regex   -- optional pattern (string or compiled) that the full path
                   of a file must match for the file to be considered.
        recurse -- keyword; when true, subdirectories are walked as well.

        All remaining keywords are stored in ``self.options``; the
        ``date_from_filename`` key is read by ``_get_mtime()``.
        """
        assert isinstance(storage, Storage)
        self.storage = storage
        self.regex = regex
        if isinstance(self.regex, basestring):
            self.regex = re.compile(self.regex)
        self.recurse = kwargs.pop('recurse', False)
        self.options = kwargs

    def walk(self):
        """Walk through all files and dirs, that are candidates for removal.

        Yields ``(dir, dirs, files)`` tuples where ``files`` only holds the
        names whose full path matches ``self.regex`` (all names when no
        regex is set).  Without ``recurse`` only the first directory is
        yielded.
        """
        for dirname, dirs, files in self.storage.walk():
            if self.regex:
                files = [name for name in files
                         if self.regex.match(os.path.join(dirname, name))]
            yield dirname, dirs, files
            if not self.recurse:
                # A plain return ends the generator; raising StopIteration
                # inside a generator is turned into RuntimeError by PEP 479.
                return

    def files_to_delete(self):
        """Return a list of files to be deleted"""
        raise NotImplementedError

    def files_to_keep(self):
        """Get files to keep based on files to delete"""
        delete = set(self.files_to_delete())
        for dirname, dirs, files in self.walk():
            for name in files:
                path = os.path.join(dirname, name)
                if path not in delete:
                    yield path

    def rotate(self):
        """Delete every file reported by files_to_delete() from the storage."""
        for path in self.files_to_delete():
            log.info('Deleting %s', path)
            self.storage.delete(path)

    def _get_mtime(self, filename):
        """Get modification time of the file based on filename or mtime.

        Unless the ``date_from_filename`` option is false, the first DATE_RE
        pattern that yields a valid calendar date wins and midnight of that
        date is returned.  Otherwise the storage's ``modified_time()`` is
        used.
        """
        if self.options.get('date_from_filename', True):
            for pattern in DATE_RE:
                match = pattern.search(filename)
                if not match:
                    continue
                year, month, day = match.groups()
                try:
                    return datetime.datetime(int(year), int(month), int(day))
                except ValueError:
                    # Digits that do not form a real date (e.g. month 13):
                    # try the next pattern, then fall back to the storage.
                    pass
        return self.storage.modified_time(filename)
class LogarithmicRotater(BaseRotater):
    """Rotater that keeps fewer files the older they are.

    One file is kept per day, then one per week, then one per month; see
    ``_logarithmic_rotate()`` for the exact windows.
    """

    DEFAULT_DAYS = 14
    DEFAULT_WEEKS = 12
    DEFAULT_MONTHS = 36

    def __init__(self, *args, **kwargs):
        """Accept ``days``, ``weeks`` and ``months`` keywords.

        A missing or None value falls back to the matching ``DEFAULT_*``
        class attribute.  Everything else is handed to ``BaseRotater``.
        """
        self.days = kwargs.pop('days', None)
        if self.days is None:
            self.days = self.DEFAULT_DAYS
        self.weeks = kwargs.pop('weeks', None)
        if self.weeks is None:
            self.weeks = self.DEFAULT_WEEKS
        self.months = kwargs.pop('months', None)
        if self.months is None:
            self.months = self.DEFAULT_MONTHS
        super(LogarithmicRotater, self).__init__(*args, **kwargs)

    def _logarithmic_rotate(self, files, today=None):
        """Files is a list of files to check for deletion

        files argument is a list of 2-tuples with mtime and filename.
        - keep 1 file each day for self.days days,
        - keep 1 file each week for self.weeks weeks (after --days processing)
        - keep 1 file each month for self.months months (after --weeks
          processing); a month is counted as 30 days.

        Within each period the first file encountered in ``files`` is the
        one that is kept.  Files older than all windows are always deleted,
        files dated after ``today`` are always kept.

        today -- date the windows are counted back from; defaults to
                 ``datetime.date.today()``.

        Return a list of files which can be deleted
        """
        start_rotate = today if today is not None else datetime.date.today()
        weeks_start = start_rotate - datetime.timedelta(days=self.days)
        months_start = weeks_start - datetime.timedelta(weeks=self.weeks)
        end_rotate = months_start - datetime.timedelta(days=self.months * 30)
        # Periods already represented by a kept file.  Keys are tagged with
        # the bucket kind: a bare date is ambiguous, because the first day
        # of a month can also be the Monday that identifies a week, and the
        # two buckets must not evict each other's files.
        kept_periods = set()
        for mtime, file in files:
            mdate = mtime.date()
            if mdate <= end_rotate:
                # Older than every retention window.
                yield file
                continue
            if mdate <= months_start:
                # Keep one file for each month
                period = ('month', mdate.replace(day=1))
            elif mdate <= weeks_start:
                # Keep one file for each week (weeks start on Monday)
                monday = mdate - datetime.timedelta(days=mdate.weekday())
                period = ('week', monday)
            elif mdate <= start_rotate:
                # Keep one file for each day
                period = ('day', mdate)
            else:
                # The file seems to be from future, keep it
                continue
            if period in kept_periods:
                yield file
            else:
                kept_periods.add(period)

    def files_to_delete(self):
        """Yield the full paths of the files that can be deleted.

        Every directory returned by ``walk()`` is rotated on its own, so
        each directory keeps its own file per day, week and month.
        """
        for top, dirs, names in self.walk():
            dated = (
                (self._get_mtime(path), path)
                for path in (os.path.join(top, name) for name in names)
            )
            for path in self._logarithmic_rotate(dated):
                yield path
# Command line entry point: parse the options, build the storage for the
# given path and either rotate it or, in a test mode, only report.
if __name__ == '__main__':
    optparser = optparse.OptionParser(
        usage="""usage: %prog [options] path
"path" may be a path to a directory, or an s3 URL e.g.
s3://BUCKET[/DIRNAME]""")
    optparser.add_option(
        '--regex', help='Optional regex to match filenames for.'
        ' The full path will be matched.')
    optparser.add_option(
        '--days', type='int',
        help='keep 1 file each day for --days days [%default]',
        default=LogarithmicRotater.DEFAULT_DAYS)
    optparser.add_option(
        '--weeks', type='int',
        help='keep 1 file each week for --weeks weeks [%default]',
        default=LogarithmicRotater.DEFAULT_WEEKS)
    optparser.add_option(
        '--months', type='int',
        help='keep 1 file each month for --months months [%default]',
        default=LogarithmicRotater.DEFAULT_MONTHS)
    optparser.add_option(
        '--no-date-from-filename', action='store_false',
        help='do not try to guess modification time from filename [%default]',
        default=True, dest='date_from_filename')
    optparser.add_option(
        '--test', '--test-delete', action='store_true', default=False,
        dest='test_delete',
        help='test mode, no files will be deleted, the ones to '
        'DELETE will be printed to stdout [%default]')
    optparser.add_option(
        '--test-keep', action='store_true', default=False,
        help='test mode, no files will be deleted, the ones to '
        'KEEP will be printed to stdout [%default]')
    optparser.add_option(
        '--recurse', action='store_true', default=False,
        help='Recurse into subdirectories [%default]',
    )
    optparser.add_option(
        '--loglevel',
        default='DEBUG',
        help='One of DEBUG, WARNING, INFO, ERROR [%default]')
    optparser.add_option('--amazon_access_key', type='string')
    optparser.add_option('--amazon_secret_key', type='string')
    options, args = optparser.parse_args()
    if len(args) != 1:
        sys.stderr.write(
            '%s takes only 1 argument\nAborting...\n' % sys.argv[0])
        sys.exit(1)
    path = args[0]

    loglevels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
    }
    try:
        loglevel = loglevels[options.loglevel.lower()]
    except KeyError:
        # Report a bad level as a usage error instead of a KeyError
        # traceback; optparser.error() prints the message and exits.
        optparser.error(
            'invalid --loglevel %r, expected one of '
            'DEBUG, WARNING, INFO, ERROR' % options.loglevel)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    log.addHandler(handler)
    log.setLevel(loglevel)

    # Every parsed option is forwarded as a keyword argument; each callee
    # picks the ones it knows about.
    settings_kwargs = vars(options)
    storage = get_storage_by_path(path, **settings_kwargs)
    rotater = LogarithmicRotater(storage, **settings_kwargs)
    if options.test_delete:
        for file in sorted(rotater.files_to_delete()):
            log.info('will delete "%s"', file)
    elif options.test_keep:
        for file in sorted(rotater.files_to_keep()):
            log.info('will keep "%s"', file)
    else:
        rotater.rotate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage: rotater.py --days 21 --weeks 24 --months 36 /backups/db