Created
November 4, 2011 19:15
-
-
Save itdaniher/1340224 to your computer and use it in GitHub Desktop.
rsync backup script using hardlinks, written by someone in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Script for automatic backup of files to a fileserver using rsync. | |
# Directories to backup and details about the remote fileserver and | |
# exclude patterns must be entered in the backup script itself. | |
# See 'User Configurable Parameters' below. | |
# | |
# Backup consists of three functions controlled by commandline arguments: | |
# | |
# /etc/backup sync | |
# | |
# Performs the incremental rsync of select directories | |
# Can be run frequently e.g. every half hour | |
# | |
# | |
# /etc/backup snapshot | |
# | |
# Rotates backups to retain snapshots back in time | |
# Should be run every 2 or 4 hours | |
# | |
# | |
# /etc/backup weedout | |
# | |
# Deletes superfluous snapshots. | |
# Should be run every day after midnight | |
# | |
# Without arguments backup defaults to 'sync' (the quick incremental rsync).
# | |
# The script can be executed either manually or | |
# as a cron job. E.g to run backup every twenty minutes during the day and | |
# every two hours at night, | |
# | |
#3,23,43 8-23 * * * root /etc/backup sync > /var/log/backup_sync.lastlog | |
#3 0-7/2 * * * root /etc/backup sync > /var/log/backup_sync.lastlog | |
# | |
# Make snapshot of backup tree every two hours 6 min past the hour | |
# | |
#6 */2 * * * root /etc/backup snapshot > /var/log/backup_snapshot.lastlog | |
# | |
# Weed-out superfluous snapshots every night 1:09 | |
#9 1 * * * root /etc/backup weedout > /var/log/backup_weedout.lastlog
# | |
# | |
# The cronjob versions require that ssh and rsync can access the | |
# fileserver without prompting for a password. | |
# This can be accomplished by using ssh-agent or ssh-add with a | |
# blank passphrase | |
# | |
# Ole Nielsen ANU, May 2002 | |
####################################################################### | |
# User Configurable Parameters | |
# DESTINATION - where the backups are stored
fileserver = 'it-nepal'                # Remote host
#fileserver = '66.30.14.184'
username = 'it'                        # Remote username
remotedir = '/home/it/backup/TRAITOR'  # Remote directory

# SOURCE - local directories to back up (without trailing slash)
backup_dirs = ['/Users']

# General exclude patterns. See man rsync for details on usage.
exclude_list = [
    "*~", ".*~", "#*#", ".#*", "*.o",
    "Dropbox", "Downloads", "Music", ".Trash", ".dropbox",
    "Library", "Public", "Photos", "Movies",
]

# Hook for additional exclude patterns (none configured at present)
exclude_list += []

# Include patterns (none configured at present)
include_list = []

# GENERAL CONFIGURATION
use_gnu = 0  # Set to 1 only if using GNU on remote system
dryrun = 0   # Don't actually do it - for testing purposes
verbose = 2  # Verbose output. 0: Nothing, 1: Some, 2: Everything

# Delays (in each timeslot: year, month, week, day, hour) before
# old backups get weeded out. For example [0,0,0,0,0].
delay = [15, 5, 2, 2, 1]

# Base file name for temporary storage on local host
tmpfile_basename = '/tmp/backup_files'
############################################################### | |
# The program | |
############################################################### | |
# Get commandline arguments | |
import sys | |
# Pick the requested action from the command line.
# Exactly one argument selects the action; anything else falls back to
# 'sync', the quick incremental run.
if len(sys.argv) == 2:
    arg = sys.argv[1]
else:
    #arg = 'all'
    arg = 'sync'  # Default to sync (quick)

# Aliases - short-hands mapped onto the canonical action names
_arg_aliases = {
    'rsync': 'sync',
    'weed': 'weedout',
    'snap': 'snapshot',
    'rotate': 'snapshot',
    'stat': 'stats',
}
arg = _arg_aliases.get(arg, arg)

# Validate explicitly: an assert would be stripped under 'python -O' and
# would show the user a traceback instead of a plain error message.
errmsg = 'Invalid command line argument %s' % arg
if arg not in ['all', 'sync', 'snapshot', 'weedout', 'stats']:
    sys.exit(errmsg)
####################################################################### | |
# | |
def make_timeslot(time_tuple):
    """Map a time tuple onto the list [year, month, week, day, hour].

    The entries are ordered from the slowest to the fastest changing
    measure.  time_tuple is indexed like the tuples produced by
    time.localtime() and time.strptime(); the week is element 7 of the
    tuple divided by 7 and truncated to an integer.
    """
    year = time_tuple[0]
    month = time_tuple[1]
    day = time_tuple[2]
    hour = time_tuple[3]
    week = int(time_tuple[7] / 7)
    return [year, month, week, day, hour]


# Names of the slots returned by make_timeslot, in the same order.
# Must be kept consistent with make_timeslot for reporting purposes!
timeslot_names = ['yearly', 'monthly', 'weekly', 'daily', 'hourly']
####################################################################### | |
# Backup machinery | |
import sys, time, os, string | |
# Start of this run; used for the time stamp and for the timing reports
t_start = time.time()

# Names on the remote and local side are derived from the start time
time_tuple = time.localtime(t_start)
time_stamp = time.strftime('%Y-%m-%d_%H:%M:%S', time_tuple)

most_recent_hardlink = remotedir + '/MOST_RECENT'
destination = remotedir + '/Backup.' + time_stamp
tmpfile = tmpfile_basename + '.' + time_stamp

# Remote command that clones MOST_RECENT into the time-stamped directory
# using hard links: 'cp -al' when use_gnu is set, find + cpio otherwise.
if not use_gnu:
    copycmd = 'mkdir %s; cd %s && find . -print | cpio -dpl %s' % (
        destination, most_recent_hardlink, destination)
else:
    copycmd = 'cp -al %s %s' % (most_recent_hardlink, destination)
# Verify existence of remote account by running a trivial command on it
#
remote_account = username + '@' + fileserver
exitcode = os.system('ssh %s "hostname" >/dev/null' % remote_account)
if exitcode != 0:
    # A plain string cannot be raised; use a real exception object so the
    # intended message is what actually gets reported.
    raise RuntimeError("Remote account %s could not be accessed"
                       % remote_account)

# Verify existence of remote backup directory. Create if necessary
#
exitcode = os.system('ssh %s "cd %s"' % (remote_account, remotedir))
if exitcode != 0:
    exitcode = os.system('ssh %s "mkdir %s"' % (remote_account, remotedir))
    if exitcode != 0:
        raise RuntimeError('Could not create remote directory %s'
                           % remotedir)
if arg in ['sync', 'snapshot', 'all']:
    # Check that previous backup is finished
    #
    lockfile = '/var/lock/backup.lock'
    try:
        with open(lockfile, 'r') as fid:
            backup_time = fid.readline().strip()
        backup_in_progress = 1
    except (IOError, OSError):
        # No readable lock file: no other run is registered
        backup_in_progress = 0

    if backup_in_progress:
        s = "%s: Rotating backups still in progress since %s.\n"\
            % (time_stamp, backup_time)
        s += "Please wait till previous backup has completed.\n"
        s += "If this is wrong, please delete %s and try again." % lockfile
        # The run is deliberately not aborted here (the original raise was
        # disabled), but the message is reported instead of being discarded.
        if verbose > 0:
            print(s)

    # Make lock (replaces whatever an earlier run may have left behind)
    if verbose > 0:
        print('Making lock file %s' % lockfile)
    with open(lockfile, 'w') as fid:
        fid.write(arg + ':' + time_stamp + '\n')

# Write to log file. Logging is best effort and must never stop a backup.
#
try:
    with open('/var/log/backup.log', 'a') as fid:
        fid.write('Started backup %s at %s\n' % (arg, time.asctime()))
except (IOError, OSError):
    if verbose > 0:
        print('WARNING: Could not open log file /var/log/backup.log')
#############################################
# RSYNC STUFF
if arg in ['sync', 'snapshot', 'all']:
    # Form include/exclude string and options
    #
    exclude_string = ''.join(
        ['--exclude "%s" ' % pattern for pattern in exclude_list])
    include_string = ''.join(
        ['--include "%s" ' % pattern for pattern in include_list])

    rsync_long_options = '%s %s --delete --delete-excluded --delete-after'\
                         % (include_string, exclude_string)

    if verbose == 2:
        rsync_short_options = '-azv'
    else:
        rsync_short_options = '-az'

    remote_account = username + '@' + fileserver

    ############################################################
    # Start backing up
    #
    # Every configured directory is synchronised into MOST_RECENT on the
    # file server. A failing directory is reported but does not stop the
    # remaining ones.
    for source_dir in backup_dirs:
        cmd = 'rsync %s %s %s %s@%s:%s'\
              % (rsync_short_options, rsync_long_options, source_dir,
                 username, fileserver, most_recent_hardlink)
        if verbose > 0:
            print(cmd)
        if not dryrun:
            exitcode = os.system(cmd)
            if exitcode != 0 and verbose > 0:
                print('\nWARNING (backup): Problems copying directory %s to %s'
                      % (source_dir, fileserver))
                print(' This can for example happen if user')
                print(' is not allowed to read all of %s' % (source_dir))

    # Update time stamp on newly created backup
    # (skipped in a dry run, which must leave the file server untouched)
    #
    cmd = 'ssh %s "touch %s"' % (remote_account, most_recent_hardlink)
    if not dryrun:
        exitcode = os.system(cmd)

    if verbose > 0:
        print("rsync completed in %d seconds" % (time.time() - t_start))
#############################################
# SNAPSHOT
if arg in ['snapshot', 'all']:
    t_snap = time.time()

    # Make hard links from most recent backup to name with time stamp
    #
    cmd = 'ssh %s "%s"' % (remote_account, copycmd)
    if verbose > 0:
        print(cmd)
    # A dry run only shows the command; nothing is created remotely
    if not dryrun:
        exitcode = os.system(cmd)
        if exitcode != 0 and verbose > 0:
            print('\nWARNING (backup): Problems creating snapshot %s on %s'
                  % (destination, fileserver))

    if verbose > 0:
        print("hardlink rotation completed in %d seconds"
              % (time.time() - t_snap))
#############################################
# WEEDOUT
if arg in ['weedout', 'all']:
    # House keeping - weed out among older backups
    #
    t_weed = time.time()
    current_timeslot = make_timeslot(time_tuple)

    # One dictionary per time slot measure (year, month, week, day, hour).
    # Each maps a slot value to {filename: time of that backup in seconds}.
    timedict = [{} for _slot in current_timeslot]
    keeplist = []

    # Get all backup directories and extract their time stamps
    #
    cmd = 'ssh %s "%s" > %s' % (remote_account, 'ls %s' % remotedir, tmpfile)
    exitcode = os.system(cmd)
    with open(tmpfile, 'r') as fid:
        listing = fid.readlines()

    # The listing has been read; do not leave the temporary file behind
    try:
        os.remove(tmpfile)
    except OSError:
        pass

    for line in listing:
        filename = line.strip()
        field_list = filename.split('.')  # Extract extension (time stamp)

        processed = 0
        if len(field_list) > 1:
            stamp = field_list[1]
            try:
                stamp_tuple = time.strptime(stamp, '%Y-%m-%d_%H:%M:%S')
            except ValueError:
                # A name that does not carry a time stamp is kept as it is.
                # It must not stop the scan of the remaining entries.
                print('Warning:' + stamp + ' could not be parsed')
                stamp_tuple = None

            if stamp_tuple is not None:
                # Organise files in the various time slots
                # ordered as year, month, week, day, hour
                # as specified in make_timeslot
                #
                # NOTE(review): every measure is compared on its own against
                # (current value - delay), e.g. the month test does not look
                # at the year - confirm that this is intended.
                timeslot = make_timeslot(stamp_tuple)
                for i, slot in enumerate(timeslot):
                    if slot < current_timeslot[i] - delay[i]:
                        # File is older than current time slot - delay,
                        # put it into appropriate slot.
                        timedict[i].setdefault(slot, {})[filename] = \
                            time.mktime(stamp_tuple)
                        processed = 1
                        break  # Do not enter file into more than one slot

        if not processed:
            keeplist.append(filename)

    # Keep only the newest from each list
    #
    delete_files = []
    for slot_name, slots in zip(timeslot_names, timedict):
        for flist in slots.values():
            if len(flist) > 0:
                # Sort by time (then name); the last entry is the newest
                ranked = sorted(
                    [(seconds, name) for name, seconds in flist.items()])
                keepfile = ranked[-1][1]
                tobe_deleted = [entry[1] for entry in ranked[:-1]]

                print('Expired time slot (%s)' % slot_name)
                if len(tobe_deleted) > 0:
                    print(' Delete:  %s' % (tobe_deleted,))
                print(' Keep:  %s' % keepfile)

                keeplist.append(keepfile)
                delete_files += tobe_deleted  # Accumulate

    print('To be deleted:')
    print(delete_files)
    print('To be kept:')
    print(keeplist)

    # Delete superfluous files
    #
    if len(delete_files) > 0:
        delete_string = ' '.join(delete_files)
        cmd = 'ssh %s "cd %s; /bin/rm -rf %s"'\
              % (remote_account, remotedir, delete_string)
        if verbose > 0:
            print(cmd)
        if not dryrun:
            os.system(cmd)

    if verbose > 0:
        print("superfluous files deleted in %d seconds"
              % (time.time() - t_weed))
#############################################
# STATS
if arg in ['stats', 'all']:
    # Get some stats on disk usage
    #
    t_stats = time.time()

    # Number of backups: one per entry listed in the remote directory
    cmd = 'ssh %s "%s" > %s' % (remote_account, 'ls %s' % remotedir, tmpfile)
    exitcode = os.system(cmd)
    with open(tmpfile, 'r') as fid:
        files = fid.readlines()
    print('You currently have %d backups' % (len(files)))

    # Disk usage of the latest backup (first line of the temporary file) ...
    cmd = 'ssh %s "%s" > %s' % (remote_account,
                                'du -sh %s/MOST_RECENT' % remotedir, tmpfile)
    if verbose:
        print(cmd)
    exitcode = os.system(cmd)

    # ... and of the whole backup tree (appended as second line)
    cmd = 'ssh %s "%s" >> %s' % (remote_account,
                                 'du -sh %s' % remotedir, tmpfile)
    if verbose:
        print(cmd)
    exitcode = os.system(cmd)

    with open(tmpfile, 'r') as fid:
        lines = fid.readlines()

    # Everything has been read; do not leave the temporary file behind
    try:
        os.remove(tmpfile)
    except OSError:
        pass

    # Sizes are only reported when both du commands produced their line
    if len(lines) == 2:
        size1 = lines[0].strip().split()[0]
        size2 = lines[1].strip().split()[0]
        print('Size of latest backup: %s ' % size1)
        print('Size of all backups: %s ' % size2)

    if verbose > 0:
        print("Stats obtained in %d seconds" % (time.time() - t_stats))
#################################################
# Record the end of the run in the log file. Logging is best effort:
# a log file that cannot be written is reported but is not fatal.
try:
    with open('/var/log/backup.log', 'a') as fid:
        fid.write('Finished backup %s in %d seconds at %s\n'
                  % (arg, time.time() - t_start, time.asctime()))
except (IOError, OSError):
    print('WARNING: Could not open log file /var/log/backup.log')

if arg in ['sync', 'snapshot', 'all']:
    # Remove lockfile
    #
    try:
        os.remove(lockfile)
    except OSError:
        if verbose > 0:
            print('WARNING: Could not remove lock file %s' % lockfile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment