Skip to content

Instantly share code, notes, and snippets.

@rmyers
Last active December 15, 2015 12:09
Show Gist options
  • Save rmyers/5258299 to your computer and use it in GitHub Desktop.
Save rmyers/5258299 to your computer and use it in GitHub Desktop.
Manual streaming backup of a CloudDB Instance to CloudFiles
#!/bin/env python2.6
"""
Manual Backup Script
====================
This script is called by the agent to run a backup and stream to swift.
Setup Requires:
* Python >= 2.6
* Requests
Example::
$ backup.py \
--auth_url=http://auth.me \
--auth_user=you \
--auth_key=shhh \
-u dbuser \
-s mydb.host.com \
-p mydbpassword \
-f filename_to_upload
Run a Test::
$ cat /dev/urandom > /tmp/largefile
(wait a little bit then hit ^C)
$ backup.py --runner=test -f test
"""
import os
import sys
import requests
import subprocess
import hashlib
# CHUNK_SIZE is kept a multiple of 64 bytes, the size of an MD5 input block
# (hashlib.md5().block_size), so the digest can be updated efficiently while
# streaming the file.
# http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
# Number of bytes read from the backup command per iteration (64 KiB).
CHUNK_SIZE = 64 * 1024
# Upper bound on the size of a single uploaded segment (4 MiB).
MAX_FILE_SIZE = 4 * 1024 * 1024
# Container that receives the segments and the manifest.
CONTAINER_NAME = 'z_CLOUDDB_BACKUPS'
# Default auth endpoint; can be overridden with --auth_url.
AUTH_URL = 'https://identity.api.rackspacecloud.com/v1.0'
# Default credentials come from the environment (None when unset).
API_USER = os.getenv('API_USER')
API_KEY = os.getenv('API_KEY')
def size(num_bytes):
    """Convert a byte count to megabytes, rounded to one decimal place."""
    bytes_per_mb = float(1024 * 1024)
    megabytes = num_bytes / bytes_per_mb
    return round(megabytes, 1)
class BackupError(Exception):
    """Signals that the backup command failed."""
    pass
class SwiftAuth(object):
    """Callable used as the ``auth`` hook of a requests session.

    On construction it authenticates against ``authurl`` with the given
    user/key and remembers the returned token and storage URL. When called
    with an outgoing request it stamps both values onto the request headers.
    """

    def __init__(self, authurl, user, key):
        self.user = user
        self.key = key
        self.authurl = authurl
        # Authenticate eagerly so a bad login fails here, not on first upload.
        self.token, self.url = self.get_auth()

    def __call__(self, request):
        """Add the auth token and storage URL headers to ``request``."""
        request.headers['X-Auth-Token'] = self.token
        request.headers['X-Storage-Url'] = self.url
        return request

    def get_auth(self):
        """Authenticate and return a ``(token, storage_url)`` tuple.

        Raises:
            requests.HTTPError: the auth endpoint answered with a 4xx/5xx
                status (for example, rejected credentials).
            BackupError: the response did not carry both the
                ``X-Auth-Token`` and ``X-Storage-Url`` headers.
        """
        headers = {
            'X-Auth-User': self.user,
            'X-Auth-Key': self.key
        }
        auth = requests.get(self.authurl, headers=headers)
        # Without this check a failed login silently produced
        # token = url = None and every later request was built on 'None'.
        auth.raise_for_status()
        token = auth.headers.get('X-Auth-Token')
        url = auth.headers.get('X-Storage-Url')
        if not token or not url:
            raise BackupError(
                'Authentication against %s did not return a token and '
                'storage URL' % self.authurl)
        return token, url
class SwiftConnection(object):
    """Thin wrapper around a requests session bound to the backup container."""

    def __init__(self, authurl, user, key):
        session = requests.Session()
        credentials = SwiftAuth(authurl, user, key)
        session.auth = credentials
        self.session = session
        self.container = '%s/%s' % (credentials.url, CONTAINER_NAME)
        # create our container
        session.put(self.container)

    def _object_url(self, name):
        """Return the URL of object ``name`` inside the container."""
        return '%s/%s' % (self.container, name)

    def put_object(self, name, content, headers=None):
        """PUT ``content`` as object ``name`` and return the response."""
        target = self._object_url(name)
        return self.session.put(target, content, headers=headers)

    def head_object(self, name):
        """HEAD object ``name`` and return the response headers."""
        response = self.session.head(self._object_url(name))
        return response.headers
class DatabaseBackup(object):
    """
    Call out to subprocess and stream the output to a new swift file.

    Subclasses override ``cmd`` with a shell command template; keyword
    arguments given to the constructor are substituted into it with
    ``%(name)s`` placeholders. Use the instance as a context manager: the
    command is started on entry and stopped on exit. ``chunker()`` yields
    the command's stdout one segment at a time.
    """

    # The actual system call to run the backup
    cmd = 'cat /tmp/largefile'

    def __init__(self, filename, **kwargs):
        """
        :param filename: base name used for the segment and manifest objects.
        :param kwargs: values substituted into the ``cmd`` template.
        """
        self.filename = filename
        # how much we have written
        self.content_length = 0
        self.segment_length = 0
        self.process = None
        self.writer = None
        self.file_number = 0
        self.written = -1
        self.end_of_file = False
        self.end_of_segment = False
        # Digest over the whole stream; ``schecksum`` covers one segment.
        self.checksum = hashlib.md5()
        self.schecksum = None
        self.command = self.cmd % self._shell_safe(kwargs)

    @staticmethod
    def _shell_safe(params):
        """Return a copy of ``params`` with string values shell-quoted.

        The command runs with ``shell=True``, so an unquoted value that
        contains spaces or shell metacharacters (a password, say) would
        break the command or be interpreted by the shell. Non-string values
        are passed through untouched.
        """
        try:
            from shlex import quote
        except ImportError:  # Python 2
            from pipes import quote
        return dict(
            (key, quote(value) if isinstance(value, str) else value)
            for key, value in params.items()
        )

    def run(self):
        """Start the backup command with stdout and stderr as pipes."""
        # NOTE(review): stderr is only read in __exit__; a command that
        # writes a lot to stderr while stdout is being streamed could fill
        # the pipe and stall. Not changed here -- confirm whether it matters
        # for the commands in use.
        self.process = subprocess.Popen(self.command, shell=True,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)

    def __enter__(self):
        """Start up the process"""
        self.run()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Clean up everything.

        When the ``with`` body finished without an exception, anything the
        command wrote to stderr is raised as ``BackupError``. The process is
        always stopped and reaped, including when that error is raised.
        """
        try:
            if exc_type is None:
                # See if the process reported an error
                try:
                    err = self.process.stderr.read()
                except OSError:
                    err = None
                if err:
                    raise BackupError(err)
        finally:
            self._stop_process()

    def _stop_process(self):
        """Terminate the child, close its pipes and collect its exit status."""
        process = self.process
        if process is None:
            return
        # Make sure to terminate the process
        try:
            process.terminate()
        except OSError:
            # Already stopped
            pass
        for stream in (process.stdout, process.stderr):
            if stream is None:
                continue
            try:
                stream.close()
            except (IOError, OSError):
                pass
        # Reap the child so it does not linger as a zombie.
        process.wait()

    def status(self, num_bytes):
        """Write out the number of bytes written and the filename"""
        written = size(num_bytes)
        # Only redraw when the rounded figure actually changed.
        if written != self.written:
            self.written = written
            sys.stdout.write('\b'*80)
            sys.stdout.write('Uploaded: %g MB to %s' % (written, self.segment))
            sys.stdout.flush()

    @property
    def segment(self):
        """Object name of the current segment (filename + 8-digit counter)."""
        return '%s_%08d' % (self.filename, self.file_number)

    @property
    def manifest(self):
        """Object name of the manifest."""
        return '%s.gz' % self.filename

    @property
    def prefix(self):
        """``container/prefix`` value shared by all segments of this backup."""
        return '%s/%s_' % (CONTAINER_NAME, self.filename)

    def chunker(self):
        """Generator to read the output of the backup command.

        Yields chunks for ONE segment. It stops either at end of output
        (``end_of_file`` is set) or when the segment cannot take another
        full chunk (``end_of_segment`` is set and ``file_number`` advances).
        Call it again for the next segment.
        """
        self.segment_length = 0
        self.end_of_segment = False
        self.schecksum = hashlib.md5()
        while not self.end_of_file:
            # Upload to a new file if we are starting or too large
            if self.segment_length > (MAX_FILE_SIZE - CHUNK_SIZE):
                self.file_number += 1
                self.end_of_segment = True
                break
            # Read CHUNK_SIZE bytes at a time and feed both digests as we
            # go, so the file never has to be held in memory.
            # http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
            chunk = self.process.stdout.read(CHUNK_SIZE)
            if not chunk:
                self.end_of_file = True
                break
            self.checksum.update(chunk)
            self.schecksum.update(chunk)
            self.content_length += len(chunk)
            self.segment_length += len(chunk)
            self.status(self.segment_length)
            yield chunk
class MySQLBackup(DatabaseBackup):
    """Backup that pipes a mysqldump of all databases through gzip."""

    # Expects ``host``, ``password`` and ``user`` keyword arguments.
    cmd = ' '.join([
        '/usr/bin/mysqldump',
        '--all-databases',
        '--opt',
        '--compact',
        '-h %(host)s',
        '--password=%(password)s',
        '-u %(user)s',
        '| gzip',
    ])
class XtraBackup(DatabaseBackup):
    """Backup driven by xtrabackup; the command line is still a placeholder."""

    cmd = (
        'xtrabackup --args TBD'
    )
class TestBackup(DatabaseBackup):
    """Test runner that streams /tmp/largefile instead of a real backup."""

    cmd = (
        'cat /tmp/largefile'
    )
if __name__ == '__main__':
    # Command-line entry point: parse options, stream the backup to swift
    # one segment at a time, then publish a manifest that ties the segments
    # together.
    from optparse import OptionParser
    import pprint

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="Base name of the objects to upload",
                      metavar="FILE")
    parser.add_option("-s", "--host", dest="host",
                      help="Database Host")
    parser.add_option("-p", "--password", dest="password",
                      help="Database Password")
    parser.add_option("-u", "--user", dest="user",
                      help="Database User")
    parser.add_option("-a", "--auth_url", dest="auth_url",
                      help="Auth URL", default=AUTH_URL)
    parser.add_option("--auth_user", dest="auth_user",
                      help="Auth User", default=API_USER)
    parser.add_option("--auth_key", dest="auth_key",
                      help="Auth Key", default=API_KEY)
    parser.add_option('--runner', dest="runner",
                      help="Backup command to run "
                           "(mysqldump, xtrabackup, test)",
                      default="mysqldump")
    (options, args) = parser.parse_args()

    runners = {
        'mysqldump': MySQLBackup,
        'xtrabackup': XtraBackup,
        'test': TestBackup,
    }
    Runner = runners.get(options.runner)
    if Runner is None:
        parser.error("Unknown Runner")
    # Without a filename every object would be named 'None_...'.
    if not options.filename:
        parser.error("A filename is required (-f/--file)")
    # Credentials default to the API_USER / API_KEY environment variables.
    if not options.auth_user or not options.auth_key:
        parser.error("Swift credentials are required: pass --auth_user and "
                     "--auth_key or set API_USER and API_KEY")

    connection = SwiftConnection(authurl=options.auth_url,
                                 user=options.auth_user,
                                 key=options.auth_key)

    with Runner(filename=options.filename,
                host=options.host,
                user=options.user,
                password=options.password) as backup:
        print("Running backup!")
        while not backup.end_of_file:
            # Capture the name first: chunker() advances the segment
            # counter when it rolls over to the next segment.
            segment = backup.segment
            connection.put_object(segment,
                                  backup.chunker())
            headers = connection.head_object(segment)
            expected = backup.schecksum.hexdigest()
            etag = (headers.get('ETag') or '').strip('"')
            print('\nSegment Checksum: %s' % expected)
            print('ETag: %s' % etag)
            # Compare instead of only printing, so a corrupted or missing
            # segment aborts the backup rather than going unnoticed.
            if etag != expected:
                raise BackupError(
                    'Checksum mismatch for %s: uploaded %s, stored %s'
                    % (segment, expected, etag))

    # Reached only when the backup command finished without reporting an
    # error, so a manifest is never published for a failed backup.
    print("Total size: %s" % size(backup.content_length))
    print('Checksum: %s' % backup.checksum.hexdigest())
    connection.put_object(backup.manifest,
                          content='',
                          headers={'X-Object-Manifest': backup.prefix})
    pprint.pprint(connection.head_object(backup.manifest))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment