Last active
December 15, 2015 12:09
-
-
Save rmyers/5258299 to your computer and use it in GitHub Desktop.
Manual streaming backup of a CloudDB Instance to CloudFiles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python2.6
"""
Manual Backup Script
====================

This script is called by the agent to run a backup and stream to swift.

Setup Requires:

* Python >= 2.6
* Requests

Example::

    $ backup.py \
        --auth_url=http://auth.me \
        --auth_user=you \
        --auth_key=shhh \
        -u dbuser \
        -s mydb.host.com \
        -p mydbpassword \
        -f filename_to_upload

(The database host is ``-s``, not ``-h``, because optparse reserves
``-h`` for ``--help``.)

Run a Test::

    $ cat /dev/urandom > /tmp/largefile
    (wait a little bit then hit ^C)
    $ backup.py --runner=test -f test
"""
import os | |
import sys | |
import requests | |
import subprocess | |
import hashlib | |
# CHUNK_SIZE should be a multiple of 128 since this is the size of an md5
# digest block; this allows us to update the digest while streaming the file.
# http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
CHUNK_SIZE = 2**16
# Maximum bytes per uploaded segment before rolling over to a new object.
# NOTE(review): 4 MB is very small for Swift segments -- looks like a test
# value; confirm before production use.
MAX_FILE_SIZE = 4*(1024**2)
# Swift container that receives all backup segments and manifests.
CONTAINER_NAME = 'z_CLOUDDB_BACKUPS'
# Legacy Rackspace v1.0 auth endpoint; overridable via --auth_url.
AUTH_URL = 'https://identity.api.rackspacecloud.com/v1.0'
# Default credentials pulled from the environment (may be None if unset).
API_USER = os.environ.get('API_USER')
API_KEY = os.environ.get('API_KEY')
def size(num_bytes):
    """Return *num_bytes* expressed in megabytes, rounded to one decimal."""
    return round(num_bytes / (1024.0 ** 2), 1)
class BackupError(Exception):
    """Raised when the backup subprocess reports output on stderr."""
class SwiftAuth(object):
    """Requests auth hook for the legacy (v1.0) Swift auth protocol.

    Authenticates once on construction, then -- when attached as a
    ``requests`` session's ``auth`` -- stamps every outgoing request
    with the token and storage URL obtained from the auth service.
    """

    def __init__(self, authurl, user, key):
        self.user = user
        self.key = key
        self.authurl = authurl
        # Authenticate eagerly so bad credentials fail fast, at startup.
        self.token, self.url = self.get_auth()

    def __call__(self, request):
        # requests auth-callable protocol: mutate and return the request.
        request.headers['X-Auth-Token'] = self.token
        request.headers['X-Storage-Url'] = self.url
        return request

    def get_auth(self):
        """Exchange user/key for ``(token, storage_url)``.

        Raises ``requests.HTTPError`` if the auth endpoint rejects the
        request.  Previously a failed auth silently yielded
        ``(None, None)``, which only surfaced later as confusing URL
        errors when building container paths.
        """
        headers = {
            'X-Auth-User': self.user,
            'X-Auth-Key': self.key
        }
        auth = requests.get(self.authurl, headers=headers)
        # Fail loudly on 4xx/5xx instead of returning (None, None).
        auth.raise_for_status()
        token = auth.headers.get('X-Auth-Token')
        url = auth.headers.get('X-Storage-Url')
        return token, url
class SwiftConnection(object):
    """Thin wrapper over a requests session bound to one Swift container."""

    def __init__(self, authurl, user, key):
        self.session = requests.Session()
        self.session.auth = SwiftAuth(authurl, user, key)
        self.container = '{0}/{1}'.format(self.session.auth.url,
                                          CONTAINER_NAME)
        # Ensure the container exists; the response is intentionally
        # ignored, matching the original behavior.
        self.session.put(self.container)

    def put_object(self, name, content, headers=None):
        """PUT *content* into the container as object *name*."""
        target = '{0}/{1}'.format(self.container, name)
        return self.session.put(target, content, headers=headers)

    def head_object(self, name):
        """Return the response headers (metadata only) for object *name*."""
        target = '{0}/{1}'.format(self.container, name)
        return self.session.head(target).headers
class DatabaseBackup(object):
    """
    Run the backup command in a subprocess and stream its stdout in
    CHUNK_SIZE pieces, splitting the stream into numbered segments of at
    most MAX_FILE_SIZE bytes while maintaining MD5 checksums for both
    the whole stream (`checksum`) and the current segment (`schecksum`).

    Use as a context manager; each call to `chunker()` yields the chunks
    of ONE segment, so callers loop until `end_of_file` is set.
    """
    # The actual system call to run the backup -- subclasses override this
    # with a %-format template filled from the constructor kwargs.
    cmd = 'cat /tmp/largefile'
    def __init__(self, filename, **kwargs):
        self.filename = filename
        # how much we have written
        self.content_length = 0
        self.segment_length = 0
        self.process = None
        self.writer = None
        self.file_number = 0
        # Last size (in MB) reported by status(); -1 forces the first print.
        self.written = -1
        self.end_of_file = False
        self.end_of_segment = False
        self.checksum = hashlib.md5()
        self.schecksum = None
        # NOTE(review): raises KeyError/ValueError if kwargs do not match
        # the placeholders in the subclass `cmd` template.
        self.command = self.cmd % kwargs
    def run(self):
        # shell=True is needed because subclass commands pipe through gzip;
        # NOTE(review): interpolated kwargs reach the shell unescaped.
        self.process = subprocess.Popen(self.command, shell=True,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
    def __enter__(self):
        """Start up the process"""
        self.run()
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        """Clean up everything."""
        if exc_type is None:
            # See if the process reported an error (only on the clean-exit
            # path; an in-flight exception is propagated untouched).
            try:
                err = self.process.stderr.read()
                if err:
                    raise BackupError(err)
            except OSError:
                pass
        # Make sure to terminate the process
        try:
            self.process.terminate()
        except OSError:
            # Already stopped
            pass
    def status(self, num_bytes):
        """Write out the number of bytes written and the filename"""
        written = size(num_bytes)
        # Only repaint when the rounded MB figure actually changes;
        # backspaces overwrite the previous status line in place.
        if written != self.written:
            self.written = written
            sys.stdout.write('\b'*80)
            sys.stdout.write('Uploaded: %g MB to %s' % (written, self.segment))
            sys.stdout.flush()
    @property
    def segment(self):
        # Current segment object name, e.g. "mydump_00000003"; zero-padded
        # so segment names sort lexicographically in upload order.
        return '%s_%08d' % (self.filename, self.file_number)
    @property
    def manifest(self):
        # Name of the zero-byte manifest object tying the segments together.
        return '%s.gz' % self.filename
    @property
    def prefix(self):
        # Value for X-Object-Manifest: "<container>/<filename>_".
        return '%s/%s_' % (CONTAINER_NAME, self.filename)
    def chunker(self):
        """Generator to read the output of the backup command.

        Yields the chunks of one segment, stopping either when the
        segment approaches MAX_FILE_SIZE (sets `end_of_segment`,
        advances `file_number`) or when the process output is exhausted
        (sets `end_of_file`).
        """
        self.segment_length = 0
        self.end_of_segment = False
        self.schecksum = hashlib.md5()
        while not self.end_of_file:
            # Roll over to a new file once another full chunk could push
            # this segment past MAX_FILE_SIZE.
            if self.segment_length > (MAX_FILE_SIZE - CHUNK_SIZE):
                self.file_number += 1
                self.end_of_segment = True
                break
            # Read CHUNK_SIZE bytes; since CHUNK_SIZE is a multiple of the
            # md5 digest block size (128) the checksums can be updated
            # while streaming the file.
            # http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
            chunk = self.process.stdout.read(CHUNK_SIZE)
            if not chunk:
                self.end_of_file = True
                break
            self.checksum.update(chunk)
            self.schecksum.update(chunk)
            self.content_length += len(chunk)
            self.segment_length += len(chunk)
            self.status(self.segment_length)
            yield chunk
class MySQLBackup(DatabaseBackup):
    """Stream a gzipped ``mysqldump`` of every database on the host."""
    # NOTE(review): the password is interpolated onto the command line
    # (visible in `ps`) and the whole string runs through a shell --
    # flagged here, behavior unchanged.
    cmd = ('/usr/bin/mysqldump --all-databases --opt --compact'
           ' -h %(host)s --password=%(password)s -u %(user)s | gzip')
class XtraBackup(DatabaseBackup):
    # Placeholder runner: the real xtrabackup command line is not written
    # yet ("TBD"); selecting --runner=xtrabackup will not produce a backup.
    cmd = 'xtrabackup --args TBD'
class TestBackup(DatabaseBackup):
    # Streams a local scratch file; see the module docstring's
    # "Run a Test" section for how to create /tmp/largefile.
    cmd = 'cat /tmp/largefile'
if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="write report to FILE", metavar="FILE")
    # -s (not -h) for the database host: optparse reserves -h for --help.
    parser.add_option("-s","--host", dest="host")
    parser.add_option("-p", "--password", dest="password",
                      help="Database Password")
    parser.add_option("-u", "--user", dest="user",
                      help="Database User")
    parser.add_option("-a", "--auth_url", dest="auth_url",
                      help="Auth URL", default=AUTH_URL)
    parser.add_option("--auth_user", dest="auth_user",
                      help="Auth User", default=API_USER)
    parser.add_option("--auth_key", dest="auth_key",
                      help="Auth Key", default=API_KEY)
    parser.add_option('--runner', dest="runner",
                      help="Backup command to run (mysqldump, xtrabackup)",
                      default="mysqldump")
    (options, args) = parser.parse_args()
    # Map the --runner name onto a DatabaseBackup subclass.
    if options.runner == 'mysqldump':
        Runner = MySQLBackup
    elif options.runner == 'xtrabackup':
        Runner = XtraBackup
    elif options.runner == 'test':
        Runner = TestBackup
    else:
        parser.error("Unknown Runner")
    # Authenticates and creates the container as a side effect.
    connection = SwiftConnection(authurl=options.auth_url,
                                 user=options.auth_user,
                                 key=options.auth_key)
    with Runner(filename=options.filename,
                host=options.host,
                user=options.user,
                password=options.password) as backup:
        print("Running backup!")
        # Each chunker() pass streams exactly one segment; the generator
        # itself is handed to requests as the request body.
        while not backup.end_of_file:
            segment = backup.segment
            connection.put_object(segment,
                                  backup.chunker())
            # NOTE(review): the local segment MD5 is printed beside the
            # server ETag but never compared programmatically --
            # verification is left to the operator.
            headers = connection.head_object(segment)
            print('\nSegment Checksum: %s' % backup.schecksum.hexdigest())
            print('ETag: %s' % headers['ETag'])
    print("Total size: %s" % size(backup.content_length))
    print('Checksum: %s' % backup.checksum.hexdigest())
    # Zero-byte manifest object: its X-Object-Manifest header makes Swift
    # serve the concatenation of every object whose name starts with
    # backup.prefix when the manifest is downloaded.
    connection.put_object(backup.manifest,
                          content='',
                          headers={'X-Object-Manifest': backup.prefix})
    import pprint
    pprint.pprint(connection.head_object(backup.manifest))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment