Skip to content

Instantly share code, notes, and snippets.

@ankurcha
Last active October 23, 2015 06:11
Show Gist options
  • Select an option

  • Save ankurcha/9355630 to your computer and use it in GitHub Desktop.

Select an option

Save ankurcha/9355630 to your computer and use it in GitHub Desktop.
Script to backup tokumx database to a directory and/or S3.
{
"percent" : 6.297823041677475,
"bytesDone" : NumberLong("22117130240"),
"files" : {
"done" : 8,
"total" : 166
},
"current" : {
"source" : "/var/lib/mongodb_historical11/local_oplog_rs_p15_id__ddfdbfc0_1_19.tokumx",
"dest" : "/backup11/local_oplog_rs_p15_id__ddfdbfc0_1_19.tokumx",
"bytes" : {
"done" : NumberLong("16735272960"),
"total" : NumberLong("31608274944")
}
},
"ok" : 1
}
#!/usr/bin/env python2
"""
This utility takes a point in time backup using the hotbackup plugin.
@author Ankur Chauhan <[email protected]>
"""
import logging
import sys
import os
import optparse
import time
import threading
import pymongo
import boto
import boto.s3
from boto.s3.key import Key
class TokuMXBackup:
    """Drive a TokuMX hot backup via the admin `backupStart` /
    `backupStatus` commands, optionally archiving the result to S3.

    S3 support is configured only when ALL of access_key, secret_key,
    bucket_name and prefix are supplied; otherwise `self.bucket` /
    `self.prefix` are never set and `archive()` must not be called.
    """

    def __init__(self, host="localhost:27017", dest="./", access_key=None,
                 secret_key=None, bucket_name=None, prefix=None):
        logging.debug("connecting to %s...", host)
        self.client = pymongo.MongoClient(host)
        self.db = self.client['admin']
        self.destination = os.path.abspath(dest)
        # BUG FIX: the original tested the undefined name `bucket`
        # (parameter is `bucket_name`) and then used the undefined name
        # `conn` instead of the connection it had just created.
        if access_key and secret_key and bucket_name and prefix:
            self.s3 = boto.connect_s3(access_key, secret_key)
            self.bucket = self.s3.create_bucket(
                bucket_name, location=boto.s3.connection.Location.DEFAULT)
            self.prefix = prefix

    def run(self):
        """Start a hot backup into `self.destination`.

        The server-side `backupStart` command blocks until the backup
        finishes, which is why callers run this on a separate thread.
        """
        logging.info("Starting backup to %s", self.destination)
        self.db.command('backupStart', self.destination)

    def status(self):
        """Return the raw `backupStatus` response document.

        BUG FIX: the original discarded the command result even though
        callers index it (`s["ok"]`, `s["percent"]`).  `check=False`
        keeps pymongo from raising when no backup is running — the
        server then answers with ok:0 and errmsg "no backup running".
        """
        logging.info("Backup progress ... ")
        return self.db.command("backupStatus", check=False)

    def archive(self):
        """Upload every file under `self.destination` to S3, keyed as
        `<prefix>/<path relative to destination>`.

        Requires that S3 was configured in __init__ (see class docstring).
        """
        logging.info("Archiving to AWS S3")
        # Renamed loop variables: the original shadowed the builtins
        # `dir` and `file`.
        for dirpath, _dirnames, filenames in os.walk(self.destination):
            for filename in filenames:
                full_path = os.path.join(dirpath, filename)
                key = Key(self.bucket)
                key.key = self.prefix + "/" + os.path.relpath(full_path, self.destination)
                key.set_contents_from_filename(full_path)
def main():
    """Entry point: parse options, launch a hot backup on a worker
    thread, poll and log progress, and optionally archive to S3.

    Returns a process exit status: 0 on success, 1 on connection
    failure, 2 on a status-polling failure, -2 on an archive failure.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] <host:port>\n\n" + __doc__)
    parser.add_option("-s", "--sleeptime", dest="sleeptime", default=10, type="int",
                      help="duration to sleep between reports [default: %default]", metavar="TIME")
    parser.add_option("-d", "--destination", dest="destination", default="./",
                      help="destination to save the backup to [default: current directory]")
    # BUG FIX: boolean flags need action="store_true"; without it
    # optparse treats them as options that consume the next argument.
    parser.add_option("-f", "--force", action="store_true", default=False, dest="force",
                      help="Delete contents of destination before starting backup")
    parser.add_option("-a", "--archive", action="store_true", default=False, dest="archive",
                      help="Archive to S3")
    parser.add_option("-b", "--bucket", help="S3 bucket to use", default=None, dest="bucket")
    parser.add_option("-p", "--prefix", help="S3 prefix to save backup", default="/", dest="prefix")
    parser.add_option("-k", "--access_key", help="S3 Access Key", default=None, dest="access_key")
    # BUG FIX: "-s" was already taken by --sleeptime, which made
    # optparse raise OptionConflictError at startup; the secret key is
    # now exposed through the long option only.
    parser.add_option("--secret_key", help="S3 Secret Key", default=None, dest="secret_key")
    (opts, args) = parser.parse_args()

    if len(args) > 1:
        parser.error("too many arguments")
    if opts.sleeptime < 1:
        parser.error("invalid --sleeptime: %d" % opts.sleeptime)

    if os.path.isdir(opts.destination) and len(os.listdir(opts.destination)) != 0:
        if not opts.force:
            parser.error("destination directory is not empty")
        else:
            # wipe existing backup files so the new backup starts clean
            for name in os.listdir(opts.destination):
                os.remove(os.path.join(opts.destination, name))
    if not os.path.isdir(opts.destination):
        # 0o666 spelling works on Python 2.6+ and Python 3 (0666 is a
        # SyntaxError on Python 3)
        os.mkdir(opts.destination, 0o666)

    if opts.archive:
        logging.info("Backup will be archived to S3")
        # BUG FIX: the original used C-style `!` / `||` operators here,
        # which is a Python SyntaxError.
        if not (opts.access_key and opts.secret_key and opts.bucket and opts.prefix):
            parser.error("Must specify S3 Parameters: access_key, secret_key, bucket, prefix")
            return -1  # parser.error() exits; kept for explicitness

    host = args[0] if len(args) == 1 else "localhost:27017"

    try:
        # BUG FIX: the original never forwarded the S3 options, so the
        # backup object had no credentials or bucket to archive with.
        backup = TokuMXBackup(host, opts.destination, opts.access_key,
                              opts.secret_key, opts.bucket, opts.prefix)
    except Exception:
        logging.exception('error connecting to %s', host)
        return 1
    logging.info("connected to %s", host)

    # Only start a backup if one is not already running; `or {}` guards
    # against a status() implementation that returns nothing.
    backupThread = threading.Thread(target=backup.run)
    s = backup.status() or {}
    if s.get("ok") == 0 and s.get("errmsg") == "no backup running":
        backupThread.start()
    try:
        while True:
            time.sleep(int(opts.sleeptime))
            try:
                if not backupThread.is_alive():
                    logging.info("Backup completed")
                    if opts.archive:
                        try:
                            backup.archive()
                        except Exception:
                            # BUG FIX: logging.error("...", e) passed the
                            # exception as a %-format argument with no
                            # placeholder; logging.exception records the
                            # traceback properly.
                            logging.exception("Unable to archive to S3")
                            return -2
                    return 0
                s = backup.status() or {}
                if s.get("ok") == 0 and s.get("errmsg") == "no backup running":
                    return 0
                else:
                    # BUG FIX: the original `else` was missing its colon.
                    logging.info("Percent: %d \t Bytes done: %d",
                                 s["percent"], s["bytesDone"])
            except Exception:
                logging.exception('error getting backup status, backup may be running')
                return 2
    except KeyboardInterrupt:
        logging.info("disconnecting")
        return 0


if __name__ == "__main__":
    # BUG FIX: guard the entry call so importing this module does not
    # immediately run (and exit) the backup.
    sys.exit(main())
@ankurcha
Copy link
Author

ankurcha commented Mar 6, 2014

I think the next iteration would include rsync to remote host option.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment