Created
August 18, 2010 09:04
-
-
Save freegenie/534099 to your computer and use it in GitHub Desktop.
Backup solution for mongodb which dumps gridfs items on filesystem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# This script is a backup tool for mongodb. | |
# | |
# The main purpose is to dump files from gridfs to filesystem, | |
# to take advantage of backup solutions based on tools like | |
# rsync. | |
# Developed against mongo 1.6.0 | |
# | |
# The directory we want to create is | |
# root_backup_dir | |
# \_____ gridfs_dump | |
# \_____ one dir per database with extracted files
# \______ classic mongo dump | |
# \______ one dir per database with BSON files | |
# | |
# Usage: | |
# backup.rb dump dump_path [database,database] | |
# backup.rb restore dump_path [database,database] | |
# | |
# Example: | |
# ruby backup.rb dump . my_database | |
# ruby backup.rb restore . my_database | |
# | |
# | |
# TODO: | |
# - A real opt parsing. | |
# - Add cleanup of files which are no longer in fs.files | |
# | |
# Fabrizio Regini 17 Aug 2010 | |
require 'rubygems'
require 'shellwords'
require 'mongo'
include Mongo
# --- Credentials --------------------------------------------------------
# NOTE(review): credentials are hard-coded here and are later interpolated
# into the mongodump/mongorestore command lines.
Username      = 'admin'
Password      = '1234abcd'
AdminDatabase = 'admin'
UseAuth       = true

# --- External binaries (resolved through PATH) --------------------------
MongoDump    = 'mongodump'
MongoRestore = 'mongorestore'

# --- Directory layout below the dump path -------------------------------
MainDir       = 'mongo_super_dump'
GridFsDumpDir = 'gridfs_dump'
MongoDumpDir  = 'dump'

# --- GridFS collection names --------------------------------------------
FsChunksCollection = 'fs.chunks'
FsFilesCollection  = 'fs.files'

# --- Command-line arguments ---------------------------------------------
# ARGV[0]: command ('dump' or 'restore')
# ARGV[1]: dump path; defaults to MainDir next to this script
# ARGV[2]: optional comma-separated database names (nil when omitted)
Command   = ARGV[0]
DumpPath  = ARGV[1] || File.join(File.dirname(__FILE__), MainDir)
WantedDbs = ARGV[2] && ARGV[2].split(',')

unless %w(dump restore).include?(Command)
  raise "Command must be 'dump' or 'restore'"
end
# Opens a new connection to the server on localhost and returns a handle
# for the named database.
#
# db - name of the database to open.
#
# Authenticates with Username/Password when UseAuth is set.
# Returns the DB object built for that database.
def get_db(db)
  connection = Connection.new('localhost', nil, :slave_ok => true)
  database = DB.new(db, connection)
  database.authenticate(Username, Password) if UseAuth
  database
end
# Creates the directory at +path+ unless it is already there.
#
# path - directory to create (its parent must already exist).
#
# An already existing directory is not an error. If +path+ exists but is
# NOT a directory (e.g. a regular file), Errno::EEXIST is re-raised instead
# of being silently ignored, because nothing could be dumped into it.
def create_directory_if_not_exists(path)
  Dir.mkdir(path)
rescue Errno::EEXIST
  raise unless File.directory?(path)
end
# Makes sure the root backup directory (DumpPath) is present.
def create_main_dir
  root_dir = DumpPath
  create_directory_if_not_exists(root_dir)
end
# Makes sure the GridFS dump directory (DumpPath/GridFsDumpDir) is present.
def create_gridfs_dir
  gridfs_root = File.join(DumpPath, GridFsDumpDir)
  create_directory_if_not_exists(gridfs_root)
end
# Makes sure the mongodump output directory (DumpPath/MongoDumpDir) is
# present.
def create_dump_dir
  create_directory_if_not_exists(base_dump_path)
end
# Makes sure the per-database directory for extracted GridFS files is
# present.
#
# dbname - name of the database the directory belongs to.
def create_gridfs_db_directory(dbname)
  db_dir = gridfs_db_path(dbname)
  create_directory_if_not_exists(db_dir)
end
# Returns the directory handed to mongodump as its output path:
# DumpPath/MongoDumpDir.
def base_dump_path
  segments = [DumpPath, MongoDumpDir]
  File.join(*segments)
end
# Returns the directory holding the mongodump output of one database:
# DumpPath/MongoDumpDir/<dbname>.
#
# dbname - name of the database.
def dump_db_path(dbname)
  File.join(base_dump_path, dbname)
end
# Returns the directory holding the extracted GridFS files of one database:
# DumpPath/GridFsDumpDir/<dbname>.
#
# dbname - name of the database.
def gridfs_db_path(dbname)
  gridfs_root = File.join(DumpPath, GridFsDumpDir)
  File.join(gridfs_root, dbname)
end
# Returns the on-disk path of one extracted GridFS file.
#
# database - name of the database the file belongs to.
# file_id  - name of the file inside the database's GridFS directory
#            (converted with to_s).
def build_file_path(database, file_id)
  db_dir = gridfs_db_path(database)
  File.join(db_dir, file_id.to_s)
end
# Builds the on-disk file name for a GridFS metadata row.
#
# row - a document from fs.files; its '_id' and 'md5' entries are used.
#
# Returns a String of the form "<_id>-<md5>", so a changed md5 yields a new
# file name.
def filename_from_metadata_row(row)
  "#{row['_id']}-#{row['md5']}"
end
# Tells whether a GridFS file still has to be written to disk.
#
# The file name on disk is built from the row's id and md5, so a file that
# already exists under that name is considered unchanged.
#
# database - name of the database the row belongs to.
# row      - a document from fs.files.
#
# Returns true when no file with that name exists yet.
def dump_gridfs_file?(database, row)
  target = build_file_path(database, filename_from_metadata_row(row))
  !File.exist?(target)
end
# Dumps every selected database below DumpPath.
#
# For each database name in WantedDbs (or Dbs when none were given):
#   1. runs mongodump once per collection, skipping fs.chunks, and gzips
#      the resulting .bson files;
#   2. writes each GridFS file to <gridfs dir>/<db>/<_id>-<md5>, skipping
#      files that already exist under that name;
#   3. removes dumped files whose _id is no longer present in fs.files.
#
# NOTE(review): the exit status of mongodump/gzip is not checked.
def dump
  create_main_dir
  create_gridfs_dir
  create_dump_dir

  (WantedDbs || Dbs).each do |db_name|
    db = get_db(db_name)
    grid = Grid.new(db)
    files = db.collection(FsFilesCollection)

    # ---------------------
    # Call mongodump for every collection except fs.chunks
    # ---------------------
    # The password is passed with no space after -p (the original author
    # found the tool required that). Every interpolated value is
    # shell-escaped so names or paths containing spaces or shell
    # metacharacters do not break (or alter) the command.
    mongo_dump_command = [
      MongoDump,
      "-u#{Shellwords.escape(Username)}",
      "-p#{Shellwords.escape(Password)}",
      '-d', Shellwords.escape(db_name),
      '--out', Shellwords.escape(base_dump_path)
    ].join(' ')

    db.collection_names.reject { |name| name == FsChunksCollection }.each do |collection|
      puts "dumping database and collection: #{db_name}/#{collection}"
      `#{mongo_dump_command} -c #{Shellwords.escape(collection)}`
    end

    # gzip all dumped files; the glob is left unescaped on purpose so the
    # shell expands it.
    `gzip -f #{Shellwords.escape(dump_db_path(db_name))}/*.bson`

    # ---------------------
    # Export GridFS files (the content stored in fs.chunks)
    # ---------------------
    create_gridfs_db_directory(db_name)
    puts "dumping gridfs #{db_name}/#{FsFilesCollection}. Files count: #{files.count} \n"

    files.find({}, {:snapshot => true}).each do |row|
      # Only dump the file when no file named <_id>-<md5> exists yet.
      # TODO: optionize this (e.g. compare upload date with file ctime)
      if dump_gridfs_file?(db_name, row)
        filename = filename_from_metadata_row(row)
        # Read from GridFS BEFORE creating the file: a failed read must not
        # leave an empty file behind that the next run would take for an
        # up-to-date dump.
        content = grid.get(row['_id']).read
        STDOUT << '.' ; STDOUT.flush
        # Binary mode, block form: no newline translation and the handle is
        # closed even when the write raises.
        File.open(build_file_path(db_name, filename), 'wb') do |file|
          file.write(content)
        end
      else
        STDOUT << 'x' ; STDOUT.flush
      end
    end

    # ---------------------
    # Cleanup files from fs if they are no longer in the database
    # ---------------------
    # Dir.entries returns the listing as an array, so no directory handle
    # stays open while files are being unlinked.
    # NOTE(review): only the _id is checked; an older "<_id>-<old md5>" file
    # stays on disk for as long as the _id still exists in fs.files.
    Dir.entries(gridfs_db_path(db_name)).each do |filename|
      next if %w(. ..).include?(filename)
      id = filename.split('-').first
      if files.find({'_id' => BSON::ObjectID(id)}).count == 0
        File.unlink(build_file_path(db_name, filename))
      end
    end
  end
end
# Restores every selected database from the dump below DumpPath.
#
# For each database name in WantedDbs (or Dbs when none were given):
#   1. gunzips the dumped files and runs mongorestore with --drop;
#   2. for every row in fs.files, replaces the GridFS entry with the content
#      of the matching <_id>-<md5> file from the GridFS dump directory.
#
# Raises whatever File.open raises (e.g. Errno::ENOENT) when a dumped file
# is missing or unreadable; this happens before the GridFS record is
# touched.
#
# NOTE(review): the mongorestore command line, including the password, is
# printed to stdout; the exit status of gunzip/mongorestore is not checked.
def restore
  (WantedDbs || Dbs).each do |db_name|
    # ---------------------
    # Import mongodumps with mongorestore
    # ---------------------
    db = get_db(db_name)
    grid = Grid.new(db)
    db_dump_path = dump_db_path(db_name)

    # Interpolated values are shell-escaped so names or paths containing
    # spaces or shell metacharacters do not break (or alter) the command.
    mongo_restore_command = [
      MongoRestore,
      "-u#{Shellwords.escape(Username)}",
      "-p#{Shellwords.escape(Password)}",
      '-d', Shellwords.escape(db_name),
      '--drop', Shellwords.escape(db_dump_path)
    ].join(' ')

    puts mongo_restore_command
    puts "importing database from path #{db_dump_path} into database #{db_name}"
    # The glob is left unescaped on purpose so the shell expands it.
    `gunzip -f #{Shellwords.escape(db_dump_path)}/*.gz`
    `#{mongo_restore_command}`

    # ---------------------
    # Import files from filesystem back into GridFs
    # ---------------------
    files = db.collection(FsFilesCollection)
    puts "importing gridfs #{db_name}/#{FsFilesCollection}. Files count: #{files.count} \n"

    files.find({}, {:snapshot => true}).each do |row|
      STDOUT << '.' ; STDOUT.flush
      filename = filename_from_metadata_row(row)
      # Read the whole file first (binary mode, handle closed by the block)
      # so the existing record is only deleted once its replacement content
      # is safely in memory.
      content = File.open(build_file_path(db_name, filename), 'rb') { |file| file.read }
      grid.delete(row['_id'])
      grid.put(content, {:_id => row['_id'], :filename => row['filename'], :content_type => row['contentType'] })
    end
  end
end
# Placeholder: currently does nothing. It is invoked once at startup when
# UseAuth is set.
def add_admin_user_to_all_dbs
end

# --- Script entry point --------------------------------------------------
# Connect to the admin database and list every database on the server; the
# list is what dump/restore fall back to when no databases were named on
# the command line.
MainDb = get_db(AdminDatabase)
Dbs = MainDb.connection.database_names
add_admin_user_to_all_dbs if UseAuth

# Command was validated earlier in the script to be 'dump' or 'restore', so
# dispatch straight to the method of that name instead of evaluating the
# string as code.
send(Command)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment