Created
May 16, 2013 23:20
-
-
Save denji/5595886 to your computer and use it in GitHub Desktop.
The following ruby script compares two directories recursively, and alerts the user of any differences. It compares files by size and (optionally) by a random sample of contents. The results are summarized into a difference percentage so it can be used to easily determine if a backup is valid and recent.
https://defuse.ca/backup-verify-script.htm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Author: havoc
# WWW: https://defuse.ca/backup-verify-script.htm
# Date: Jul 28, 2012
# License: Public domain / Do whatever you want.
#
# Backup validator script. Compares two folders "original" and "backup".
# Alerts the user of any files or directories that are in "original" but not in
# "backup" (extra files in "backup" are ignored). If a file exists in both
# "original" and "backup," they are compared by checking their lengths and by a
# random sample of their contents, and the user is alerted if they differ.
#
# Output prefixes:
#   DIR:     - Directory in original missing from backup.
#   FILE:    - File in original missing from, or different, in backup.
#   SKIP:    - Skipping directory specified by --ignore.
#   SYMLINK: - Symlink to a directory skipped and not followed (no --follow).
#   DIFFFS:  - Not recursing into dir because it is on a different filesystem.
#   ERROR:   - Error reading file or directory.
#   DEBUG:   - Debug information only shown when called with --verbose.
require 'optparse'

# The number of bytes to compare during each random sample comparison.
SampleSize = 32
###############################################################################
#                         Command Line Option Parsing                         #
###############################################################################

# Settings chosen on the command line, read by the comparison code below.
# Every key is listed here with its default value.
$options = {
  :verbose        => false, # -v: print DEBUG lines
  :machine        => false, # -m: one-line machine-readable summary
  # By default, don't follow symlinks, so we don't end up in infinite loops.
  # The user can override this behaviour if they know there are no loops.
  :follow         => false,
  # Set to NOT cross filesystem boundaries.
  :one_filesystem => false,
  # If a folder in original doesn't exist in backup, the number of items in
  # the folder is counted and added to the diff total when this is set.
  :count          => false,
  # Absolute paths of directories (under original or backup) to skip.
  :ignore         => [],
  # Number of random content samples compared per file (0 = size check only).
  :samples        => 0
}

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options] <original> <backup>\n"

  opts.on( '-v', '--verbose', 'Print what is being done' ) do
    $options[:verbose] = true
  end

  opts.on( '-m', '--machine', "Output summary in machine-readable format" ) do
    $options[:machine] = true
  end

  opts.on( '-f', '--[no-]follow', 'Follow symlinks' ) do |val|
    $options[:follow] = val
  end

  opts.on( '-x', '--one-filesystem', 'Stay on one file system (in <original>)' ) do
    $options[:one_filesystem] = true
  end

  opts.on( '-c', '--count', 'Count files in unmatched directories' ) do
    $options[:count] = true
  end

  # The option can be specified multiple times.
  opts.on( '-i', '--ignore DIR', "Don't process DIR" ) do |ignore|
    $options[:ignore] << File.expand_path( ignore )
  end

  # Integer makes OptionParser reject non-numeric values instead of silently
  # treating them as 0 (which would disable content sampling unnoticed).
  opts.on(
    '-s',
    '--samples COUNT',
    Integer,
    "Comparison sample count (default: #{$options[:samples]})"
  ) do |count|
    $options[:samples] = count
  end

  opts.on( '-h', '--help', 'Display this screen' ) do
    STDOUT.puts opts
    exit
  end
end

# Prints 'message' and the usage text to STDERR, then exits with a failure
# status so calling scripts can tell the run never started.
usageError = lambda do |message|
  STDERR.puts message
  STDERR.puts optparse
  exit 1
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # Covers unknown options, missing arguments and invalid argument values.
  usageError.call( e.message )
end

if $options[:samples] < 0
  usageError.call( "Sample count must not be negative." )
end

if ARGV.length < 2
  usageError.call( "You must specify original and backup folders." )
end

$original = File.expand_path( ARGV[0] )
$backup = File.expand_path( ARGV[1] )

[$original, $backup].each do |dir|
  usageError.call( "[#{dir}] is not a directory." ) unless File.directory?( dir )
end

STDERR.puts "WARNING: Comparing a directory to itself." if $original == $backup
###############################################################################
#                            Directory Comparison                             #
###############################################################################

# Global variables to hold statistics for the summary report at the end.
$diffCount = 0      # items missing from, or different in, the backup
$itemCount = 0      # items examined in the original tree
$skippedCount = 0   # directories skipped (ignored, symlink, other filesystem)
$errorCount = 0     # files or directories that could not be read
# Returns true if fileA and fileB both exist, both are the same size, and pass
# the random sample comparison test.
#
# If either path is a symlink, both must be symlinks with the same target;
# the targets' contents are not compared.
#
# $options[:samples] random windows of up to SampleSize bytes are read from
# the same offset in both files and compared byte-for-byte.
#
# Any error while inspecting or reading the files is reported as an ERROR
# line, counted in $errorCount, and the method returns true so the caller
# does not print a second (FILE:) message for the same file.
def sameFile( fileA, fileB )
  # If symlinks, make sure they link to the same thing.
  if File.symlink?( fileA ) || File.symlink?( fileB )
    return false unless File.symlink?( fileA ) && File.symlink?( fileB )
    return File.readlink( fileA ) == File.readlink( fileB )
  end

  # Both exist. (File.exists? was removed in Ruby 3.2; File.exist? works on
  # every Ruby version.)
  return false unless File.exist?( fileA ) && File.exist?( fileB )

  # Both are the same size.
  aBytes = File.stat( fileA ).size
  bBytes = File.stat( fileB ).size
  return false unless aBytes == bBytes

  # Two empty files have nothing to sample; also avoids rand(0), which
  # returns a Float rather than an offset.
  return true if aBytes.zero?

  # Random sample comparison. Stop at the first mismatch.
  $options[:samples].times do
    start = rand( aBytes )
    length = [SampleSize, aBytes - start].min
    # Binary reads so the bytes are compared untranslated on every platform.
    aSample = File.binread( fileA, length, start )
    bSample = File.binread( fileB, length, start )
    return false unless aSample == bSample
  end

  true
rescue StandardError
  STDOUT.puts "ERROR: Can't read file [#{fileA}]"
  $errorCount += 1
  true # So we don't get two messages for the same file
end
# Returns the number of items in the directory (and subdirectories of) 'dir'.
#
# Every entry other than "." and ".." counts as one item. Symlinks to
# directories are counted but only descended into when --follow is set, so a
# symlink loop cannot inflate the total.
#
# If 'dir' (or a subdirectory) cannot be read, an ERROR line is printed,
# $errorCount is incremented and the items counted so far are returned, so
# the caller's own comparison is not interrupted.
def countItems( dir )
  count = 0

  if $options[:verbose]
    STDOUT.puts "DEBUG: Counting files in [#{dir}]"
  end

  Dir.foreach( dir ) do |item|
    next if item == "." || item == ".."
    count += 1
    fullPath = File.join( dir, item )
    next unless File.directory?( fullPath )
    next if File.symlink?( fullPath ) && !$options[:follow]
    count += countItems( fullPath )
  end

  count
rescue SystemCallError
  STDOUT.puts "ERROR: Can't read directory [#{dir}]"
  $errorCount += 1
  count
end
# Recursively compare directories specified by a path relative to $original and
# $backup.
#
# For the directory at 'relative':
#   * prints SKIP and returns if either absolute path is in $options[:ignore];
#   * prints DIR and counts a difference if it is not a directory on both
#     sides (with --count, every item below the original is also counted as
#     both an item and a difference, so the totals stay consistent);
#   * otherwise walks the original's entries, recursing into subdirectories
#     and checking everything else with sameFile.
#
# Updates $itemCount, $diffCount, $skippedCount and $errorCount. Returns nil.
def compareDirs( relative = "" )
  # Combine the base path with the relative path
  original = File.expand_path( File.join( $original, relative ) )
  backup = File.expand_path( File.join( $backup, relative ) )

  if $options[:verbose]
    STDOUT.puts "DEBUG: Comparing [#{original}] to [#{backup}]"
  end

  # Return if this directory has been excluded
  if $options[:ignore].include?( original ) || $options[:ignore].include?( backup )
    $skippedCount += 1
    STDOUT.puts "SKIP: Skipping comparison of [#{original}] and [#{backup}]"
    return
  end

  # Make sure both directories exist
  unless File.directory?( original ) && File.directory?( backup )
    STDOUT.puts "DIR: [#{original}] not found in [#{backup}]"
    $diffCount += 1
    if $options[:count]
      # Counted items are both processed items and differences; adding them
      # only to the diff total would push the percentage above 100%.
      missing = countItems( original )
      $itemCount += missing
      $diffCount += missing
    end
    return
  end

  # If both directories exist, we check their contents
  begin
    # The parent's device only matters for --one-filesystem; look it up once.
    outerDev = File.stat( original ).dev if $options[:one_filesystem]

    Dir.foreach( original ) do |item|
      next if item == "." || item == ".."
      $itemCount += 1
      origPath = File.join( original, item )
      backupPath = File.join( backup, item )

      unless File.directory?( origPath )
        # It's a file
        unless sameFile( origPath, backupPath )
          $diffCount += 1
          STDOUT.puts "FILE: [#{origPath}] not found at, or doesn't match [#{backupPath}]"
        end
        next
      end

      # Skip symlinks if told to do so...
      if File.symlink?( origPath ) && !$options[:follow]
        $skippedCount += 1
        STDOUT.puts "SYMLINK: [#{origPath}] skipped."
        next
      end

      # Stay on one filesystem if told to do so...
      if $options[:one_filesystem] && File.stat( origPath ).dev != outerDev
        $skippedCount += 1
        STDOUT.puts "DIFFFS: [#{origPath}] is on a different file system. Skipped."
        next
      end

      compareDirs( File.join( relative, item ) )
    end # Dir.foreach
  rescue SystemCallError
    # Permission problems (EACCES, EPERM), directories that vanish mid-scan
    # (ENOENT) and similar OS errors are reported instead of aborting the run.
    STDOUT.puts "ERROR: Can't read directory [#{original}]"
    $errorCount += 1
  end

  nil
end # compareDirs
# Prints the summary report from the global counters, either as a single
# machine-readable line (--machine) or as a short human-readable table.
#
# The difference percentage is $diffCount relative to $itemCount. When no
# items were processed (e.g. an empty original directory) it is reported as
# 0.00, or 100.00 if differences were nevertheless recorded, rather than the
# NaN/Infinity that a division by zero would print.
def printSummary
  ratio =
    if $itemCount.zero?
      $diffCount.zero? ? 0.0 : 1.0
    else
      $diffCount.to_f / $itemCount.to_f
    end
  differPercent = "%.2f" % ( ratio * 100 )

  if $options[:machine]
    STDOUT.puts "SUMMARY: items:#{$itemCount}, diff:#{$diffCount}, " +
                "diffpct:#{differPercent}, skip:#{$skippedCount}, " +
                "err:#{$errorCount}"
  else
    STDOUT.puts "\nSUMMARY:"
    STDOUT.puts " Items processed: #{$itemCount}"
    STDOUT.puts " Differences: #{$diffCount} (#{differPercent}%)"
    STDOUT.puts " Similarities: #{$itemCount - $diffCount}"
    STDOUT.puts " Skipped: #{$skippedCount}"
    STDOUT.puts " Errors: #{$errorCount}"
  end
end
# Exit gracefully on CTRL+C: report whatever has been compared so far, then
# stop instead of dying with an Interrupt backtrace.
trap( "SIGINT" ) do
  STDOUT.puts "\n\nCaught SIGINT. Stopping."
  printSummary
  exit
end

# Compare the two trees from their roots, then print the final report.
compareDirs
printSummary
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment