Skip to content

Instantly share code, notes, and snippets.

@denji
Created May 16, 2013 23:20
Show Gist options
  • Save denji/5595886 to your computer and use it in GitHub Desktop.
Save denji/5595886 to your computer and use it in GitHub Desktop.
The following ruby script compares two directories recursively, and alerts the user of any differences. It compares files by size and (optionally) by a random sample of contents. The results are summarized into a difference percentage so it can be used to easily determine if a backup is valid and recent. https://defuse.ca/backup-verify-script.htm
#!/usr/bin/env ruby
# Author: havoc
# WWW: https://defuse.ca/backup-verify-script.htm
# Date: Jul 28, 2012
# License: Public domain / Do whatever you want.
#
# Backup validator script. Compares two folders "original" and "backup".
# Alerts the user of any files or directories that are in "original" but not in
# "backup" (extra files in "backup" are ignored). If a file exists in both
# "original" and "backup," they are compared by checking their lengths and by a
# random sample of their contents, and the user is alerted if they differ.
#
# Output prefixes:
# DIR: - Directory in original missing from backup.
# FILE: - File in original missing from, or different, in backup.
# SKIP: - Skipping directory specified by --ignore.
# SYMLINK: - Symlink to directory skipped because symlinks are not being followed (no --follow).
# DIFFFS: - Not recursing into dir because it is on a different filesystem.
# ERROR: - Error reading file or directory.
# DEBUG: - Debug information only shown when called with --verbose.
require 'optparse'
# Size, in bytes, of each chunk read from both files during one random sample
# comparison.
SampleSize = 32
###############################################################################
# Command Line Option Parsing #
###############################################################################
# Default value of every option. The switch handlers registered below
# overwrite individual entries while the command line is being parsed.
$options = {
  :verbose => false,
  :machine => false,
  # Symlinks are not followed by default so we don't end up in infinite loops.
  :follow => false,
  # When true, directories on another filesystem are not crossed into.
  :one_filesystem => false,
  # When true, the contents of a directory missing from backup are counted and
  # added to the diff total.
  :count => false,
  # Absolute paths of directories to skip (subfolders of original or backup).
  :ignore => [],
  :samples => 0
}
optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options] <original> <backup>\n"

  opts.on( '-v', '--verbose', 'Print what is being done' ) { $options[:verbose] = true }

  opts.on( '-m', '--machine', "Output summary in machine-readable format" ) { $options[:machine] = true }

  # The user can turn symlink following on if they know there are no loops.
  opts.on( '-f', '--[no-]follow', 'Follow symlinks' ) { |flag| $options[:follow] = flag }

  opts.on( '-x', '--one-filesystem', 'Stay on one file system (in <original>)' ) { $options[:one_filesystem] = true }

  opts.on( '-c', '--count', 'Count files in unmatched directories' ) { $options[:count] = true }

  # May be given several times; every DIR is stored as an absolute path.
  opts.on( '-i', '--ignore DIR', "Don't process DIR" ) { |dir| $options[:ignore] << File.expand_path( dir ) }

  opts.on( '-s', '--samples COUNT', "Comparison sample count (default: #{$options[:samples]})" ) { |n| $options[:samples] = n.to_i }

  opts.on( '-h', '--help', 'Display this screen' ) do
    STDOUT.puts opts
    exit
  end
end
# Parse the command line, removing recognised switches from ARGV.
# OptionParser::ParseError is the common parent of InvalidOption,
# MissingArgument and InvalidArgument, so every kind of malformed command line
# is reported with the usage text instead of an uncaught exception.
# All usage errors exit with status 1 so calling scripts can detect them.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts optparse
  exit 1
end
# Whatever is left in ARGV must be the two positional arguments.
if ARGV.length < 2
  STDERR.puts "You must specify original and backup folders."
  STDERR.puts optparse
  exit 1
end
# Absolute paths of the two trees being compared.
$original = File.expand_path( ARGV[0] )
$backup = File.expand_path( ARGV[1] )
[$original, $backup].each do |dir|
  next if File.directory?( dir )
  STDERR.puts "[#{dir}] is not a directory."
  STDERR.puts optparse
  exit 1
end
STDERR.puts "WARNING: Comparing a directory to itself." if $original == $backup
###############################################################################
# Directory Comparison #
###############################################################################
# Running totals for the summary report printed at the end: differences found,
# items examined, items skipped and read errors. All four start at zero
# (Integers are immutable, so the chained assignment shares no state).
$diffCount = $itemCount = $skippedCount = $errorCount = 0
# Returns true if fileA and fileB both exist, both are the same size, and pass
# the random sample comparison test; returns false otherwise.
#
# If either path is a symlink, the links themselves are compared: both must be
# symlinks and both must point at the same target string.
#
# For regular files, $options[:samples] random offsets are chosen and up to
# SampleSize bytes are read from the same offset of each file and compared.
#
# If an I/O error occurs, an "ERROR:" line is printed, $errorCount is
# incremented, and true is returned so the caller does not also report the
# file as different.
def sameFile( fileA, fileB )
  # If symlinks, make sure they link to the same thing.
  if File.symlink?( fileA ) || File.symlink?( fileB )
    return false unless File.symlink?( fileA ) && File.symlink?( fileB )
    return File.readlink( fileA ) == File.readlink( fileB )
  end

  # Both exist. (File.exist?, not the File.exists? alias: the alias was removed
  # in Ruby 3.2, and calling it would raise NoMethodError.)
  return false unless File.exist?( fileA ) && File.exist?( fileB )

  # Both are the same size.
  aBytes = File.size( fileA )
  return false unless aBytes == File.size( fileB )

  # An empty file has nothing to sample, and rand( 0 ) would return a Float
  # instead of an offset.
  return true if aBytes.zero?

  # Random sample comparison. Stops at the first mismatching sample.
  $options[:samples].times do
    start = rand( aBytes )
    # Never more than SampleSize bytes, and never past the end of the file.
    length = [SampleSize, aBytes - start].min
    # binread: compare raw bytes, without text-mode or encoding conversion.
    return false unless File.binread( fileA, length, start ) == File.binread( fileB, length, start )
  end
  true
rescue SystemCallError, IOError
  # Only I/O failures are treated as "unreadable"; programming errors propagate.
  STDOUT.puts "ERROR: Can't read file [#{fileA}]"
  $errorCount += 1
  true # So we don't get two messages for the same file
end
# Returns the number of items in the directory (and subdirectories of) 'dir'.
#
# Every entry other than "." and ".." counts as one item. Subdirectories are
# counted recursively, except that a symlinked directory is only descended
# into when $options[:follow] is set. This matches how compareDirs treats
# symlinks and keeps symlink loops from inflating the count.
#
# If a directory cannot be read, an "ERROR:" line is printed, $errorCount is
# incremented, and the items counted so far are returned.
def countItems( dir )
  count = 0
  STDOUT.puts "DEBUG: Counting files in [#{dir}]" if $options[:verbose]
  Dir.foreach( dir ) do |item|
    next if item == "." || item == ".."
    count += 1
    fullPath = File.join( dir, item )
    next unless File.directory?( fullPath )
    # The link itself was counted above; don't walk through it unless asked.
    next if File.symlink?( fullPath ) && !$options[:follow]
    count += countItems( fullPath )
  end
  count
rescue SystemCallError
  STDOUT.puts "ERROR: Can't read directory [#{dir}]"
  $errorCount += 1
  count
end
# Recursively compare directories specified by a path relative to $original and
# $backup.
#
# relative - path below both roots; "" (the default) compares the roots.
#
# Results are reported on STDOUT and through the global counters:
#   $itemCount    - incremented for every entry seen in the original tree
#   $diffCount    - incremented for every missing directory or differing file
#   $skippedCount - incremented for ignored directories, unfollowed symlinks
#                   and directories on another filesystem (with -x)
#   $errorCount   - incremented when a directory cannot be read
def compareDirs( relative = "" )
  # Combine the base path with the relative path
  original = File.expand_path( File.join( $original, relative ) )
  backup = File.expand_path( File.join( $backup, relative ) )
  STDOUT.puts "DEBUG: Comparing [#{original}] to [#{backup}]" if $options[:verbose]

  # Return if this directory has been excluded
  if $options[:ignore].include?( original ) || $options[:ignore].include?( backup )
    $skippedCount += 1
    STDOUT.puts "SKIP: Skipping comparison of [#{original}] and [#{backup}]"
    return
  end

  # Make sure both directories exist
  unless File.directory?( original ) && File.directory?( backup )
    STDOUT.puts "DIR: [#{original}] not found in [#{backup}]"
    $diffCount += 1
    if $options[:count]
      # Everything below a missing directory is a difference. Count it as
      # processed too, so the totals stay consistent: otherwise differences
      # can exceed items, giving more than 100% and negative "Similarities".
      missing = countItems( original )
      $diffCount += missing
      $itemCount += missing
    end
    return
  end

  # If both directories exist, we check their contents
  begin
    # The device of this directory is the same for every entry, so look it up
    # once, and only when -x was requested.
    outerDev = $options[:one_filesystem] ? File.stat( original ).dev : nil
    Dir.foreach( original ) do |item|
      next if item == "." || item == ".."
      $itemCount += 1
      origPath = File.join( original, item )
      backupPath = File.join( backup, item )
      if File.directory?( origPath )
        # Skip symlinks if told to do so...
        if File.symlink?( origPath ) && !$options[:follow]
          $skippedCount += 1
          STDOUT.puts "SYMLINK: [#{origPath}] skipped."
          next
        end
        # Stay on one filesystem if told to do so...
        if $options[:one_filesystem] && File.stat( origPath ).dev != outerDev
          $skippedCount += 1
          STDOUT.puts "DIFFFS: [#{origPath}] is on a different file system. Skipped."
          next
        end
        compareDirs( File.join( relative, item ) )
      elsif !sameFile( origPath, backupPath ) # It's a file
        $diffCount += 1
        STDOUT.puts "FILE: [#{origPath}] not found at, or doesn't match [#{backupPath}]"
      end
    end # Dir.foreach
  rescue Errno::EACCES
    STDOUT.puts "ERROR: Can't read directory [#{original}]"
    $errorCount += 1
  end
end # compareDirs
# Prints the summary report to STDOUT from the global counters $itemCount,
# $diffCount, $skippedCount and $errorCount.
#
# With $options[:machine] set, a single "SUMMARY:" line of key:value pairs is
# printed; otherwise a multi-line human-readable report is printed. The
# difference percentage is $diffCount / $itemCount with two decimals.
def printSummary
  # With no items processed the ratio would be 0.0 / 0.0, which formats as
  # "NaN"; report 0.00 instead.
  ratio = $itemCount.zero? ? 0.0 : $diffCount.to_f / $itemCount.to_f
  differPercent = "%.2f" % (ratio * 100)
  if $options[:machine]
    STDOUT.puts "SUMMARY: items:#{$itemCount}, diff:#{$diffCount}, " +
      "diffpct:#{differPercent}, skip:#{$skippedCount}, " +
      "err:#{$errorCount}"
  else
    STDOUT.puts "\nSUMMARY:"
    STDOUT.puts " Items processed: #{$itemCount}"
    STDOUT.puts " Differences: #{$diffCount} (#{differPercent}%)"
    STDOUT.puts " Similarities: #{$itemCount - $diffCount}"
    STDOUT.puts " Skipped: #{$skippedCount}"
    STDOUT.puts " Errors: #{$errorCount}"
  end
end
# On CTRL+C (SIGINT), report what has been counted so far and then exit
# instead of dying with an Interrupt backtrace.
Signal.trap( "INT" ) do
  STDOUT.puts "\n\nCaught SIGINT. Stopping."
  printSummary
  exit
end
# Main program: compare the two trees starting at their roots, then print the
# totals.
compareDirs
printSummary
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment