Last active
September 28, 2015 03:08
-
-
Save mweppler/1375184 to your computer and use it in GitHub Desktop.
Compares file hashes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'digest/md5'
require 'optparse'
# A single inventoried file: a lookup key, the hash of its content, its
# modification time, its name, and the repository path it was found under.
class RepositoryFile
  attr_accessor :obj_hash, :file_hash, :file_mtime, :file_name, :file_path

  # @param obj_hash [String] key under which the record is stored
  # @param file_hash [String] hash of the file's content
  # @param file_mtime [Object] modification time (a String when loaded via .from_file)
  # @param file_name [String] name of the file
  # @param file_path [String] path the file was inventoried under
  def initialize(obj_hash, file_hash, file_mtime, file_name, file_path)
    @obj_hash = obj_hash
    @file_hash = file_hash
    @file_mtime = file_mtime
    @file_name = file_name
    @file_path = file_path
  end

  # Loads records from a file in the format written by .to_file.
  #
  # Each line must hold exactly five comma-separated fields; any other line is
  # skipped. The line terminator is stripped first so the last field
  # (file_path) does not carry a trailing newline.
  #
  # @param file_name [String] path of the recorded-hashes file
  # @return [Hash{String => RepositoryFile}] records keyed by their obj_hash
  def self.from_file(file_name)
    records = {}
    File.foreach(file_name) do |line|
      # limit -1 keeps a trailing empty field (an empty file_path).
      fields = line.chomp.split(',', -1)
      next unless fields.size == 5

      records[fields[0]] = new(*fields)
    end
    records
  end

  # Writes one comma-separated line per record to +file_name+, replacing any
  # existing content. The format has no quoting, so a field containing a comma
  # produces a line that .from_file will skip.
  #
  # @param repository_files [Hash{String => RepositoryFile}]
  # @param file_name [String] destination path
  # @return [nil]
  def self.to_file(repository_files, file_name)
    # Block form closes the handle even if a write raises.
    File.open(file_name, 'w') do |file|
      repository_files.each do |_key, entry|
        file.write "#{entry.obj_hash},#{entry.file_hash},#{entry.file_mtime},#{entry.file_name},#{entry.file_path}\n"
      end
    end
    nil
  end

  # @return [String] human-readable dump of all five fields
  def to_s
    "obj_hash:#{@obj_hash}, file_hash:#{@file_hash}, file_mtime:#{@file_mtime}, file_name:#{@file_name}, file_path:#{@file_path}"
  end

  # @param repository_files [Hash{String => RepositoryFile}]
  # @return [Array<String>] the file_name of every record, in iteration order
  def self.to_file_names(repository_files)
    repository_files.map { |_key, entry| entry.file_name }
  end
end
# Chunk size, in bytes, for buffered file reads.
$BUFFER_SIZE = 1024
# Command-line options keyed by symbol; populated by parse_options.
$options = {}
# Returns the MD5 hex digest of the raw bytes of a file.
#
# Digest::MD5.file streams the file in binary mode, so the result does not
# depend on platform newline translation.
#
# @param file_path [String] path of the file to hash
# @return [String] 32-character lowercase hex digest
def calculate_md5_for(file_path)
  Digest::MD5.file(file_path).hexdigest
end
# Returns the MD5 hex digest of a file's content with every tab, line feed,
# carriage return and space byte (9, 10, 13, 32) removed, so files that differ
# only in that whitespace hash identically.
#
# @param file_path [String] path of the file to hash
# @return [String] 32-character lowercase hex digest
def calculate_md5_for_no_white(file_path)
  digest = Digest::MD5.new
  # Binary mode avoids newline translation; chunked reads avoid a method call
  # per byte while keeping memory use bounded.
  File.open(file_path, 'rb') do |file|
    while (chunk = file.read(65_536))
      digest.update(chunk.delete("\t\n\r "))
    end
  end
  digest.hexdigest
end
# Compares a base inventory loaded from a recorded file against a delta
# inventory. Delegates unchanged to compare_repositories.
#
# @param base [Hash] base inventory
# @param delta [Hash] delta inventory
def compare_hashes_from_recorded(base, delta)
  compare_repositories(base, delta)
end
# Prints the added, deleted and changed files between two inventories.
#
# A section is omitted when $options[:skipcompare] includes its letter:
# 'a' (added), 'd' (deleted), 'c' (changed).
#
# @param base [Hash] base inventory
# @param delta [Hash] delta inventory
def compare_repositories(base, delta)
  base_names = RepositoryFile.to_file_names(base)
  delta_names = RepositoryFile.to_file_names(delta)
  skipped = $options[:skipcompare]

  unless skipped.include?('a')
    puts 'The following files have been added:'
    puts delta_names - base_names
    puts ''
  end

  unless skipped.include?('d')
    puts 'The following files have been deleted:'
    puts base_names - delta_names
    puts ''
  end

  unless skipped.include?('c')
    puts 'The following files have changed:'
    compare_repositories_md5sums(base, delta)
    puts ''
  end
end
# Prints a base/delta pair of lines for every key present in both inventories
# whose file_hash differs. Keys present in only one inventory are ignored.
#
# @param base [Hash] base inventory; values respond to file_hash, file_mtime,
#   file_name and file_path
# @param delta [Hash] delta inventory with the same value interface
def compare_repositories_md5sums(base, delta)
  base.each do |key, base_file|
    next unless delta.include?(key)

    delta_file = delta.fetch(key)
    next if delta_file.file_hash == base_file.file_hash

    puts "(base) #{base_file.file_hash} - #{base_file.file_mtime} - #{File.join(base_file.file_path, base_file.file_name)}\n"
    puts "(delta) #{delta_file.file_hash} - #{delta_file.file_mtime} - #{File.join(delta_file.file_path, delta_file.file_name)}\n\n"
  end
end
# Prints the usage text held in @optparse and terminates the script.
#
# Exits with status 1 because this is only reached on invalid input, so a
# calling shell can detect the failure.
def invalid_arguments
  puts @optparse
  exit 1
end
# Loads ignore patterns from a text file into the global $ignore_pattern.
#
# All whitespace is removed from each line. Lines that end up empty are
# dropped: an empty pattern is a substring of every file name and would cause
# every file to be ignored.
#
# @param ignore_pattern_file [String] path of the pattern file, one pattern per line
# @return [Array<String>] the patterns stored in $ignore_pattern
def inventory_ignore_pattern(ignore_pattern_file)
  $ignore_pattern = File.readlines(ignore_pattern_file)
                        .map { |line| line.gsub(/\s+/, '') }
                        .reject(&:empty?)
end
# Builds an inventory of the files under a repository directory, or of a
# single file.
#
# For a directory, every non-directory entry matched by the '**/*' glob is
# recorded by its path relative to the directory; entries for which
# should_be_ignored? is true are skipped when $ignore_pattern is set. For a
# single file, the file is recorded by its base name under its parent
# directory.
#
# If the path does not exist, a message is printed and invalid_arguments is
# called.
#
# @param repository_directory [String] directory (or single file) to inventory
# @return [Hash{String => RepositoryFile}] records keyed by md5sum of the
#   recorded file name; the content hash ignores whitespace
#   (calculate_md5_for_no_white)
def inventory_repository(repository_directory)
  unless File.exist?(repository_directory)
    puts "Repository #{repository_directory} does not exist."
    invalid_arguments
  end

  if File.directory?(repository_directory)
    root = repository_directory
    # Block form restores the working directory afterwards, so relative
    # repository paths keep resolving correctly here and in later calls.
    relative_files = Dir.chdir(root) do
      Dir.glob('**/*').reject { |entry| File.directory?(entry) }
    end
    relative_files = relative_files.reject { |file| should_be_ignored?(file) } unless $ignore_pattern.nil?
  else
    # Split a single file into parent directory + name so that
    # File.join(root, file) below points at the file itself.
    root = File.dirname(repository_directory)
    relative_files = [File.basename(repository_directory)]
  end

  relative_files.sort.each_with_object({}) do |file, inventory|
    full_path = File.join(root, file)
    key = md5sum(file)
    inventory[key] = RepositoryFile.new(key, calculate_md5_for_no_white(full_path), File.mtime(full_path), file, root)
  end
end
# Returns the MD5 hex digest of the given string itself (not of a file's
# content).
#
# @param file_name [String] string to hash
# @return [String] 32-character lowercase hex digest
def md5sum(file_name)
  Digest::MD5.hexdigest(file_name)
end
# Defines the command-line options, stores the parser in @optparse, and parses
# ARGV (destructively) into the global $options hash.
#
# Every option key is initialised to its default before parsing, so callers
# can read $options[:key] without checking for presence.
#
# @return [Array<String>] the arguments left in ARGV after option parsing
def parse_options
  @optparse = OptionParser.new do |opts|
    # Define the options, and what they do
    opts.banner = 'Usage: compare_hash.rb [options]'

    $options[:baserepo] = nil
    opts.on('-b', '--baserepo FILE', 'The base repository to create file hashes from') do |file|
      $options[:baserepo] = file
    end

    $options[:deltarepo] = nil
    opts.on('-d', '--deltarepo FILE', 'The delta repository to test against base repository file hashes') do |file|
      $options[:deltarepo] = file
    end

    # $options[:hashtype] = nil
    # opts.on( '-t', '--hashtype [OPT]', 'Hash type to use: MD5, SHA1' ) do |type|
    #   $options[:hashtype] = type || 'MD5'
    # end

    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end

    $options[:ignorefile] = nil
    opts.on('-i', '--ignorefile FILE', 'Plain text file containing list of files or file types to ignore.') do |file|
      $options[:ignorefile] = file
    end

    $options[:logfile] = nil
    opts.on('-l', '--logfile FILE', 'Write log to FILE') do |file|
      $options[:logfile] = file
    end

    $options[:recordedfile] = nil
    opts.on('-r', '--recordedfile FILE', 'A copy of a previous base repositories files (Mandatory if no base repository is passed)') do |file|
      $options[:recordedfile] = file
    end

    # $options[:verbose] = false
    # opts.on( '-v', '--verbose', 'Output more information' ) do
    #   $options[:verbose] = true
    # end

    $options[:skipcompare] = []
    # Array coercion splits "a,d,c" into ['a', 'd', 'c'], matching the type of
    # the default value above.
    opts.on('-s', '--skipcompare a,d,c', Array, 'Skip comparing files that have been: (a=Added, d=Deleted, c=Changed)') do |list|
      $options[:skipcompare] = list
    end
  end

  # Parse the Options...
  @optparse.parse!
end
# Writes an inventory to +output_file+. Delegates unchanged to
# RepositoryFile.to_file.
#
# @param repository_files [Hash] inventory to record
# @param output_file [String] destination path
def record_hashes_to_file(repository_files, output_file)
  RepositoryFile.to_file(repository_files, output_file)
end
# Tells whether a file name contains any pattern from the global
# $ignore_pattern list. Patterns are matched as plain substrings.
#
# Empty patterns are skipped because the empty string is a substring of every
# name. A nil $ignore_pattern is treated as an empty list.
#
# @param file_name [String] name to test
# @return [Boolean] true when at least one non-empty pattern occurs in the name
def should_be_ignored?(file_name)
  Array($ignore_pattern).any? do |pattern|
    !pattern.empty? && file_name.include?(pattern)
  end
end
# Script entry point: parses options, inventories the requested repositories,
# then runs one of three modes before exiting:
#
# * base and delta given: print files whose hashes differ;
# * only base given: record the base inventory to the log file;
# * only delta given: compare the delta against a recorded inventory.
#
# With neither repository given, prints a message and calls invalid_arguments.
def main
  parse_options

  # ignore
  ignore_file = $options[:ignorefile]
  inventory_ignore_pattern(ignore_file) if ignore_file && File.exist?(ignore_file)

  # base repository
  base_repository_files = inventory_repository($options[:baserepo]) unless $options[:baserepo].nil?

  # delta repository
  delta_repository_files = inventory_repository($options[:deltarepo]) unless $options[:deltarepo].nil?

  logfile = $options[:logfile]
  logfile = './recorded.txt' if logfile.nil? || logfile.empty?

  if base_repository_files && delta_repository_files
    # NOTE(review): this mode calls compare_repositories_md5sums directly, so
    # added/deleted files are not reported and --skipcompare has no effect
    # here. Confirm whether compare_repositories was intended.
    compare_repositories_md5sums(base_repository_files, delta_repository_files)
  elsif base_repository_files
    record_hashes_to_file(base_repository_files, logfile)
  elsif delta_repository_files
    # Choose the path before loading: fall back to the log file when no
    # recorded file was given, instead of trying to open a nil path.
    recorded_file = $options[:recordedfile]
    recorded_file = logfile if recorded_file.nil? || recorded_file.empty?
    compare_hashes_from_recorded(RepositoryFile.from_file(recorded_file), delta_repository_files)
  else
    puts 'No base or delta repository to hash/compare!'
    invalid_arguments
    return
  end

  puts 'Finished!'
  exit
end
# Run main only when this file is executed directly, not when it is loaded.
main if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment