Last active
August 29, 2015 13:56
-
-
Save Raven24/8826008 to your computer and use it in GitHub Desktop.
dir comparator [wip]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'digest'
require 'forwardable'
# Directory comparator. Records size, mtime and the SHA1 digest of a small
# sample of every regular file below a directory, and compares two such
# recordings against each other.
#
# Output produced by Writer and DiffHandler goes to the global $output_handle,
# which must be set to an IO-like object before they are used.
module CompStat
  # Upper bound, in bytes, of the sample read from each file for hashing.
  SAMPLE_SIZE = 92 # 128
  # Separator between the fields of one savefile line.
  FIELD_SPLIT = "\t\t"

  # Base class of every "these two entries differ" condition.
  class Inequality < StandardError; end
  class MoreRecent < Inequality; end
  class LessRecent < Inequality; end
  class DifferentSize < Inequality; end
  class DifferentSample < Inequality; end
  class DifferentDigest < Inequality; end
  class FileNotInTree < Inequality; end
  class DirNotInTree < Inequality; end

  # Immutable record describing one file: size, mtime (epoch seconds) and the
  # offset, length and SHA1 hex digest of the sampled byte range.
  class File
    attr_accessor :size, :mtime, :spl_start, :spl_length, :sample_digest

    class << self
      # Builds a record from the fields of a savefile line.
      #
      # @param data [Array<String>] remaining fields after the path:
      #   size, mtime, "start length digest"; three elements are shifted off
      # @return [CompStat::File]
      def from_line(data)
        new do |f|
          f.size = data.shift.to_i
          f.mtime = data.shift.to_i
          start, length, digest = data.shift.split(' ')
          f.spl_start = start.to_i
          f.spl_length = length.to_i
          f.sample_digest = digest
        end
      end

      # Builds a record by stat-ing and sampling a file on disk.
      #
      # @param name [String] path of the file
      # @param spl_start [Integer, nil] sample offset; random when nil
      # @param spl_length [Integer, nil] sample length; derived when nil
      # @return [CompStat::File, nil] nil when the file cannot be read
      def from_filename(name, spl_start = nil, spl_length = nil)
        stat = ::File.stat(name)
        new do |f|
          f.size = stat.size
          f.mtime = stat.mtime.to_i
          spl_size = [stat.size, SAMPLE_SIZE].min
          # +1 so that the last valid offset can be chosen and so that a file
          # no larger than the sample yields offset 0 without relying on the
          # float returned by rand(0).
          f.spl_start = spl_start || rand(stat.size - spl_size + 1)
          f.spl_length = spl_length || [stat.size - f.spl_start, SAMPLE_SIZE].min
          # File.read returns nil when the offset lies beyond EOF (possible
          # when the offset was taken from a larger counterpart); hash an
          # empty sample then so the record is kept and compared by size.
          sample = ::File.read(name, f.spl_length, f.spl_start).to_s
          f.sample_digest = Digest::SHA1.hexdigest(sample)
        end
      rescue StandardError => e
        # Best effort: unreadable or vanished files are skipped, not fatal.
        warn "skipping #{name}: #{e.message}"
        nil
      end
    end

    # Yields the new record for population, then freezes it.
    def initialize
      yield self if block_given?
      freeze
    end

    # Compares two records. Never returns false: the first difference found
    # is raised as the matching Inequality subclass.
    #
    # @param other [CompStat::File]
    # @return [true]
    # @raise [ArgumentError] when other is not a CompStat::File
    # @raise [Inequality] describing the first difference
    def ==(other)
      raise ArgumentError unless other.is_a?(File)
      raise MoreRecent if other.mtime > mtime
      raise LessRecent if other.mtime < mtime
      raise DifferentSize if size != other.size
      raise DifferentSample if spl_start != other.spl_start || spl_length != other.spl_length
      raise DifferentDigest if sample_digest != other.sample_digest

      true
    end

    # @return [String] the record's fields (without the path) for a savefile
    def to_line
      [size, mtime, sample].join(FIELD_SPLIT)
    end

    private

    def sample
      "#{spl_start} #{spl_length} #{sample_digest}"
    end
  end

  # One directory node: sub-directories, files that have a counterpart in the
  # reference tree (or all files when there is none), and "exclusive" files
  # that the reference tree lacks.
  class Dir
    attr_reader :subdirs, :files, :exclusive

    def initialize
      @subdirs = {}
      @files = {}
      @exclusive = {}
    end

    # Compares every file and sub-directory of this node with the node at the
    # same path in other_tree, reporting each result through DiffHandler.
    #
    # @param other_tree [Tree, Dir] root of the tree to compare against
    # @param path [String] path of this node relative to the root
    # @return [Boolean] true when everything below this node compared equal
    # @raise [DirNotInTree] when other_tree has no directory at path
    def compare_to(other_tree, path = '.')
      other_files = Tree.find_dir(other_tree, ::File.join(path, '.'), false).files

      # map (not all?) so that every entry is reported, not just the first miss.
      file_results = files.map do |fname, file|
        DiffHandler.catcher(path, fname) do
          raise FileNotInTree if other_files.nil?

          other_file = other_files[fname]
          raise FileNotInTree if other_file.nil?

          file == other_file
        end
      end

      dir_results = subdirs.map do |dname, dir|
        DiffHandler.catcher(path, dname) do
          dir.compare_to(other_tree, ::File.join(path, dname))
        end
      end

      (file_results + dir_results).all?
    end

    # @return [Hash{String => CompStat::File}] shared and exclusive files
    def all_files
      files.merge(exclusive)
    end

    # Serialises this node and everything below it as savefile lines.
    #
    # @param path [String] path of this node relative to the root
    # @return [String]
    def to_lines(path = '.')
      own = all_files.map do |fname, file|
        ::File.join(path, [fname, file.to_line].join(FIELD_SPLIT)) + "\n"
      end
      nested = subdirs.map { |dname, dir| dir.to_lines(::File.join(path, dname)) }
      (own + nested).join
    end

    # Renders an indented listing, three file names per row.
    #
    # @param indent [Integer] number of leading spaces
    # @return [String]
    def to_s(indent = 0)
      pad = ' ' * indent
      file_rows = all_files.keys.each_slice(3).map do |names|
        "#{pad}|-- #{names.join(', ')}\n"
      end
      dir_rows = subdirs.map do |dname, dir|
        "#{pad}|-+ [#{dname}]\n#{dir.to_s(indent + 2)}"
      end
      (file_rows + dir_rows).join
    end
  end

  # Wrapper around a root Dir plus the factories that build one from the file
  # system or from a savefile. The factories keep their working state in
  # class-level instance variables, so they are not reentrant.
  class Tree
    extend Forwardable

    class << self
      # Scans a directory from disk with randomly placed samples.
      #
      # @param dir [String] directory to scan
      # @return [Tree]
      def from_fs(dir)
        from_fs_and_other(dir, nil)
      end

      # Scans a directory from disk, sampling each file at the offset its
      # counterpart in other_tree was sampled at. Files without a counterpart
      # are stored as exclusive.
      #
      # @param dir [String] directory to scan
      # @param other_tree [Tree, nil] reference tree
      # @return [Tree]
      def from_fs_and_other(dir, other_tree)
        @basedir = ::File.realpath(dir)
        @tree = Dir.new
        init_counter
        stat_fs(@basedir, other_tree)
        new(@tree)
      end

      # Rebuilds a tree from a savefile written with Dir#to_lines.
      #
      # @param file [String] path of the savefile
      # @return [Tree]
      def from_savefile(file)
        @basedir = ''
        @tree = Dir.new
        init_counter
        ::File.foreach(file) do |line|
          data = line.split(FIELD_SPLIT)
          insert_tree_leaf(data.shift) { File.from_line(data) }
        end
        new(@tree)
      end

      # Walks to the directory node containing the last component of path.
      # The first '/'-separated component (empty or '.') is ignored.
      #
      # @param tree [Tree, Dir] root to start from
      # @param path [String] e.g. "/sub/file" or "./sub/file"
      # @param create [Boolean] create missing directories instead of raising
      # @return [Tree, Dir] tree itself when path has no directory part
      # @raise [DirNotInTree] when create is false and a directory is missing
      def find_dir(tree, path, create = true)
        # drop(1) rather than [1..-1]: the latter is nil for an empty path.
        *dirs, _leaf = path.split('/').drop(1)
        dirs.reduce(tree) do |dir, subdir|
          raise DirNotInTree if !create && dir.subdirs[subdir].nil?

          dir.subdirs[subdir] ||= Dir.new
        end
      end

      # @param tree [Tree, Dir] root to search
      # @param path [String] path of the file inside the tree
      # @return [CompStat::File]
      # @raise [DirNotInTree, FileNotInTree] when the path is not in the tree
      def find_file(tree, path)
        found = find_dir(tree, path, false).files[::File.basename(path)]
        raise FileNotInTree if found.nil?

        found
      end

      private

      def init_counter
        @counter = 0
        $stderr.puts "reading tree..."
      end

      # Recursively records every regular file below dir; symlinks are skipped.
      def stat_fs(dir = nil, other_tree = nil)
        raise ArgumentError if dir.nil?

        ::Dir.foreach(dir) do |item|
          next if item == '.' || item == '..'

          path = ::File.join(dir, item)
          next if ::File.symlink?(path)
          next unless ::File.exist?(path)

          if ::File.directory?(path)
            stat_fs(path, other_tree)
            next
          end

          name = path[@basedir.length..-1]
          # find_dir ignores the first component, so keep the leading slash
          # even when the base directory is "/" itself.
          name = "/#{name}" unless name.start_with?('/')

          other_file = counterpart(other_tree, name)
          exclusive = !other_tree.nil? && other_file.nil?
          insert_tree_leaf(name, exclusive) do
            if other_file
              File.from_filename(path, other_file.spl_start, other_file.spl_length)
            else
              File.from_filename(path)
            end
          end
        end
      end

      # Looks name up in other_tree; nil when there is no reference tree or
      # the file or one of its parent directories is missing from it.
      def counterpart(other_tree, name)
        return nil if other_tree.nil?

        find_file(other_tree, name)
      rescue FileNotInTree, DirNotInTree
        nil
      end

      # Stores the leaf produced by the block under file; a nil leaf is dropped.
      def insert_tree_leaf(file, exclusive = false)
        dir = find_dir(@tree, file)
        leaf = yield
        return if leaf.nil?

        bucket = exclusive ? dir.exclusive : dir.files
        bucket[::File.basename(file)] = leaf

        @counter += 1
        $stderr.puts("#{@counter}".rjust(8) + " files") if (@counter % 5000).zero?
      end
    end

    # @param root [Dir] root directory node
    def initialize(root)
      @root = root
    end

    def_delegator :@root, :subdirs
    def_delegator :@root, :files
    def_delegator :@root, :compare_to
    def_delegator :@root, :to_s
    def_delegator :@root, :to_lines

    # Compares one recorded file with another record, or with the file of the
    # same name below basedir sampled at the recorded offset.
    #
    # @param name [String] path of the file inside this tree
    # @param other_file [CompStat::File, nil] record to compare against
    # @param basedir [String, nil] directory to read the counterpart from
    # @return [true]
    # @raise [ArgumentError] when neither other_file nor basedir is usable
    # @raise [Inequality] describing the first difference
    def compare_file(name, other_file = nil, basedir = nil)
      file = Tree.find_file(@root, name)
      if other_file.nil?
        raise ArgumentError if basedir.nil? || basedir.empty?

        bd = ::File.realpath(basedir)
        other_file = File.from_filename(::File.join(bd, name), file.spl_start, file.spl_length)
      end
      file == other_file
    end
  end

  # Writes tree representations to $output_handle.
  class Writer
    class << self
      def fs_tree(tree)
        $output_handle.puts tree.to_s
      end

      def stat_savefile(tree)
        $output_handle.puts tree.to_lines
      end
    end
  end

  # Turns the Inequality raised by a comparison into a line on $output_handle.
  class DiffHandler
    MESSAGES = {
      MoreRecent => 'destination is more recent',
      LessRecent => 'destination is older',
      DifferentSize => 'filesize different',
      DifferentSample => 'samples were taken from different places',
      DifferentDigest => "samples don't match",
      FileNotInTree => "destination file doesn't exist",
      DirNotInTree => "destination dir doesn't exist"
    }.freeze

    class << self
      # Runs the block and reports its outcome for path/file.
      #
      # @param path [String] directory part of the entry
      # @param file [String] name of the entry
      # @return [Boolean] true when the block returned a truthy value
      def catcher(path, file)
        # A local, not an ivar: catcher is re-entered for nested directories
        # and must still know its own entry name afterwards.
        label = ::File.join(path, file)
        return false unless yield

        report(label, 'EQUAL')
        true
      rescue Inequality => e
        message = MESSAGES[e.class]
        if message
          report(label, message)
        else
          $output_handle.puts "#{label} is different"
        end
        false
      end

      private

      def report(label, msg)
        $output_handle.puts "#{label} -- #{msg}"
      end
    end
  end
end
# Command line front end. The mode selects what is read and written; the
# remaining arguments are consumed from ARGV while the mode runs.
#
# Modes and their ARGV arguments:
#   tree      DIR [OUTFILE]             print the file tree of DIR
#   stat      DIR [OUTFILE]             write a savefile for DIR
#   read_tree SAVEFILE [OUTFILE]        print the tree stored in SAVEFILE
#   tree_two  DIR1 DIR2 [OUTFILE]       compare DIR1 against DIR2
#   test      DIR PATH NAME [OUTFILE]   compare entry NAME of DIR with file PATH
class App
  # @param mode [String, nil] one of the modes listed above
  def initialize(mode = 'stat')
    @mode = mode
    @output_file = nil
    $output_handle = $stdout
  end

  # Runs the selected mode. Prints 'unknown mode' for anything else.
  def exec!
    case @mode
    when 'tree'
      tree = CompStat::Tree.from_fs ARGV.shift
      handle_output ARGV.shift
      CompStat::Writer.fs_tree tree
    when 'stat'
      tree = CompStat::Tree.from_fs ARGV.shift
      handle_output ARGV.shift
      CompStat::Writer.stat_savefile tree
    when 'read_tree'
      tree = CompStat::Tree.from_savefile ARGV.shift
      handle_output ARGV.shift
      CompStat::Writer.fs_tree tree
    when 'tree_two'
      t1 = CompStat::Tree.from_fs ARGV.shift
      t2 = CompStat::Tree.from_fs_and_other ARGV.shift, t1
      handle_output ARGV.shift
      t1.compare_to(t2)
    when 'test'
      tree = CompStat::Tree.from_fs ARGV.shift
      path = ARGV.shift
      name = ARGV.shift
      f = CompStat::Tree.find_file(tree, name)
      other = CompStat::File.from_filename path, f.spl_start, f.spl_length
      handle_output ARGV.shift
      $output_handle.puts("EQUAL") if tree.compare_file name, other
    else
      puts 'unknown mode'
    end
  ensure
    # Close only a file this app opened; $stdout must stay usable.
    @output_file.close if @output_file && !@output_file.closed?
  end

  private

  # Redirects $output_handle to the given file; nil or '' keeps $stdout.
  def handle_output(file)
    return if file.nil? || file.empty?

    @output_file = File.open(file, 'w')
    $output_handle = @output_file
  end
end
# Script entry point: the first command line argument selects the mode, the
# remaining arguments are consumed by App#exec!.
app = App.new(ARGV.shift)
app.exec!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment