Find duplicate files within a file system sub-tree. It demonstrates a simple way of combining Functional and Object-Oriented Programming techniques -- no religious wars! -- using generator expressions, comprehensions, and decorators, among other features.
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import fnmatch
import os
import sys
from functools import wraps
from itertools import imap, chain
from os import path

# Set the base path to scan for duplicates here
base = r'/media/file-rep/files'


def must_exist(f):
    '''Decorator: fail fast if a method is called on a missing directory'''
    @wraps(f)
    def check(self, *args, **kwds):
        if not self.exists:
            raise ValueError('Path to directory %r does not exist.' % self.path)
        return f(self, *args, **kwds)
    return check


class Dir(object):
    '''Encapsulates directory and sub-tree scanning logic'''

    def __init__(self, dirpath):
        self.path = path.abspath(dirpath)

    @property
    def exists(self):
        return path.exists(self.path)

    def _get_full_path_of(self, f):
        return path.normcase(path.join(self.path, f))

    def _get_entries(self, of_type, pattern):
        entries = (f for f in os.listdir(self.path)
                   if of_type(self._get_full_path_of(f)))
        if pattern:
            entries = (f for f in entries if fnmatch.fnmatch(f, pattern))
        return entries

    @must_exist
    def get_sub_dirs(self, pattern=None):
        entries = self._get_entries(path.isdir, pattern)
        return (Dir(self._get_full_path_of(f)) for f in entries)

    @must_exist
    def get_files(self, pattern=None):
        entries = self._get_entries(path.isfile, pattern)
        return imap(self._get_full_path_of, entries)

    def __str__(self):
        return self.path

#~ end class Dir


def find_depth(top, file_spec):
    '''Lazily yield every file matching file_spec in the sub-tree rooted at top'''
    directory = Dir(top)
    local = directory.get_files(file_spec)
    recursive = (found for subdir in directory.get_sub_dirs()
                 for found in find_depth(subdir.path, file_spec))
    return chain(local, recursive)


def find_duplicates_of(file_path):
    file_name = path.basename(file_path)
    return [r for r in sorted(find_depth(base, file_name)) if r != file_path]


def print_duplicates_of(file_name, duplicates):
    file_list = '\n\t'.join([file_name] + duplicates)
    sys.stderr.write('\n%s\n' % file_list)


if __name__ == '__main__':
    for cur_file in Dir('.').get_files('_*.*'):
        sys.stdout.write('.')  # progress indicator on stdout; report goes to stderr
        duplicates = find_duplicates_of(cur_file)
        if duplicates:
            print_duplicates_of(cur_file, duplicates)
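The Dir class and the find_depth generator also compose on their own, outside the duplicate-finding script. A hypothetical session, assuming the gist is saved as dupes.py and a /home/user/music tree exists:

from dupes import Dir, find_depth

# list the immediate sub-directories of a collection
for d in Dir('/home/user/music').get_sub_dirs():
    print d

# lazily walk the whole tree for mp3 files
for f in find_depth('/home/user/music', '*.mp3'):
    print f

Because everything is a generator, nothing is read from disk until the loops actually consume the values.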
I created this little script to aid the maintenance of a media file collection. Files were organized in directories by category, and each directory had an MD5SUMS digest file, so checking for true duplicates vs. mere name clashes was trivial. However, I decided to leave the digest-related code out of the gist to keep things simple and focused.
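For reference, a minimal sketch of what that omitted check might look like -- it is not part of the gist, and it assumes each directory's MD5SUMS file uses the standard md5sum output format (hex digest, two spaces, file name, one entry per line):

from os import path

def read_md5sums(dir_path):
    '''Parse a directory's MD5SUMS file into a {file name: digest} dict.
    Assumes standard md5sum output: "<digest>  <name>" per line.'''
    digests = {}
    with open(path.join(dir_path, 'MD5SUMS')) as f:
        for line in f:
            digest, _, name = line.rstrip('\n').partition('  ')
            if name:
                digests[name] = digest
    return digests

def is_true_duplicate(file_a, file_b):
    '''True if both files carry the same digest in their directories' MD5SUMS'''
    digest_a = read_md5sums(path.dirname(file_a)).get(path.basename(file_a))
    digest_b = read_md5sums(path.dirname(file_b)).get(path.basename(file_b))
    return digest_a is not None and digest_a == digest_b

With something like this in place, find_duplicates_of could keep only the results for which is_true_duplicate holds, separating real duplicates from name clashes.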