Skip to content

Instantly share code, notes, and snippets.

@mjtorn
Created May 5, 2016 16:51
Show Gist options
  • Save mjtorn/93d9f5c4696d00924301fb974d3d8984 to your computer and use it in GitHub Desktop.
Save mjtorn/93d9f5c4696d00924301fb974d3d8984 to your computer and use it in GitHub Desktop.
copykill 10-Sep-2003 17:12 7275
#!/usr/bin/python
import os, sys, string, getopt, filecmp
from path import path
from types import *
from stat import *
# Global default parameters.  parsargs() rebinds most of these from the
# command line; the script body below reads them.
optlist = []       # every option flag seen by parsargs(), in order
doclean = 0        # 1: delete duplicates after comparing (-c, implied by -Q)
doquar = 0         # 1: copy matched pairs into the quarantine first (-Q)
interactive = 0    # 1: ask before every deletion (-i)
quiet = 0          # 1: suppress informational output (-q)
recurse = 0        # 1: walk subdirectories (-r)
mode = 0           # 0: compare src against dest, 1: single directory
src = ''
dest = ''
# Default quarantine directory.  $HOME is used verbatim when it is set; the
# expanduser() fallback keeps the script importable when it is not, instead
# of dying with a KeyError before any option has been parsed.
quarantine = os.path.join(os.environ.get('HOME', os.path.expanduser('~')),
                          'copyquarantine')
def dispver():
    """Displays version information"""
    # The o-umlaut in the author's surname is written as the \xf6 escape so
    # the source stays pure ASCII (no coding declaration needed) while the
    # printed banner carries the intended character instead of U+FFFD.
    print("copykill copyright 2003 Markus T\xf6rnqvist, [email protected]")
    print("http://mjt.nysv.org/projects/copykill/")
    print("distributed under the gpl")
    print("")
def disphelp():
    """Displays the version banner and the help switches, then exits.

    The process is always terminated with exit status 1 via sys.exit(),
    so this function never returns to its caller.
    """
    dispver()
    print("Usage: copykill [OPTION]... --src=SOURCE --dest=DESTINATION")
    print(" or: copykill [OPTION]... -d DIRECTORY")
    print("Looks for identical files in DIRECTORY or SOURCE and DESTINATION")
    print("")
    helpdict = {
        '-d, --dest=DESTINATION': 'destination to compare',
        '-c, --clean': 'clean duplicates',
        '-h, --help': 'this help here',
        '-i': 'interactive, overrides -q',
        '-q, --quiet': 'no output',
        '-Q, --quarantine=QUARANTINE':
            'quarantine dir, def: [' + quarantine + ']',
        '-r': 'recurse subdirectories',
        '-s, --src=SOURCE': 'source to compare',
    }
    # Switches are listed in plain string order of their help keys.
    for switch in sorted(helpdict):
        print(' %-27s %s' % (switch, helpdict[switch]))
    print("")
    print("When quaranteening, only one of identically named identical files")
    print("survives the process, that's the one in the source directory.")
    print("Files are paired in numerically named subdirectories.")
    print("You may give deeper hierarchies (quarantine/music/mp3 for example)")
    print("as the quarantine directory.")
    sys.exit(1)
def parsargs(args):
    """Parses the arguments

    Reads the option list *args* (normally sys.argv[1:]), appends every
    flag seen to the module-level optlist and rebinds the option globals
    (quiet, recurse, doclean, interactive, src, dest, quarantine, doquar).

    The help text is shown and the process exits (through disphelp()) when
    an option is malformed, when -h/--help is given, when no option is
    given at all, or when source and destination end up equal.
    """
    global quiet, recurse, doclean, interactive
    global src, dest, quarantine, doquar

    try:
        # "dest=" needs the trailing '=' so that the advertised
        # --dest=DESTINATION form accepts its argument.
        opts, args = getopt.getopt(args,
                                   "cd:hiqQ:rs:",
                                   ["dest=",
                                    "clean",
                                    "help",
                                    "quiet",
                                    "quarantine=",
                                    "src="])
    except getopt.GetoptError as err:
        sys.stderr.write("copykill: %s\n" % err)
        disphelp()  # exits

    for opt, arg in opts:
        optlist.append(opt)
        if opt in ('-q', '--quiet'):
            quiet = 1
        elif opt == '-r':
            recurse = 1
        elif opt in ('-c', '--clean'):
            doclean = 1
        elif opt in ('-h', '--help'):
            disphelp()
            break
        elif opt == '-i':
            interactive = 1
        elif opt in ('-d', '--dest'):
            dest = path(arg)
        elif opt in ('-Q', '--quarantine'):
            # An empty argument (--quarantine=) keeps the current default.
            quarantine = path(arg or quarantine)
            doquar = 1
            # Quarantining implies cleaning up afterwards.
            doclean = 1
        elif opt in ('-s', '--src'):
            src = path(arg)

    # Usage errors: call disphelp() directly rather than re-running
    # sys.argv[0] through a shell, which fails when the script is not
    # executable, not on PATH, or has spaces in its path.
    if not opts:
        disphelp()
    if src == dest:
        # Covers both "neither given" and "same directory given twice".
        disphelp()

    # -i needs to print its prompts, so it wins over -q.
    if interactive and quiet:
        quiet = 0
# Parse the command line into the module-level option globals.
parsargs(sys.argv[1:])

# Work out whether a source and/or a destination was supplied, accepting
# the short and the long spelling of each option alike.
_have_src = '-s' in optlist or '--src' in optlist
_have_dest = '-d' in optlist or '--dest' in optlist

if not _have_src:
    if _have_dest:
        # Only one directory given: look for duplicates inside it.
        mode = 1
        src = dest
    else:
        disphelp()  # prints the usage text and exits
if not _have_dest:
    # A source alone cannot be compared against anything; stop here
    # instead of carrying on with an empty destination path.
    disphelp()

if not quiet:
    dispver()
    if mode == 1:
        print("Directory: %s" % dest)
    elif mode == 0:
        print("Source: %s" % src)
        print("Destination: %s" % dest)
# Define the functions
def do_recurse(dir, list):
    """Given a directory and a list, do_recurse appends the files to the list

    dir  -- directory to walk; the caller in this file chdir()s into it
            before calling.
    list -- list that receives one str per file found (modified in place).

    A permission error stops the walk at that point: files found so far
    stay in the list, the offending path is reported unless quiet is set,
    and nothing is raised.  Any other OSError propagates.
    """
    import errno

    # NOTE(review): relpathto() comes from the third-party path module;
    # presumably this yields the current-directory path, so the names
    # collected below are relative to dir -- confirm against that library.
    root = path(dir).relpathto(dir)
    try:
        for entry in root.walkfiles():
            list.append(str(entry))
    except OSError as err:
        if err.errno != errno.EACCES:
            raise
        if not quiet:
            # Report the path the OS refused.  The loop variable would be
            # the last file that *was* readable, or unbound if the very
            # first step failed.
            print("Permission denied for %s" % (err.filename or root))
def do_getlists(source, destination):
    """do_getlists gets the file lists of source and destination, it may use
    do_recurse for the dirty work

    Returns (src_list, dest_list).  Without the recurse flag these are the
    plain os.listdir() results; with it, each list is filled by do_recurse()
    while the working directory is temporarily set to that directory.
    """
    src_list = []
    dest_list = []
    if not recurse:
        src_list = os.listdir(source)
        dest_list = os.listdir(destination)
    else:
        # Resolve both directories before the first chdir(): a relative
        # destination would otherwise be looked up inside the source.
        start_dir = os.getcwd()
        source_abs = os.path.abspath(source)
        destination_abs = os.path.abspath(destination)
        try:
            os.chdir(source_abs)
            do_recurse(source_abs, src_list)
            os.chdir(destination_abs)
            do_recurse(destination_abs, dest_list)
        finally:
            # Later steps join relative src/dest paths onto these names,
            # so the original working directory must be back in place.
            os.chdir(start_dir)
    return src_list, dest_list
def do_cmp(src_list, dest_list):
    """do_cmp runs a filecmp.cmp() over the files in the lists

    Every name in src_list is joined onto the global src and compared
    (non-shallow) with every name in dest_list joined onto the global dest.
    Returns a list of (source_path, destination_path) tuples for pairs that
    compare equal and whose source file is not empty.  In single-directory
    mode (mode == 1) a file is never paired with itself and a pair is not
    reported again in the opposite order.
    """
    matches = []
    for src_name in src_list:
        left = os.path.join(src, src_name)
        for dest_name in dest_list:
            right = os.path.join(dest, dest_name)
            if filecmp.cmp(left, right, 0) != 1:
                continue
            if mode == 0 and os.path.getsize(left) != 0:
                matches.append((left, right))
            if (mode == 1
                    and left != right
                    and (right, left) not in matches
                    and os.path.getsize(left) != 0):
                matches.append((left, right))
    return matches
def do_quarantine(src, dest, quarantine, matchlist):
    """Copies every matched pair into a numbered quarantine subdirectory

    src, dest  -- kept for interface compatibility; the entries of
                  matchlist already carry their directory prefix.
    quarantine -- directory that receives the numbered subdirectories;
                  created if missing.
    matchlist  -- list of (source_path, destination_path) tuples as
                  returned by do_cmp().

    Pair number N goes to quarantine/NNN (zero-padded to three digits).
    When both files of a pair share a basename only the source copy is
    kept.  Existing subdirectories are reused; any other OSError while
    creating them propagates.
    """
    import errno
    import shutil

    if not os.path.exists(quarantine):
        os.makedirs(quarantine)

    # First pass: make sure every numbered slot exists.
    slots = []
    for index, _pair in enumerate(matchlist):
        num = '%03d' % index
        slot = os.path.join(quarantine, num)
        try:
            os.makedirs(slot)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
            if not quiet:
                print("Directory already exists: " + num)
        slots.append(slot)

    # Second pass: copy the files.
    for slot, (src_file, dest_file) in zip(slots, matchlist):
        if not quiet:
            print("Quaranteening to %s file %s" % (slot, src_file))
            print("Quaranteening to %s file %s" % (slot, dest_file))
        # The match paths are used as they are: joining src/dest onto them
        # again would double the prefix whenever those are relative.
        src_target = os.path.join(slot, os.path.basename(src_file))
        dest_target = os.path.join(slot, os.path.basename(dest_file))
        # copyfile() copies bytes in chunks and closes both files even on
        # error, so binary files survive intact and no handle is leaked.
        shutil.copyfile(src_file, src_target)
        if dest_target != src_target:
            shutil.copyfile(dest_file, dest_target)
def _confirm_delete(target):
    """Asks on the terminal whether target may be deleted; 1 = yes, 0 = no."""
    try:
        read_line = raw_input   # Python 2
    except NameError:
        read_line = input       # Python 3
    sys.stdout.write("Do you want to delete %s [y/n] " % target)
    sys.stdout.flush()
    answer = read_line()
    while answer not in ('y', 'n'):
        sys.stdout.write("[y/n] ")
        sys.stdout.flush()
        answer = read_line()
    if answer == 'y':
        return 1
    return 0


def _clean_one(label, target):
    """Deletes target (after asking, in interactive mode) and reports it."""
    if interactive and not _confirm_delete(target):
        return
    if not quiet:
        sys.stdout.write("%s Cleaning up %s " % (label, target))
    try:
        os.remove(target)
    except OSError:
        # Deliberately best-effort: the same file can show up in several
        # pairs and may already be gone.  The failure marker is shown
        # unless -q asked for silence.
        if not quiet:
            print("_o_")
    else:
        if not quiet:
            print("_o/")


def do_clean(src, dest, matchlist):
    """cleans up extra files

    src, dest -- kept for interface compatibility; the entries of
                 matchlist already carry their directory prefix.
    matchlist -- list of (source_path, destination_path) tuples as
                 returned by do_cmp().

    The source file of every pair is removed.  The destination file is
    removed as well when quarantining is active (doquar).  With the
    interactive flag set, each deletion is confirmed on the terminal first.
    Progress output is suppressed when quiet is set.
    """
    for delfile, deldfile in matchlist:
        # Paths are used as they are: joining src/dest onto them again
        # would double the prefix whenever those are relative.
        _clean_one("[SRC]", delfile)
        if doquar:
            _clean_one("[DST]", deldfile)
# Main flow: gather the file lists and find the duplicate pairs.
src_list, dest_list = do_getlists(src, dest)
cmpmatchlist = do_cmp(src_list, dest_list)

# Report every duplicate pair, framed by blank lines, unless -q was given.
if not quiet:
    print("")
    for match in cmpmatchlist:
        print("Match: %s <=> %s" % match)
    print("")

# Quarantine first so the copies exist before anything is deleted.
if doquar:
    do_quarantine(src, dest, quarantine, cmpmatchlist)
if doclean:
    do_clean(src, dest, cmpmatchlist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment