Skip to content

Instantly share code, notes, and snippets.

@dicksonkv
Created May 15, 2017 03:52
Show Gist options
  • Save dicksonkv/ef0d685ad67dd5ec96426060736b6530 to your computer and use it in GitHub Desktop.
Save dicksonkv/ef0d685ad67dd5ec96426060736b6530 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
'''
This script copies all files with a specific extension into a directory.
Files whose content duplicates an already copied file are skipped.
Usage : ./duplicateRemoverCopy.py -s /home/ -e .mp3 -d /music
'''
import argparse
import filecmp
import hashlib
import os
import shutil
import sys
# Parse the command line before the privilege check so that -h/--help and
# usage errors are reported to any user, not only to root.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-s",
    dest="sourceDirectory",
    help="Directory to scan for files.",
    metavar='/path/to/scan',
    required=True,
)
parser.add_argument(
    "-e",
    dest="fileExtension",
    help="File extension that you want to scan.",
    metavar='.extension',
    required=True,
)
parser.add_argument(
    "-d",
    dest="backupDirectory",
    help="Directory to copy the files.",
    metavar='/path/to/save',
    required=True,
)
args = parser.parse_args()
sourceDirectory = args.sourceDirectory
backupDirectory = args.backupDirectory
fileExtension = args.fileExtension

# The script refuses to do any work unless the effective user is root.
if os.geteuid() != 0:
    sys.exit('Script must be run as root')
# MD5 hex digests of every distinct file content seen so far.
# Filled in by filefound(); kept as a list, in first-seen order.
hashlist = []


def filefound(fname):
    """Report whether a file with the same content as *fname* was seen before.

    The file is hashed with MD5 (used here only to detect identical
    content) and the digest is looked up in the module-level ``hashlist``.
    A digest that is not yet present is appended to ``hashlist``.

    Args:
        fname: Path of the file to hash.

    Returns:
        False if this content is new (its digest has just been recorded),
        True if an earlier call already recorded the same digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hashObj = hashlib.md5()
    # Read in fixed-size chunks so large files are never held in memory
    # in full, and use ``with`` so the handle is always closed.
    with open(fname, 'rb') as fObj:
        for chunk in iter(lambda: fObj.read(65536), b''):
            hashObj.update(chunk)
    hashSum = hashObj.hexdigest()
    if hashSum in hashlist:
        return True  # already copied
    hashlist.append(hashSum)
    return False  # not copied yet
def _pick_destination(srcPath, destDir):
    """Choose where in *destDir* the file *srcPath* should be copied.

    Returns a path that does not exist yet, adding a numeric suffix to the
    base name when the plain name is already taken by different content.
    Returns None when *destDir* already holds a byte-identical copy under
    one of the candidate names, so nothing needs to be copied.
    """
    stem, ext = os.path.splitext(os.path.basename(srcPath))
    candidate = os.path.join(destDir, stem + ext)
    counter = 0
    while os.path.lexists(candidate):
        if os.path.isfile(candidate) and filecmp.cmp(srcPath, candidate, shallow=False):
            return None
        counter += 1
        candidate = os.path.join(destDir, '{}_{}{}'.format(stem, counter, ext))
    return candidate


# Create the backup directory (and any missing parents) if it is not present.
os.makedirs(backupDirectory, exist_ok=True)
backupRealPath = os.path.realpath(backupDirectory)

for rootDir, subDirs, subFiles in os.walk(sourceDirectory):
    # Never scan the backup directory itself: copying a file onto itself
    # would make shutil.copy raise SameFileError.
    if os.path.realpath(rootDir) == backupRealPath:
        subDirs[:] = []
        continue
    subDirs[:] = [
        subDir for subDir in subDirs
        if os.path.realpath(os.path.join(rootDir, subDir)) != backupRealPath
    ]
    for subFile in subFiles:
        absPath = os.path.join(rootDir, subFile)
        if not absPath.endswith(fileExtension):
            continue
        # Skip broken symlinks and special files, which cannot be hashed.
        if not os.path.isfile(absPath):
            continue
        if filefound(absPath):
            continue
        # Avoid overwriting a different file that shares the same name.
        destination = _pick_destination(absPath, backupDirectory)
        if destination is None:
            continue
        print('Copying :', absPath)
        shutil.copy(absPath, destination)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment